Mirror of https://github.com/k2-fsa/icefall.git
Synced 2025-08-09 01:52:41 +00:00
Merge branch 'k2-fsa:master' into dev/speechllm
This commit is contained in: commit 70f13e54d8
.github/scripts/docker/Dockerfile (vendored, 4 changed lines)
@@ -55,9 +55,9 @@ RUN pip install --no-cache-dir \
   "numpy<2.0" \
   onnxoptimizer \
   onnxsim \
-  onnx \
+  onnx==1.17.0 \
   onnxmltools \
-  onnxruntime \
+  onnxruntime==1.17.1 \
   piper_phonemize -f https://k2-fsa.github.io/icefall/piper_phonemize.html \
   pypinyin==0.50.0 \
   pytest \
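The two pins above replace previously floating versions. A quick way to confirm the pins took effect inside the built image is a check like this (a hypothetical sanity check, not part of the commit):

# Hypothetical sanity check (not part of this commit): confirm the image
# really contains the pinned onnx / onnxruntime versions.
import onnx
import onnxruntime

assert onnx.__version__ == "1.17.0", onnx.__version__
assert onnxruntime.__version__ == "1.17.1", onnxruntime.__version__
print("onnx", onnx.__version__, "onnxruntime", onnxruntime.__version__)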
.github/scripts/docker/generate_build_matrix.py (vendored, 21 changed lines)
@@ -63,23 +63,24 @@ def get_torchaudio_version(torch_version):
 def get_matrix(min_torch_version, specified_torch_version, specified_python_version):
-    k2_version = "1.24.4.dev20241029"
-    kaldifeat_version = "1.25.5.dev20241029"
-    version = "20241218"
+    k2_version = "1.24.4.dev20250630"
+    kaldifeat_version = "1.25.5.dev20250630"
+    version = "20250630"

     # torchaudio 2.5.0 does not support python 3.13
-    python_version = ["3.8", "3.9", "3.10", "3.11", "3.12"]
+    python_version = ["3.8", "3.9", "3.10", "3.11", "3.12", "3.13"]
     torch_version = []
     torch_version += ["1.13.0", "1.13.1"]
     torch_version += ["2.0.0", "2.0.1"]
-    # torch_version += ["2.1.0", "2.1.1", "2.1.2"]
-    # torch_version += ["2.2.0", "2.2.1", "2.2.2"]
+    torch_version += ["2.1.0", "2.1.1", "2.1.2"]
+    torch_version += ["2.2.0", "2.2.1", "2.2.2"]
     # Test only torch >= 2.3.0
     torch_version += ["2.3.0", "2.3.1"]
     torch_version += ["2.4.0"]
     torch_version += ["2.4.1"]
     torch_version += ["2.5.0"]
     torch_version += ["2.5.1"]
+    torch_version += ["2.6.0", "2.7.0", "2.7.1"]

     if specified_torch_version:
         torch_version = [specified_torch_version]
@@ -109,12 +110,8 @@ def get_matrix(min_torch_version, specified_torch_version, specified_python_version):
             # torch>=2.5 requires python 3.10
             continue

-        if t == "2.5.1":
-            k2_version_2 = "1.24.4.dev20241122"
-            kaldifeat_version_2 = "1.25.5.dev20241126"
-        else:
-            k2_version_2 = k2_version
-            kaldifeat_version_2 = kaldifeat_version
+        k2_version_2 = k2_version
+        kaldifeat_version_2 = kaldifeat_version

         matrix.append(
             {
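For context, this script prints a JSON matrix that the workflow files below consume via fromJson(). A minimal sketch of the pattern (simplified; the real script also resolves matching torchaudio/k2/kaldifeat versions per entry and filters unsupported python/torch combinations):

#!/usr/bin/env python3
# Minimal sketch of the build-matrix pattern (simplified, not the real script).
import json


def get_matrix(specified_python_version=None):
    python_version = ["3.8", "3.9", "3.10", "3.11", "3.12", "3.13"]
    torch_version = ["2.6.0", "2.7.0", "2.7.1"]
    if specified_python_version:
        python_version = [specified_python_version]

    matrix = []
    for p in python_version:
        for t in torch_version:
            matrix.append({"python-version": p, "torch-version": t})
    return matrix


if __name__ == "__main__":
    # The workflows capture this stdout and feed it to fromJson().
    print(json.dumps({"include": get_matrix("3.10")}))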
.github/scripts/generate-piper-phonemize-page.py (vendored, 67 changed lines)
@@ -1,7 +1,7 @@
 #!/usr/bin/env python3


-def main():
+def get_v1_2_0_files():
     prefix = (
         "https://github.com/csukuangfj/piper-phonemize/releases/download/2023.12.5/"
     )
@@ -19,9 +19,70 @@ def main():
         "piper_phonemize-1.2.0-cp39-cp39-macosx_10_14_x86_64.whl",
         "piper_phonemize-1.2.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl",
     ]
+    ans = [prefix + f for f in files]
+    ans.sort()
+    return ans
+
+
+def get_v1_3_0_files():
+    prefix = (
+        "https://github.com/csukuangfj/piper-phonemize/releases/download/2025.06.23/"
+    )
+    files = [
+        "piper_phonemize-1.3.0-cp310-cp310-macosx_10_9_universal2.whl",
+        "piper_phonemize-1.3.0-cp310-cp310-macosx_10_9_x86_64.whl",
+        "piper_phonemize-1.3.0-cp310-cp310-macosx_11_0_arm64.whl",
+        "piper_phonemize-1.3.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl",
+        "piper_phonemize-1.3.0-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl",
+        "piper_phonemize-1.3.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl",
+        "piper_phonemize-1.3.0-cp310-cp310-win_amd64.whl",
+        "piper_phonemize-1.3.0-cp311-cp311-macosx_10_9_universal2.whl",
+        "piper_phonemize-1.3.0-cp311-cp311-macosx_10_9_x86_64.whl",
+        "piper_phonemize-1.3.0-cp311-cp311-macosx_11_0_arm64.whl",
+        "piper_phonemize-1.3.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl",
+        "piper_phonemize-1.3.0-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl",
+        "piper_phonemize-1.3.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl",
+        "piper_phonemize-1.3.0-cp311-cp311-win_amd64.whl",
+        "piper_phonemize-1.3.0-cp312-cp312-macosx_10_13_universal2.whl",
+        "piper_phonemize-1.3.0-cp312-cp312-macosx_10_13_x86_64.whl",
+        "piper_phonemize-1.3.0-cp312-cp312-macosx_11_0_arm64.whl",
+        "piper_phonemize-1.3.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl",
+        "piper_phonemize-1.3.0-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl",
+        "piper_phonemize-1.3.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl",
+        "piper_phonemize-1.3.0-cp312-cp312-win_amd64.whl",
+        "piper_phonemize-1.3.0-cp313-cp313-macosx_10_13_universal2.whl",
+        "piper_phonemize-1.3.0-cp313-cp313-macosx_10_13_x86_64.whl",
+        "piper_phonemize-1.3.0-cp313-cp313-macosx_11_0_arm64.whl",
+        "piper_phonemize-1.3.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl",
+        "piper_phonemize-1.3.0-cp313-cp313-manylinux_2_17_i686.manylinux2014_i686.whl",
+        "piper_phonemize-1.3.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl",
+        "piper_phonemize-1.3.0-cp313-cp313-win_amd64.whl",
+        "piper_phonemize-1.3.0-cp38-cp38-macosx_10_9_universal2.whl",
+        "piper_phonemize-1.3.0-cp38-cp38-macosx_10_9_x86_64.whl",
+        "piper_phonemize-1.3.0-cp38-cp38-macosx_11_0_arm64.whl",
+        "piper_phonemize-1.3.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl",
+        "piper_phonemize-1.3.0-cp38-cp38-manylinux_2_17_i686.manylinux2014_i686.whl",
+        "piper_phonemize-1.3.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl",
+        "piper_phonemize-1.3.0-cp38-cp38-win_amd64.whl",
+        "piper_phonemize-1.3.0-cp39-cp39-macosx_10_9_universal2.whl",
+        "piper_phonemize-1.3.0-cp39-cp39-macosx_10_9_x86_64.whl",
+        "piper_phonemize-1.3.0-cp39-cp39-macosx_11_0_arm64.whl",
+        "piper_phonemize-1.3.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl",
+        "piper_phonemize-1.3.0-cp39-cp39-manylinux_2_17_i686.manylinux2014_i686.whl",
+        "piper_phonemize-1.3.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl",
+        "piper_phonemize-1.3.0-cp39-cp39-win_amd64.whl",
+    ]
+    ans = [prefix + f for f in files]
+    ans.sort()
+    return ans
+
+
+def main():
+    files = get_v1_3_0_files() + get_v1_2_0_files()
+
     with open("piper_phonemize.html", "w") as f:
-        for file in files:
-            url = prefix + file
+        for url in files:
+            file = url.split("/")[-1]
             f.write(f'<a href="{url}">{file}</a><br/>\n')
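The generated page is a plain HTML list of wheel links that pip consumes through its -f/--find-links option (as the Dockerfile above does). Since the helpers now return full URLs, a link check before publishing can catch typos in the wheel names (a hypothetical helper, not part of the commit):

# Hypothetical link check (not part of this commit): verify every wheel URL
# in the generated page actually resolves before the page is published.
import re
import urllib.request

with open("piper_phonemize.html") as f:
    urls = re.findall(r'href="([^"]+)"', f.read())

for url in urls:
    req = urllib.request.Request(url, method="HEAD")
    with urllib.request.urlopen(req) as resp:  # raises on 4xx/5xx
        print(resp.status, url)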
.github/scripts/multi-zh-hans.sh (vendored, 200 lines deleted)
@@ -1,200 +0,0 @@
#!/usr/bin/env bash

set -ex

git config --global user.name "k2-fsa"
git config --global user.email "csukuangfj@gmail.com"
git config --global lfs.allowincompletepush true

log() {
  # This function is from espnet
  local fname=${BASH_SOURCE[1]##*/}
  echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*"
}

log "pwd: $PWD"

cd egs/multi_zh-hans/ASR

repo_url=https://huggingface.co/zrjin/icefall-asr-multi-zh-hans-zipformer-2023-9-2
log "Downloading pre-trained model from $repo_url"
GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url
repo=$(basename $repo_url)
pushd $repo
cd exp
git lfs pull --include pretrained.pt
ln -s pretrained.pt epoch-99.pt
cd ../data/lang_bpe_2000
ls -lh
git lfs pull --include L.pt L_disambig.pt Linv.pt bpe.model
git lfs pull --include "*.model"
ls -lh
popd

log "--------------------------------------------"
log "Export non-streaming ONNX transducer models "
log "--------------------------------------------"
./zipformer/export-onnx.py \
  --tokens $repo/data/lang_bpe_2000/tokens.txt \
  --use-averaged-model 0 \
  --epoch 99 \
  --avg 1 \
  --exp-dir $repo/exp \
  --causal False

ls -lh $repo/exp

./zipformer/onnx_pretrained.py \
  --encoder-model-filename $repo/exp/encoder-epoch-99-avg-1.onnx \
  --decoder-model-filename $repo/exp/decoder-epoch-99-avg-1.onnx \
  --joiner-model-filename $repo/exp/joiner-epoch-99-avg-1.onnx \
  --tokens $repo/data/lang_bpe_2000/tokens.txt \
  $repo/test_wavs/DEV_T0000000000.wav \
  $repo/test_wavs/DEV_T0000000001.wav \
  $repo/test_wavs/DEV_T0000000002.wav \
  $repo/test_wavs/TEST_MEETING_T0000000113.wav \
  $repo/test_wavs/TEST_MEETING_T0000000219.wav \
  $repo/test_wavs/TEST_MEETING_T0000000351.wav

rm -rf $repo

repo_url=https://huggingface.co/zrjin/icefall-asr-multi-zh-hans-zipformer-ctc-streaming-2023-11-05
log "Downloading pre-trained model from $repo_url"
GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url
repo=$(basename $repo_url)

pushd $repo
cd exp/
git lfs pull --include pretrained.pt
rm -fv epoch-20.pt
rm -fv *.onnx
ln -s pretrained.pt epoch-20.pt
cd ../data/lang_bpe_2000
ls -lh
git lfs pull --include L.pt L_disambig.pt Linv.pt bpe.model
git lfs pull --include "*.model"
ls -lh
popd

log "----------------------------------------"
log "Export streaming ONNX CTC models "
log "----------------------------------------"
./zipformer/export-onnx-streaming-ctc.py \
  --exp-dir $repo/exp \
  --tokens $repo/data/lang_bpe_2000/tokens.txt \
  --causal 1 \
  --avg 1 \
  --epoch 20 \
  --use-averaged-model 0 \
  --chunk-size 16 \
  --left-context-frames 128 \
  --use-ctc 1

ls -lh $repo/exp/

log "------------------------------------------------------------"
log "Test exported streaming ONNX CTC models (greedy search) "
log "------------------------------------------------------------"

test_wavs=(
  DEV_T0000000000.wav
  DEV_T0000000001.wav
  DEV_T0000000002.wav
  TEST_MEETING_T0000000113.wav
  TEST_MEETING_T0000000219.wav
  TEST_MEETING_T0000000351.wav
)

for w in ${test_wavs[@]}; do
  ./zipformer/onnx_pretrained-streaming-ctc.py \
    --model-filename $repo/exp/ctc-epoch-20-avg-1-chunk-16-left-128.int8.onnx \
    --tokens $repo/data/lang_bpe_2000/tokens.txt \
    $repo/test_wavs/$w
done

log "Upload onnx CTC models to huggingface"
url=https://huggingface.co/k2-fsa/sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13
GIT_LFS_SKIP_SMUDGE=1 git clone $url
dst=$(basename $url)
cp -v $repo/exp/ctc*.onnx $dst
cp -v $repo/data/lang_bpe_2000/tokens.txt $dst
cp -v $repo/data/lang_bpe_2000/bpe.model $dst
mkdir -p $dst/test_wavs
cp -v $repo/test_wavs/*.wav $dst/test_wavs
cd $dst
git lfs track "*.onnx" "bpe.model"
ls -lh
file bpe.model
git status
git add .
git commit -m "upload model" && git push https://k2-fsa:${HF_TOKEN}@huggingface.co/k2-fsa/$dst main || true

log "Upload models to https://github.com/k2-fsa/sherpa-onnx"
rm -rf .git
rm -fv .gitattributes
cd ..
tar cjfv $dst.tar.bz2 $dst
ls -lh *.tar.bz2
mv -v $dst.tar.bz2 ../../../

log "----------------------------------------"
log "Export streaming ONNX transducer models "
log "----------------------------------------"

./zipformer/export-onnx-streaming.py \
  --exp-dir $repo/exp \
  --tokens $repo/data/lang_bpe_2000/tokens.txt \
  --causal 1 \
  --avg 1 \
  --epoch 20 \
  --use-averaged-model 0 \
  --chunk-size 16 \
  --left-context-frames 128 \
  --use-ctc 0

ls -lh $repo/exp

log "------------------------------------------------------------"
log "Test exported streaming ONNX transducer models (Python code)"
log "------------------------------------------------------------"

log "test fp32"
./zipformer/onnx_pretrained-streaming.py \
  --encoder-model-filename $repo/exp/encoder-epoch-20-avg-1-chunk-16-left-128.onnx \
  --decoder-model-filename $repo/exp/decoder-epoch-20-avg-1-chunk-16-left-128.onnx \
  --joiner-model-filename $repo/exp/joiner-epoch-20-avg-1-chunk-16-left-128.onnx \
  --tokens $repo/data/lang_bpe_2000/tokens.txt \
  $repo/test_wavs/DEV_T0000000000.wav

log "test int8"
./zipformer/onnx_pretrained-streaming.py \
  --encoder-model-filename $repo/exp/encoder-epoch-20-avg-1-chunk-16-left-128.int8.onnx \
  --decoder-model-filename $repo/exp/decoder-epoch-20-avg-1-chunk-16-left-128.onnx \
  --joiner-model-filename $repo/exp/joiner-epoch-20-avg-1-chunk-16-left-128.int8.onnx \
  --tokens $repo/data/lang_bpe_2000/tokens.txt \
  $repo/test_wavs/DEV_T0000000000.wav

log "Upload onnx transducer models to huggingface"

url=https://huggingface.co/k2-fsa/sherpa-onnx-streaming-zipformer-multi-zh-hans-2023-12-12
GIT_LFS_SKIP_SMUDGE=1 git clone $url
dst=$(basename $url)
cp -v $repo/exp/encoder*.onnx $dst
cp -v $repo/exp/decoder*.onnx $dst
cp -v $repo/exp/joiner*.onnx $dst
cp -v $repo/data/lang_bpe_2000/tokens.txt $dst
cp -v $repo/data/lang_bpe_2000/bpe.model $dst
mkdir -p $dst/test_wavs
cp -v $repo/test_wavs/*.wav $dst/test_wavs
cd $dst
git lfs track "*.onnx" bpe.model
git add .
git commit -m "upload model" && git push https://k2-fsa:${HF_TOKEN}@huggingface.co/k2-fsa/$dst main || true

log "Upload models to https://github.com/k2-fsa/sherpa-onnx"
rm -rf .git
rm -fv .gitattributes
cd ..
tar cjfv $dst.tar.bz2 $dst
ls -lh *.tar.bz2
mv -v $dst.tar.bz2 ../../../
.github/scripts/multi_zh-hans/ASR/run.sh (vendored executable file, 756 lines added)
@@ -0,0 +1,756 @@
#!/usr/bin/env bash

set -ex

git config --global user.name "k2-fsa"
git config --global user.email "csukuangfj@gmail.com"
git config --global lfs.allowincompletepush true

python3 -m pip install onnxmltools==1.13.0 onnx==1.17.0 onnxruntime==1.17.1 sherpa-onnx

log() {
  # This function is from espnet
  local fname=${BASH_SOURCE[1]##*/}
  echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*"
}

cd egs/multi_zh-hans/ASR

log "pwd: $PWD"

function run_2023_9_2() {
  repo_url=https://huggingface.co/zrjin/icefall-asr-multi-zh-hans-zipformer-2023-9-2
  log "Downloading pre-trained model from $repo_url"
  GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url
  repo=$(basename $repo_url)
  pushd $repo
  cd exp
  git lfs pull --include pretrained.pt
  ln -s pretrained.pt epoch-99.pt
  cd ../data/lang_bpe_2000
  ls -lh
  git lfs pull --include L.pt L_disambig.pt Linv.pt bpe.model
  git lfs pull --include "*.model"
  ls -lh
  popd

  log "--------------------------------------------"
  log "Export non-streaming ONNX transducer models "
  log "--------------------------------------------"
  ./zipformer/export-onnx.py \
    --tokens $repo/data/lang_bpe_2000/tokens.txt \
    --use-averaged-model 0 \
    --epoch 99 \
    --avg 1 \
    --exp-dir $repo/exp \
    --causal False \
    --fp16 1

  ls -lh $repo/exp

  ./zipformer/onnx_pretrained.py \
    --encoder-model-filename $repo/exp/encoder-epoch-99-avg-1.onnx \
    --decoder-model-filename $repo/exp/decoder-epoch-99-avg-1.onnx \
    --joiner-model-filename $repo/exp/joiner-epoch-99-avg-1.onnx \
    --tokens $repo/data/lang_bpe_2000/tokens.txt \
    $repo/test_wavs/DEV_T0000000000.wav \
    $repo/test_wavs/DEV_T0000000001.wav \
    $repo/test_wavs/DEV_T0000000002.wav \
    $repo/test_wavs/TEST_MEETING_T0000000113.wav \
    $repo/test_wavs/TEST_MEETING_T0000000219.wav \
    $repo/test_wavs/TEST_MEETING_T0000000351.wav

  ./zipformer/onnx_pretrained.py \
    --encoder-model-filename $repo/exp/encoder-epoch-99-avg-1.int8.onnx \
    --decoder-model-filename $repo/exp/decoder-epoch-99-avg-1.onnx \
    --joiner-model-filename $repo/exp/joiner-epoch-99-avg-1.int8.onnx \
    --tokens $repo/data/lang_bpe_2000/tokens.txt \
    $repo/test_wavs/DEV_T0000000000.wav \
    $repo/test_wavs/DEV_T0000000001.wav \
    $repo/test_wavs/DEV_T0000000002.wav \
    $repo/test_wavs/TEST_MEETING_T0000000113.wav \
    $repo/test_wavs/TEST_MEETING_T0000000219.wav \
    $repo/test_wavs/TEST_MEETING_T0000000351.wav

  ./zipformer/onnx_pretrained.py \
    --encoder-model-filename $repo/exp/encoder-epoch-99-avg-1.fp16.onnx \
    --decoder-model-filename $repo/exp/decoder-epoch-99-avg-1.fp16.onnx \
    --joiner-model-filename $repo/exp/joiner-epoch-99-avg-1.fp16.onnx \
    --tokens $repo/data/lang_bpe_2000/tokens.txt \
    $repo/test_wavs/DEV_T0000000000.wav \
    $repo/test_wavs/DEV_T0000000001.wav \
    $repo/test_wavs/DEV_T0000000002.wav \
    $repo/test_wavs/TEST_MEETING_T0000000113.wav \
    $repo/test_wavs/TEST_MEETING_T0000000219.wav \
    $repo/test_wavs/TEST_MEETING_T0000000351.wav

  rm -rf $repo
}

function run_2023_11_05_streaming() {
  repo_url=https://huggingface.co/zrjin/icefall-asr-multi-zh-hans-zipformer-ctc-streaming-2023-11-05
  log "Downloading pre-trained model from $repo_url"
  GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url
  repo=$(basename $repo_url)

  pushd $repo
  cd exp/
  git lfs pull --include pretrained.pt
  rm -fv epoch-20.pt
  rm -fv *.onnx
  ln -s pretrained.pt epoch-20.pt
  cd ../data/lang_bpe_2000
  ls -lh
  git lfs pull --include L.pt L_disambig.pt Linv.pt bpe.model
  git lfs pull --include "*.model"
  ls -lh
  popd

  log "----------------------------------------"
  log "Export streaming ONNX CTC models "
  log "----------------------------------------"
  ./zipformer/export-onnx-streaming-ctc.py \
    --exp-dir $repo/exp \
    --tokens $repo/data/lang_bpe_2000/tokens.txt \
    --causal 1 \
    --avg 1 \
    --epoch 20 \
    --use-averaged-model 0 \
    --chunk-size 16 \
    --left-context-frames 128 \
    --use-ctc 1 \
    --fp16 1

  ls -lh $repo/exp/

  log "------------------------------------------------------------"
  log "Test exported streaming ONNX CTC models (greedy search) "
  log "------------------------------------------------------------"

  test_wavs=(
    DEV_T0000000000.wav
    DEV_T0000000001.wav
    DEV_T0000000002.wav
    TEST_MEETING_T0000000113.wav
    TEST_MEETING_T0000000219.wav
    TEST_MEETING_T0000000351.wav
  )

  for w in ${test_wavs[@]}; do
    log "----fp32----"
    ./zipformer/onnx_pretrained-streaming-ctc.py \
      --model-filename $repo/exp/ctc-epoch-20-avg-1-chunk-16-left-128.onnx \
      --tokens $repo/data/lang_bpe_2000/tokens.txt \
      $repo/test_wavs/$w

    log "----int8----"

    ./zipformer/onnx_pretrained-streaming-ctc.py \
      --model-filename $repo/exp/ctc-epoch-20-avg-1-chunk-16-left-128.int8.onnx \
      --tokens $repo/data/lang_bpe_2000/tokens.txt \
      $repo/test_wavs/$w

    log "----fp16----"

    ./zipformer/onnx_pretrained-streaming-ctc.py \
      --model-filename $repo/exp/ctc-epoch-20-avg-1-chunk-16-left-128.fp16.onnx \
      --tokens $repo/data/lang_bpe_2000/tokens.txt \
      $repo/test_wavs/$w
  done

  log "Upload onnx CTC models to huggingface"
  name=(
    sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13
    sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-int8-2023-12-13
    sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-fp16-2023-12-13
  )
  for n in ${name[@]}; do
    url=https://huggingface.co/k2-fsa/$n
    GIT_LFS_SKIP_SMUDGE=1 git clone $url
    dst=$(basename $url)
    if [[ $n == sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13 ]]; then
      cp -v $repo/exp/ctc-epoch-20-avg-1-chunk-16-left-128.onnx $dst
    elif [[ $n == sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-int8-2023-12-13 ]]; then
      cp -v $repo/exp/ctc-epoch-20-avg-1-chunk-16-left-128.int8.onnx $dst
    elif [[ $n == sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-fp16-2023-12-13 ]]; then
      cp -v $repo/exp/ctc-epoch-20-avg-1-chunk-16-left-128.fp16.onnx $dst
    fi

    cp -v $repo/data/lang_bpe_2000/tokens.txt $dst
    cp -v $repo/data/lang_bpe_2000/bpe.model $dst
    mkdir -p $dst/test_wavs
    cp -v $repo/test_wavs/*.wav $dst/test_wavs
    cd $dst
    git lfs track "*.onnx" "bpe.model" "*.wav"
    ls -lh
    file bpe.model
    git status
    git add .
    git commit -m "upload model" && git push https://k2-fsa:${HF_TOKEN}@huggingface.co/k2-fsa/$dst main || true

    log "Upload models to https://github.com/k2-fsa/sherpa-onnx"
    rm -rf .git
    rm -fv .gitattributes
    cd ..
    tar cjfv $dst.tar.bz2 $dst
    ls -lh *.tar.bz2
    mv -v $dst.tar.bz2 ../../../
  done

  log "----------------------------------------"
  log "Export streaming ONNX transducer models "
  log "----------------------------------------"

  ./zipformer/export-onnx-streaming.py \
    --exp-dir $repo/exp \
    --tokens $repo/data/lang_bpe_2000/tokens.txt \
    --causal 1 \
    --avg 1 \
    --epoch 20 \
    --use-averaged-model 0 \
    --chunk-size 16 \
    --left-context-frames 128 \
    --use-ctc 0 \
    --fp16 1

  ls -lh $repo/exp

  log "------------------------------------------------------------"
  log "Test exported streaming ONNX transducer models (Python code)"
  log "------------------------------------------------------------"

  log "test fp32"
  ./zipformer/onnx_pretrained-streaming.py \
    --encoder-model-filename $repo/exp/encoder-epoch-20-avg-1-chunk-16-left-128.onnx \
    --decoder-model-filename $repo/exp/decoder-epoch-20-avg-1-chunk-16-left-128.onnx \
    --joiner-model-filename $repo/exp/joiner-epoch-20-avg-1-chunk-16-left-128.onnx \
    --tokens $repo/data/lang_bpe_2000/tokens.txt \
    $repo/test_wavs/DEV_T0000000000.wav

  log "test int8"
  ./zipformer/onnx_pretrained-streaming.py \
    --encoder-model-filename $repo/exp/encoder-epoch-20-avg-1-chunk-16-left-128.int8.onnx \
    --decoder-model-filename $repo/exp/decoder-epoch-20-avg-1-chunk-16-left-128.onnx \
    --joiner-model-filename $repo/exp/joiner-epoch-20-avg-1-chunk-16-left-128.int8.onnx \
    --tokens $repo/data/lang_bpe_2000/tokens.txt \
    $repo/test_wavs/DEV_T0000000000.wav

  log "test fp16"
  ./zipformer/onnx_pretrained-streaming.py \
    --encoder-model-filename $repo/exp/encoder-epoch-20-avg-1-chunk-16-left-128.fp16.onnx \
    --decoder-model-filename $repo/exp/decoder-epoch-20-avg-1-chunk-16-left-128.fp16.onnx \
    --joiner-model-filename $repo/exp/joiner-epoch-20-avg-1-chunk-16-left-128.fp16.onnx \
    --tokens $repo/data/lang_bpe_2000/tokens.txt \
    $repo/test_wavs/DEV_T0000000000.wav

  name=(
    sherpa-onnx-streaming-zipformer-multi-zh-hans-2023-12-13
    sherpa-onnx-streaming-zipformer-multi-zh-hans-int8-2023-12-13
    sherpa-onnx-streaming-zipformer-multi-zh-hans-fp16-2023-12-13
  )

  for n in ${name[@]}; do
    url=https://huggingface.co/csukuangfj/$n
    GIT_LFS_SKIP_SMUDGE=1 git clone $url
    dst=$(basename $url)
    if [[ $n == sherpa-onnx-streaming-zipformer-multi-zh-hans-2023-12-13 ]]; then
      cp -v $repo/exp/encoder-epoch-20-avg-1-chunk-16-left-128.onnx $dst
      cp -v $repo/exp/decoder-epoch-20-avg-1-chunk-16-left-128.onnx $dst
      cp -v $repo/exp/joiner-epoch-20-avg-1-chunk-16-left-128.onnx $dst
    elif [[ $n == sherpa-onnx-streaming-zipformer-multi-zh-hans-int8-2023-12-13 ]]; then
      cp -v $repo/exp/encoder-epoch-20-avg-1-chunk-16-left-128.int8.onnx $dst
      cp -v $repo/exp/decoder-epoch-20-avg-1-chunk-16-left-128.onnx $dst
      cp -v $repo/exp/joiner-epoch-20-avg-1-chunk-16-left-128.int8.onnx $dst
    elif [[ $n == sherpa-onnx-streaming-zipformer-multi-zh-hans-fp16-2023-12-13 ]]; then
      cp -v $repo/exp/encoder-epoch-20-avg-1-chunk-16-left-128.fp16.onnx $dst
      cp -v $repo/exp/decoder-epoch-20-avg-1-chunk-16-left-128.fp16.onnx $dst
      cp -v $repo/exp/joiner-epoch-20-avg-1-chunk-16-left-128.fp16.onnx $dst
    fi

    cp -v $repo/data/lang_bpe_2000/tokens.txt $dst
    cp -v $repo/data/lang_bpe_2000/bpe.model $dst
    mkdir -p $dst/test_wavs
    cp -v $repo/test_wavs/*.wav $dst/test_wavs
    cd $dst
    git lfs track "*.onnx" "bpe.model" "*.wav"
    ls -lh
    file bpe.model
    git status
    git add .
    git commit -m "upload model" && git push https://csukuangfj:${HF_TOKEN}@huggingface.co/csukuangfj/$dst main || true

    log "Upload models to https://github.com/k2-fsa/sherpa-onnx"
    rm -rf .git
    rm -fv .gitattributes
    cd ..
    tar cjfv $dst.tar.bz2 $dst
    ls -lh *.tar.bz2
    mv -v $dst.tar.bz2 ../../../
  done
}

function run_2023_12_12_streaming() {
  log "Upload onnx transducer models to huggingface"

  url=https://huggingface.co/k2-fsa/sherpa-onnx-streaming-zipformer-multi-zh-hans-2023-12-12
  GIT_LFS_SKIP_SMUDGE=1 git clone $url
  dst=$(basename $url)
  cp -v $repo/exp/encoder*.onnx $dst
  cp -v $repo/exp/decoder*.onnx $dst
  cp -v $repo/exp/joiner*.onnx $dst
  cp -v $repo/data/lang_bpe_2000/tokens.txt $dst
  cp -v $repo/data/lang_bpe_2000/bpe.model $dst
  mkdir -p $dst/test_wavs
  cp -v $repo/test_wavs/*.wav $dst/test_wavs
  cd $dst
  git lfs track "*.onnx" bpe.model "*.wav"
  git add .
  git commit -m "upload model" && git push https://k2-fsa:${HF_TOKEN}@huggingface.co/k2-fsa/$dst main || true

  log "Upload models to https://github.com/k2-fsa/sherpa-onnx"
  rm -rf .git
  rm -fv .gitattributes
  cd ..
  tar cjfv $dst.tar.bz2 $dst
  ls -lh *.tar.bz2
  mv -v $dst.tar.bz2 ../../../
}

function run_yuekai_large() {
  repo_url=https://csukuangfj:${HF_TOKEN}@huggingface.co/yuekai/icefall-asr-multi-zh-hans-zipformer-large
  log "Downloading pre-trained model from $repo_url"
  GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url
  repo=$(basename $repo_url)
  pushd $repo
  git lfs pull --include pretrained.pt
  mv pretrained.pt epoch-99.pt
  curl -SL -O https://huggingface.co/pingzxy/icefall-asr-multi-zh-hans-zipformer-large-onnx/resolve/main/tokens.txt
  popd

  log "----------------------------------------"
  log "Export streaming ONNX CTC models "
  log "----------------------------------------"
  ./zipformer/export-onnx-streaming-ctc.py \
    --exp-dir $repo/ \
    --tokens $repo/tokens.txt \
    --causal 1 \
    --avg 1 \
    --epoch 99 \
    --use-averaged-model 0 \
    --chunk-size 16 \
    --left-context-frames 128 \
    --use-ctc 1 \
    \
    --num-encoder-layers 2,2,4,5,4,2 \
    --feedforward-dim 768,1024,1536,2048,1536,768 \
    --encoder-dim 256,384,512,768,512,256 \
    --encoder-unmasked-dim 192,192,256,320,256,192 \
    \
    --fp16 1 \
    --use-whisper-features 1

  ls -lh $repo/
  pushd $repo

  cat >README.md <<EOF
# Introduction

This model is converted
from
https://huggingface.co/yuekai/icefall-asr-multi-zh-hans-zipformer-large

The training code can be found at
https://github.com/k2-fsa/icefall/blob/master/egs/multi_zh-hans/ASR/RESULTS.md#multi-chinese-datasets-char-based-training-results-streaming-on-zipformer-large-model
EOF

  mv -v ctc-epoch-99-avg-1-chunk-16-left-128.fp16.onnx model.fp16.onnx
  mv -v ctc-epoch-99-avg-1-chunk-16-left-128.int8.onnx model.int8.onnx
  mv -v ctc-epoch-99-avg-1-chunk-16-left-128.onnx model.onnx

  ls -lh *.onnx

  mkdir test_wavs
  cd test_wavs
  curl -SL -O https://huggingface.co/csukuangfj/sherpa-onnx-streaming-zipformer-small-ctc-zh-int8-2025-04-01/resolve/main/test_wavs/0.wav
  curl -SL -O https://huggingface.co/csukuangfj/sherpa-onnx-streaming-zipformer-small-ctc-zh-int8-2025-04-01/resolve/main/test_wavs/1.wav
  curl -SL -O https://huggingface.co/csukuangfj/sherpa-onnx-streaming-zipformer-small-ctc-zh-int8-2025-04-01/resolve/main/test_wavs/8k.wav
  popd

  for w in 0.wav 1.wav 8k.wav; do
    log "---fp32---"
    sherpa-onnx \
      --zipformer2-ctc-model=$repo/model.onnx \
      --tokens=$repo/tokens.txt \
      $repo/test_wavs/$w

    log "---int8---"

    sherpa-onnx \
      --zipformer2-ctc-model=$repo/model.int8.onnx \
      --tokens=$repo/tokens.txt \
      $repo/test_wavs/$w

    log "---fp16---"

    sherpa-onnx \
      --zipformer2-ctc-model=$repo/model.fp16.onnx \
      --tokens=$repo/tokens.txt \
      $repo/test_wavs/$w
  done

  name=(
    sherpa-onnx-streaming-zipformer-ctc-zh-2025-06-30
    sherpa-onnx-streaming-zipformer-ctc-zh-int8-2025-06-30
    sherpa-onnx-streaming-zipformer-ctc-zh-fp16-2025-06-30
  )
  for n in ${name[@]}; do
    url=https://huggingface.co/csukuangfj/$n
    GIT_LFS_SKIP_SMUDGE=1 git clone $url
    dst=$(basename $url)
    if [[ $n == sherpa-onnx-streaming-zipformer-ctc-zh-2025-06-30 ]]; then
      cp -v $repo/model.onnx $dst
    elif [[ $n == sherpa-onnx-streaming-zipformer-ctc-zh-int8-2025-06-30 ]]; then
      cp -v $repo/model.int8.onnx $dst
    elif [[ $n == sherpa-onnx-streaming-zipformer-ctc-zh-fp16-2025-06-30 ]]; then
      cp -v $repo/model.fp16.onnx $dst
    fi

    cp -v $repo/tokens.txt $dst
    cp -v $repo/README.md $dst
    mkdir -p $dst/test_wavs
    cp -v $repo/test_wavs/*.wav $dst/test_wavs
    cd $dst
    git lfs track "*.onnx" "*.wav"
    ls -lh
    git status
    git add .
    git commit -m "upload model" && git push https://csukuangfj:${HF_TOKEN}@huggingface.co/csukuangfj/$dst main || true

    log "Upload models to https://github.com/k2-fsa/sherpa-onnx"
    rm -rf .git
    rm -fv .gitattributes
    cd ..
    tar cjfv $dst.tar.bz2 $dst
    ls -lh *.tar.bz2
    mv -v $dst.tar.bz2 ../../../
  done

  rm $repo/*.onnx

  log "----------------------------------------"
  log "Export streaming ONNX transducer models "
  log "----------------------------------------"

  ./zipformer/export-onnx-streaming.py \
    --exp-dir $repo \
    --tokens $repo/tokens.txt \
    --causal 1 \
    --avg 1 \
    --epoch 99 \
    --use-averaged-model 0 \
    --chunk-size 16 \
    --left-context-frames 128 \
    --use-ctc 0 \
    \
    --num-encoder-layers 2,2,4,5,4,2 \
    --feedforward-dim 768,1024,1536,2048,1536,768 \
    --encoder-dim 256,384,512,768,512,256 \
    --encoder-unmasked-dim 192,192,256,320,256,192 \
    \
    --fp16 1 \
    --use-whisper-features 1

  ls -lh $repo
  pushd $repo
  for m in encoder decoder joiner; do
    mv -v $m-epoch-99-avg-1-chunk-16-left-128.onnx $m.onnx
    mv -v $m-epoch-99-avg-1-chunk-16-left-128.fp16.onnx $m.fp16.onnx
    mv -v $m-epoch-99-avg-1-chunk-16-left-128.int8.onnx $m.int8.onnx
  done
  ls -lh *.onnx
  popd

  for w in 0.wav 1.wav 8k.wav; do
    log "---fp32---"
    sherpa-onnx \
      --encoder=$repo/encoder.onnx \
      --decoder=$repo/decoder.onnx \
      --joiner=$repo/joiner.onnx \
      --tokens=$repo/tokens.txt \
      $repo/test_wavs/$w

    log "---int8---"

    sherpa-onnx \
      --encoder=$repo/encoder.int8.onnx \
      --decoder=$repo/decoder.onnx \
      --joiner=$repo/joiner.int8.onnx \
      --tokens=$repo/tokens.txt \
      $repo/test_wavs/$w

    log "---fp16---"

    sherpa-onnx \
      --encoder=$repo/encoder.fp16.onnx \
      --decoder=$repo/decoder.fp16.onnx \
      --joiner=$repo/joiner.fp16.onnx \
      --tokens=$repo/tokens.txt \
      $repo/test_wavs/$w
  done

  name=(
    sherpa-onnx-streaming-zipformer-zh-2025-06-30
    sherpa-onnx-streaming-zipformer-zh-int8-2025-06-30
    sherpa-onnx-streaming-zipformer-zh-fp16-2025-06-30
  )
  for n in ${name[@]}; do
    url=https://huggingface.co/csukuangfj/$n
    GIT_LFS_SKIP_SMUDGE=1 git clone $url
    dst=$(basename $url)
    if [[ $n == sherpa-onnx-streaming-zipformer-zh-2025-06-30 ]]; then
      cp -v $repo/encoder.onnx $dst
      cp -v $repo/decoder.onnx $dst
      cp -v $repo/joiner.onnx $dst
    elif [[ $n == sherpa-onnx-streaming-zipformer-zh-int8-2025-06-30 ]]; then
      cp -v $repo/encoder.int8.onnx $dst
      cp -v $repo/decoder.onnx $dst
      cp -v $repo/joiner.int8.onnx $dst
    elif [[ $n == sherpa-onnx-streaming-zipformer-zh-fp16-2025-06-30 ]]; then
      cp -v $repo/encoder.fp16.onnx $dst
      cp -v $repo/decoder.fp16.onnx $dst
      cp -v $repo/joiner.fp16.onnx $dst
    fi

    cp -v $repo/tokens.txt $dst
    cp -v $repo/README.md $dst
    mkdir -p $dst/test_wavs
    cp -v $repo/test_wavs/*.wav $dst/test_wavs
    cd $dst
    git lfs track "*.onnx" "*.wav"
    ls -lh
    git status
    git add .
    git commit -m "upload model" && git push https://csukuangfj:${HF_TOKEN}@huggingface.co/csukuangfj/$dst main || true

    log "Upload models to https://github.com/k2-fsa/sherpa-onnx"
    rm -rf .git
    rm -fv .gitattributes
    cd ..
    tar cjfv $dst.tar.bz2 $dst
    ls -lh *.tar.bz2
    mv -v $dst.tar.bz2 ../../../
  done
}

function run_yuekai_xl() {
  repo_url=https://csukuangfj:${HF_TOKEN}@huggingface.co/yuekai/icefall-asr-multi-zh-hans-zipformer-xl
  log "Downloading pre-trained model from $repo_url"
  GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url
  repo=$(basename $repo_url)

  pushd $repo
  git lfs pull --include pretrained.pt
  git lfs pull --include data/lang_bpe_2000/bpe.model
  mv pretrained.pt epoch-99.pt
  ls -lh *.pt
  popd

  log "----------------------------------------"
  log "Export streaming ONNX CTC models "
  log "----------------------------------------"
  ./zipformer/export-onnx-streaming-ctc.py \
    --exp-dir $repo/ \
    --tokens $repo/data/lang_bpe_2000/tokens.txt \
    --causal 1 \
    --avg 1 \
    --epoch 99 \
    --use-averaged-model 0 \
    --chunk-size 16 \
    --left-context-frames 128 \
    --use-ctc 1 \
    \
    --num-encoder-layers 2,3,5,6,5,3 \
    --feedforward-dim 1536,2048,3072,4096,3072,1536 \
    --encoder-dim 512,768,1024,1536,1024,512 \
    --encoder-unmasked-dim 192,192,256,320,256,192 \
    --decoder-dim 768 --joiner-dim 768 \
    --value-head-dim 18 \
    --query-head-dim 48 \
    --num-heads 4,4,4,8,4,4 \
    \
    --fp16 1 \
    --use-whisper-features 1 \
    --use-external-data 1

  mv -v ctc-epoch-99-avg-1-chunk-16-left-128.int8.onnx model.int8.onnx
  mv -v ctc-epoch-99-avg-1-chunk-16-left-128.fp16.onnx model.fp16.onnx

  ls -lh *.onnx

  mkdir test_wavs
  pushd test_wavs
  curl -SL -O https://huggingface.co/csukuangfj/sherpa-onnx-streaming-zipformer-small-ctc-zh-int8-2025-04-01/resolve/main/test_wavs/0.wav
  curl -SL -O https://huggingface.co/csukuangfj/sherpa-onnx-streaming-zipformer-small-ctc-zh-int8-2025-04-01/resolve/main/test_wavs/1.wav
  curl -SL -O https://huggingface.co/csukuangfj/sherpa-onnx-streaming-zipformer-small-ctc-zh-int8-2025-04-01/resolve/main/test_wavs/8k.wav
  popd

  for w in 0.wav 1.wav 8k.wav; do
    log "---int8---"

    sherpa-onnx \
      --zipformer2-ctc-model=./model.int8.onnx \
      --tokens=$repo/data/lang_bpe_2000/tokens.txt \
      test_wavs/$w

    log "---fp16---"

    sherpa-onnx \
      --zipformer2-ctc-model=./model.fp16.onnx \
      --tokens=$repo/data/lang_bpe_2000/tokens.txt \
      test_wavs/$w
  done

  pushd $repo
  cat >README.md <<EOF
# Introduction

This model is converted
from
https://huggingface.co/yuekai/icefall-asr-multi-zh-hans-zipformer-xl

The training code can be found at
https://github.com/k2-fsa/icefall/blob/master/egs/multi_zh-hans/ASR/RESULTS.md#multi-chinese-datasets-char-based-training-results-streaming-on-zipformer-xl-model
EOF
  popd

  name=(
    sherpa-onnx-streaming-zipformer-ctc-zh-xlarge-int8-2025-06-30
    sherpa-onnx-streaming-zipformer-ctc-zh-xlarge-fp16-2025-06-30
  )

  for n in ${name[@]}; do
    url=https://huggingface.co/csukuangfj/$n
    GIT_LFS_SKIP_SMUDGE=1 git clone $url
    dst=$(basename $url)
    if [[ $n == sherpa-onnx-streaming-zipformer-ctc-zh-xlarge-fp16-2025-06-30 ]]; then
      cp -v model.fp16.onnx $dst
    elif [[ $n == sherpa-onnx-streaming-zipformer-ctc-zh-xlarge-int8-2025-06-30 ]]; then
      cp -v model.int8.onnx $dst
    fi

    cp -v $repo/data/lang_bpe_2000/tokens.txt $dst
    cp -v $repo/data/lang_bpe_2000/bpe.model $dst
    cp -v $repo/README.md $dst
    mkdir -p $dst/test_wavs
    cp -v ./test_wavs/*.wav $dst/test_wavs
    cd $dst
    git lfs track "*.onnx" "*.wav" "bpe.model"
    ls -lh
    git status
    git add .
    git commit -m "upload model" && git push https://csukuangfj:${HF_TOKEN}@huggingface.co/csukuangfj/$dst main || true

    log "Upload models to https://github.com/k2-fsa/sherpa-onnx"
    rm -rf .git
    rm -fv .gitattributes
    cd ..

    ls -lh $dst
    tar cjfv $dst.tar.bz2 $dst
    ls -lh *.tar.bz2
    mv -v $dst.tar.bz2 ../../../
  done

  rm -fv *.onnx *.weights

  log "----------------------------------------"
  log "Export streaming ONNX transducer models "
  log "----------------------------------------"

  ./zipformer/export-onnx-streaming.py \
    --exp-dir $repo/ \
    --tokens $repo/data/lang_bpe_2000/tokens.txt \
    --causal 1 \
    --avg 1 \
    --epoch 99 \
    --use-averaged-model 0 \
    --chunk-size 16 \
    --left-context-frames 128 \
    --use-ctc 0 \
    \
    --num-encoder-layers 2,3,5,6,5,3 \
    --feedforward-dim 1536,2048,3072,4096,3072,1536 \
    --encoder-dim 512,768,1024,1536,1024,512 \
    --encoder-unmasked-dim 192,192,256,320,256,192 \
    --decoder-dim 768 --joiner-dim 768 \
    --value-head-dim 18 \
    --query-head-dim 48 \
    --num-heads 4,4,4,8,4,4 \
    \
    --fp16 1 \
    --use-whisper-features 1 \
    --use-external-data 1

  ls -lh *.onnx
  ls -lh *.weights

  mv encoder-epoch-99-avg-1-chunk-16-left-128.fp16.onnx encoder.fp16.onnx
  mv encoder-epoch-99-avg-1-chunk-16-left-128.int8.onnx encoder.int8.onnx

  mv $repo/decoder-epoch-99-avg-1-chunk-16-left-128.onnx decoder.onnx
  mv $repo/decoder-epoch-99-avg-1-chunk-16-left-128.fp16.onnx decoder.fp16.onnx

  mv $repo/joiner-epoch-99-avg-1-chunk-16-left-128.int8.onnx joiner.int8.onnx
  mv $repo/joiner-epoch-99-avg-1-chunk-16-left-128.fp16.onnx joiner.fp16.onnx

  name=(
    sherpa-onnx-streaming-zipformer-zh-xlarge-int8-2025-06-30
    sherpa-onnx-streaming-zipformer-zh-xlarge-fp16-2025-06-30
  )

  for n in ${name[@]}; do
    url=https://huggingface.co/csukuangfj/$n
    GIT_LFS_SKIP_SMUDGE=1 git clone $url
    dst=$(basename $url)
    if [[ $n == sherpa-onnx-streaming-zipformer-zh-xlarge-fp16-2025-06-30 ]]; then
      cp -v encoder.fp16.onnx $dst
      cp -v decoder.fp16.onnx $dst
      cp -v joiner.fp16.onnx $dst
    elif [[ $n == sherpa-onnx-streaming-zipformer-zh-xlarge-int8-2025-06-30 ]]; then
      cp -v encoder.int8.onnx $dst
      cp -v decoder.onnx $dst
      cp -v joiner.int8.onnx $dst
    fi

    cp -v $repo/data/lang_bpe_2000/tokens.txt $dst
    cp -v $repo/data/lang_bpe_2000/bpe.model $dst
    cp -v $repo/README.md $dst
    mkdir -p $dst/test_wavs
    cp -v ./test_wavs/*.wav $dst/test_wavs
    cd $dst
    git lfs track "*.onnx" "*.wav" "bpe.model"
    ls -lh
    git status
    git add .
    git commit -m "upload model" && git push https://csukuangfj:${HF_TOKEN}@huggingface.co/csukuangfj/$dst main || true

    log "Upload models to https://github.com/k2-fsa/sherpa-onnx"
    rm -rf .git
    rm -fv .gitattributes
    cd ..

    ls -lh $dst
    tar cjfv $dst.tar.bz2 $dst
    ls -lh *.tar.bz2
    mv -v $dst.tar.bz2 ../../../
  done

  rm -fv *.onnx *.weights
}

# run_yuekai_large
# run_yuekai_xl
# run_2023_9_2
run_2023_11_05_streaming
# run_2023_12_12_streaming
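The test loops above shell out to the recipe's ONNX decoding scripts once per wav and per precision. The same loop written in Python, using only flags that appear in the script (a sketch; it assumes the model repo has been cloned into the current directory as run.sh does):

# Sketch of the bash test loop in Python (same flags as in run.sh above).
import subprocess

repo = "icefall-asr-multi-zh-hans-zipformer-ctc-streaming-2023-11-05"
wavs = [
    "DEV_T0000000000.wav",
    "DEV_T0000000001.wav",
    "DEV_T0000000002.wav",
]

for w in wavs:
    subprocess.run(
        [
            "./zipformer/onnx_pretrained-streaming-ctc.py",
            "--model-filename",
            f"{repo}/exp/ctc-epoch-20-avg-1-chunk-16-left-128.int8.onnx",
            "--tokens",
            f"{repo}/data/lang_bpe_2000/tokens.txt",
            f"{repo}/test_wavs/{w}",
        ],
        check=True,  # mirrors `set -e` in the bash script
    )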
.github/workflows/aishell.yml (vendored, 6 changed lines)
@@ -17,7 +17,7 @@ concurrency:

 jobs:
   generate_build_matrix:
-    if: (github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa') && (github.event.label.name == 'ready' || github.event_name == 'push' || github.event_name == 'aishell')
+    if: github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa'

     # see https://github.com/pytorch/pytorch/pull/50633
     runs-on: ubuntu-latest
@@ -31,8 +31,8 @@ jobs:
       id: set-matrix
       run: |
         # outputting for debugging purposes
-        python ./.github/scripts/docker/generate_build_matrix.py
-        MATRIX=$(python ./.github/scripts/docker/generate_build_matrix.py)
+        python ./.github/scripts/docker/generate_build_matrix.py --python-version "3.10"
+        MATRIX=$(python ./.github/scripts/docker/generate_build_matrix.py --python-version "3.10")
         echo "::set-output name=matrix::${MATRIX}"
   aishell:
     needs: generate_build_matrix
.github/workflows/audioset.yml (vendored, 10 changed lines)
@@ -30,8 +30,8 @@ jobs:
       id: set-matrix
       run: |
         # outputting for debugging purposes
-        python ./.github/scripts/docker/generate_build_matrix.py
-        MATRIX=$(python ./.github/scripts/docker/generate_build_matrix.py)
+        python ./.github/scripts/docker/generate_build_matrix.py --python-version "3.10"
+        MATRIX=$(python ./.github/scripts/docker/generate_build_matrix.py --python-version "3.10")
         echo "::set-output name=matrix::${MATRIX}"

   audioset:
@@ -83,7 +83,7 @@ jobs:
         ls -lh ./model-onnx/*

       - name: Upload model to huggingface
-        if: matrix.python-version == '3.9' && matrix.torch-version == '2.3.0' && github.event_name == 'push'
+        if: matrix.python-version == '3.10' && matrix.torch-version == '2.3.0' && github.event_name == 'push'
         env:
           HF_TOKEN: ${{ secrets.HF_TOKEN }}
         uses: nick-fields/retry@v3
@@ -116,7 +116,7 @@ jobs:
         rm -rf huggingface

       - name: Prepare for release
-        if: matrix.python-version == '3.9' && matrix.torch-version == '2.3.0' && github.event_name == 'push'
+        if: matrix.python-version == '3.10' && matrix.torch-version == '2.3.0' && github.event_name == 'push'
         shell: bash
         run: |
           d=sherpa-onnx-zipformer-audio-tagging-2024-04-09
@@ -125,7 +125,7 @@ jobs:
           ls -lh

       - name: Release exported onnx models
-        if: matrix.python-version == '3.9' && matrix.torch-version == '2.3.0' && github.event_name == 'push'
+        if: matrix.python-version == '3.10' && matrix.torch-version == '2.3.0' && github.event_name == 'push'
         uses: svenstaro/upload-release-action@v2
         with:
           file_glob: true
.github/workflows/baker_zh.yml (vendored, 20 changed lines)
@@ -31,8 +31,8 @@ jobs:
       id: set-matrix
       run: |
         # outputting for debugging purposes
-        python ./.github/scripts/docker/generate_build_matrix.py --min-torch-version "2.3"
-        MATRIX=$(python ./.github/scripts/docker/generate_build_matrix.py --min-torch-version "2.3")
+        python ./.github/scripts/docker/generate_build_matrix.py --python-version "3.10"
+        MATRIX=$(python ./.github/scripts/docker/generate_build_matrix.py --python-version "3.10")
         echo "::set-output name=matrix::${MATRIX}"

   baker_zh:
@@ -84,43 +84,43 @@ jobs:
           ls -lh

       - uses: actions/upload-artifact@v4
-        if: matrix.python-version == '3.9' && matrix.torch-version == '2.3.0'
+        if: matrix.python-version == '3.10' && matrix.torch-version == '2.3.0'
         with:
           name: generated-test-files-${{ matrix.python-version }}-${{ matrix.torch-version }}
           path: ./*.wav

       - uses: actions/upload-artifact@v4
-        if: matrix.python-version == '3.9' && matrix.torch-version == '2.3.0'
+        if: matrix.python-version == '3.10' && matrix.torch-version == '2.3.0'
         with:
           name: step-2
           path: ./model-steps-2.onnx

       - uses: actions/upload-artifact@v4
-        if: matrix.python-version == '3.9' && matrix.torch-version == '2.3.0'
+        if: matrix.python-version == '3.10' && matrix.torch-version == '2.3.0'
         with:
           name: step-3
           path: ./model-steps-3.onnx

       - uses: actions/upload-artifact@v4
-        if: matrix.python-version == '3.9' && matrix.torch-version == '2.3.0'
+        if: matrix.python-version == '3.10' && matrix.torch-version == '2.3.0'
         with:
           name: step-4
           path: ./model-steps-4.onnx

       - uses: actions/upload-artifact@v4
-        if: matrix.python-version == '3.9' && matrix.torch-version == '2.3.0'
+        if: matrix.python-version == '3.10' && matrix.torch-version == '2.3.0'
         with:
           name: step-5
           path: ./model-steps-5.onnx

       - uses: actions/upload-artifact@v4
-        if: matrix.python-version == '3.9' && matrix.torch-version == '2.3.0'
+        if: matrix.python-version == '3.10' && matrix.torch-version == '2.3.0'
         with:
           name: step-6
           path: ./model-steps-6.onnx

       - name: Upload models to huggingface
-        if: matrix.python-version == '3.9' && matrix.torch-version == '2.3.0' && github.event_name == 'push'
+        if: matrix.python-version == '3.10' && matrix.torch-version == '2.3.0' && github.event_name == 'push'
         shell: bash
         env:
           HF_TOKEN: ${{ secrets.HF_TOKEN }}
@@ -141,7 +141,7 @@ jobs:
           popd

       - name: Release exported onnx models
-        if: matrix.python-version == '3.9' && matrix.torch-version == '2.3.0' && github.event_name == 'push'
+        if: matrix.python-version == '3.10' && matrix.torch-version == '2.3.0' && github.event_name == 'push'
         uses: svenstaro/upload-release-action@v2
         with:
           file_glob: true
.github/workflows/build-doc.yml (vendored, 4 changed lines)
@@ -34,7 +34,7 @@ concurrency:

 jobs:
   build-doc:
-    if: github.event.label.name == 'doc' || github.event_name == 'push'
+    # if: github.event.label.name == 'doc' || github.event_name == 'push'
     runs-on: ${{ matrix.os }}
     strategy:
       fail-fast: false
@@ -43,7 +43,7 @@ jobs:
         python-version: ["3.8"]
     steps:
       # refer to https://github.com/actions/checkout
-      - uses: actions/checkout@v2
+      - uses: actions/checkout@v4
         with:
           fetch-depth: 0
.github/workflows/librispeech.yml (vendored, 5 changed lines)
@@ -29,8 +29,9 @@ jobs:
       id: set-matrix
       run: |
         # outputting for debugging purposes
-        python ./.github/scripts/docker/generate_build_matrix.py
-        MATRIX=$(python ./.github/scripts/docker/generate_build_matrix.py)
+        python ./.github/scripts/docker/generate_build_matrix.py --python-version "3.10"
+        # MATRIX=$(python ./.github/scripts/docker/generate_build_matrix.py --python-version "3.10")
+        MATRIX=$(python ./.github/scripts/docker/generate_build_matrix.py --python-version "3.10" --min-torch-version "2.6.0")
         echo "::set-output name=matrix::${MATRIX}"
   librispeech:
     needs: generate_build_matrix
.github/workflows/ljspeech.yml (vendored, 22 changed lines)
@@ -30,8 +30,8 @@ jobs:
       id: set-matrix
       run: |
         # outputting for debugging purposes
-        python ./.github/scripts/docker/generate_build_matrix.py --min-torch-version "2.3"
-        MATRIX=$(python ./.github/scripts/docker/generate_build_matrix.py --min-torch-version "2.3")
+        python ./.github/scripts/docker/generate_build_matrix.py --python-version "3.10"
+        MATRIX=$(python ./.github/scripts/docker/generate_build_matrix.py --python-version "3.10")
         echo "::set-output name=matrix::${MATRIX}"

   ljspeech:
@@ -83,13 +83,13 @@ jobs:
           ls -lh

       - uses: actions/upload-artifact@v4
-        if: matrix.python-version == '3.9' && matrix.torch-version == '2.3.0'
+        if: matrix.python-version == '3.10' && matrix.torch-version == '2.3.0'
         with:
           name: generated-test-files-${{ matrix.python-version }}-${{ matrix.torch-version }}
           path: ./*.wav

       - name: Release exported onnx models
-        if: matrix.python-version == '3.9' && matrix.torch-version == '2.3.0' && github.event_name == 'push'
+        if: matrix.python-version == '3.10' && matrix.torch-version == '2.3.0' && github.event_name == 'push'
         uses: svenstaro/upload-release-action@v2
         with:
           file_glob: true
@@ -100,37 +100,37 @@ jobs:
           tag: tts-models

       - uses: actions/upload-artifact@v4
-        if: matrix.python-version == '3.9' && matrix.torch-version == '2.3.0'
+        if: matrix.python-version == '3.10' && matrix.torch-version == '2.3.0'
         with:
           name: step-2
           path: ./model-steps-2.onnx

       - uses: actions/upload-artifact@v4
-        if: matrix.python-version == '3.9' && matrix.torch-version == '2.3.0'
+        if: matrix.python-version == '3.10' && matrix.torch-version == '2.3.0'
         with:
           name: step-3
           path: ./model-steps-3.onnx

       - uses: actions/upload-artifact@v4
-        if: matrix.python-version == '3.9' && matrix.torch-version == '2.3.0'
+        if: matrix.python-version == '3.10' && matrix.torch-version == '2.3.0'
         with:
           name: step-4
           path: ./model-steps-4.onnx

       - uses: actions/upload-artifact@v4
-        if: matrix.python-version == '3.9' && matrix.torch-version == '2.3.0'
+        if: matrix.python-version == '3.10' && matrix.torch-version == '2.3.0'
         with:
           name: step-5
           path: ./model-steps-5.onnx

       - uses: actions/upload-artifact@v4
-        if: matrix.python-version == '3.9' && matrix.torch-version == '2.3.0'
+        if: matrix.python-version == '3.10' && matrix.torch-version == '2.3.0'
         with:
           name: step-6
           path: ./model-steps-6.onnx

       - name: Upload models to huggingface
-        if: matrix.python-version == '3.9' && matrix.torch-version == '2.3.0'
+        if: matrix.python-version == '3.10' && matrix.torch-version == '2.3.0'
         shell: bash
         env:
           HF_TOKEN: ${{ secrets.HF_TOKEN }}
@@ -155,7 +155,7 @@ jobs:
           popd

       - name: Release exported onnx models
-        if: matrix.python-version == '3.9' && matrix.torch-version == '2.3.0'
+        if: matrix.python-version == '3.10' && matrix.torch-version == '2.3.0'
         uses: svenstaro/upload-release-action@v2
         with:
           file_glob: true
.github/workflows/multi-zh-hans.yml (vendored, 85 changed lines)
@ -1,4 +1,4 @@
name: run-multi-zh-hans
name: multi-zh-hans

on:
  push:
@ -8,65 +8,72 @@ on:
  workflow_dispatch:

concurrency:
  group: run-multi-zh-hans-${{ github.ref }}
  group: multi-zh-hans-${{ github.ref }}
  cancel-in-progress: true

permissions:
  contents: write

jobs:
  generate_build_matrix:
    if: github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa'
    # see https://github.com/pytorch/pytorch/pull/50633
    runs-on: ubuntu-latest
    outputs:
      matrix: ${{ steps.set-matrix.outputs.matrix }}
    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0
      - name: Generating build matrix
        id: set-matrix
        run: |
          # outputting for debugging purposes
          python ./.github/scripts/docker/generate_build_matrix.py --torch-version "2.7.0" --python-version "3.11"
          MATRIX=$(python ./.github/scripts/docker/generate_build_matrix.py --torch-version "2.7.0" --python-version "3.11")
          echo "::set-output name=matrix::${MATRIX}"

  multi-zh-hans:
    runs-on: ${{ matrix.os }}
    needs: generate_build_matrix
    name: py${{ matrix.python-version }} torch${{ matrix.torch-version }} v${{ matrix.version }}
    runs-on: ubuntu-latest
    strategy:
      fail-fast: false
      matrix:
        os: [ubuntu-latest]
        python-version: [3.8]
        ${{ fromJson(needs.generate_build_matrix.outputs.matrix) }}

    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Setup Python ${{ matrix.python-version }}
        uses: actions/setup-python@v2
        with:
          python-version: ${{ matrix.python-version }}
          cache: 'pip'
          cache-dependency-path: '**/requirements-ci.txt'

      - name: Install Python dependencies
        run: |
          grep -v '^#' ./requirements-ci.txt | xargs -n 1 -L 1 pip install
          pip uninstall -y protobuf
          pip install --no-binary protobuf protobuf==3.20.*

      - name: Cache kaldifeat
        id: my-cache
        uses: actions/cache@v2
        with:
          path: |
            ~/tmp/kaldifeat
          key: cache-tmp-${{ matrix.python-version }}-2023-05-22

      - name: Install kaldifeat
        if: steps.my-cache.outputs.cache-hit != 'true'
      - name: Free space
        shell: bash
        run: |
          .github/scripts/install-kaldifeat.sh
          df -h
          rm -rf /opt/hostedtoolcache
          df -h
          echo "pwd: $PWD"
          echo "github.workspace ${{ github.workspace }}"

      - name: export-model
      - name: Test with multi_zh-hans
        uses: addnab/docker-run-action@v3
        with:
          image: ghcr.io/${{ github.repository_owner }}/icefall:cpu-py${{ matrix.python-version }}-torch${{ matrix.torch-version }}-v${{ matrix.version }}
          options: |
            --volume ${{ github.workspace }}/:/icefall
          shell: bash
          run: |
            export PYTHONPATH=/icefall:$PYTHONPATH
            export HF_TOKEN=${{ secrets.HF_TOKEN }}
            cd /icefall
            git config --global --add safe.directory /icefall

            .github/scripts/multi_zh-hans/ASR/run.sh

      - name: Show models
        shell: bash
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        run: |
          sudo apt-get -qq install git-lfs tree
          export PYTHONPATH=$PWD:$PYTHONPATH
          export PYTHONPATH=~/tmp/kaldifeat/kaldifeat/python:$PYTHONPATH
          export PYTHONPATH=~/tmp/kaldifeat/build/lib:$PYTHONPATH

          .github/scripts/multi-zh-hans.sh
          ls -lh
          ls -lh *.tar.bz2

      - name: upload model to https://github.com/k2-fsa/sherpa-onnx
        uses: svenstaro/upload-release-action@v2
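Note: the matrix job above relies on generate_build_matrix.py printing a single
JSON object that `${{ fromJson(...) }}` expands into matrix entries. A minimal
sketch of that contract, with the entry fields inferred from how the workflow
consumes them (python-version, torch-version, version); the values below are
placeholders, not output of the real script:

    # build_matrix_sketch.py -- hypothetical illustration of the JSON contract.
    import json

    matrix = {
        "include": [
            # one entry per (python, torch) combination to test
            {"python-version": "3.11", "torch-version": "2.7.0", "version": "20250101"},
        ]
    }
    # The workflow captures this single stdout line into $MATRIX and forwards it
    # via `echo "::set-output name=matrix::${MATRIX}"`.
    print(json.dumps(matrix))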
4
.github/workflows/test.yml
vendored
@ -30,8 +30,8 @@ jobs:
      id: set-matrix
      run: |
        # outputting for debugging purposes
        python ./.github/scripts/docker/generate_build_matrix.py
        MATRIX=$(python ./.github/scripts/docker/generate_build_matrix.py)
        python ./.github/scripts/docker/generate_build_matrix.py --python-version "3.10"
        MATRIX=$(python ./.github/scripts/docker/generate_build_matrix.py --python-version "3.10")
        echo "::set-output name=matrix::${MATRIX}"
  test:
    needs: generate_build_matrix
5
.github/workflows/yesno.yml
vendored
@ -30,8 +30,9 @@ jobs:
      id: set-matrix
      run: |
        # outputting for debugging purposes
        python ./.github/scripts/docker/generate_build_matrix.py
        MATRIX=$(python ./.github/scripts/docker/generate_build_matrix.py)
        python ./.github/scripts/docker/generate_build_matrix.py --python-version "3.10"
        MATRIX=$(python ./.github/scripts/docker/generate_build_matrix.py --python-version "3.10")
        # MATRIX=$(python ./.github/scripts/docker/generate_build_matrix.py --python-version "3.10" --min-torch-version "2.5.0")
        echo "::set-output name=matrix::${MATRIX}"
  yesno:
    needs: generate_build_matrix
@ -79,7 +79,13 @@ from icefall.checkpoint import save_checkpoint_with_global_batch_idx
from icefall.dist import cleanup_dist, setup_dist
from icefall.env import get_env_info
from icefall.lexicon import Lexicon
from icefall.utils import AttributeDict, MetricsTracker, setup_logger, str2bool
from icefall.utils import (
    AttributeDict,
    MetricsTracker,
    setup_logger,
    str2bool,
    torch_autocast,
)

LRSchedulerType = Union[torch.optim.lr_scheduler._LRScheduler, optim.LRScheduler]

@ -638,7 +644,7 @@ def train_one_epoch(
        params.batch_idx_train += 1
        batch_size = len(batch["supervisions"]["text"])

        with torch.cuda.amp.autocast(enabled=params.use_fp16):
        with torch_autocast(enabled=params.use_fp16):
            loss, loss_info = compute_loss(
                params=params,
                model=model,
@ -912,7 +918,7 @@ def scan_pessimistic_batches_for_oom(
        # warmup = 0.0 is so that the derivs for the pruned loss stay zero
        # (i.e. are not remembered by the decaying-average in adam), because
        # we want to avoid these params being subject to shrinkage in adam.
        with torch.cuda.amp.autocast(enabled=params.use_fp16):
        with torch_autocast(enabled=params.use_fp16):
            loss, _ = compute_loss(
                params=params,
                model=model,
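The `torch.cuda.amp.autocast(...)` -> `torch_autocast(...)` substitution repeated
throughout these training scripts tracks PyTorch's deprecation of the CUDA-specific
autocast entry point in favor of the device-generic `torch.amp.autocast`. The real
helper is imported from icefall.utils; the following is only a hedged sketch of
what such a compatibility shim can look like, not the actual implementation:

    # Hypothetical compatibility shim (illustrative only).
    from contextlib import contextmanager

    import torch

    @contextmanager
    def torch_autocast(device_type: str = "cuda", **kwargs):
        """Use torch.amp.autocast where available; otherwise fall back
        to the older torch.cuda.amp.autocast API."""
        if hasattr(torch, "amp") and hasattr(torch.amp, "autocast"):
            with torch.amp.autocast(device_type=device_type, **kwargs):
                yield
        else:
            with torch.cuda.amp.autocast(**kwargs):
                yield

With a shim of this shape, call sites stay one-line drop-in replacements, e.g.
`with torch_autocast(enabled=params.use_fp16): ...`.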
@ -72,7 +72,13 @@ from icefall.checkpoint import save_checkpoint_with_global_batch_idx
from icefall.dist import cleanup_dist, setup_dist
from icefall.env import get_env_info
from icefall.lexicon import Lexicon
from icefall.utils import AttributeDict, MetricsTracker, setup_logger, str2bool
from icefall.utils import (
    AttributeDict,
    MetricsTracker,
    setup_logger,
    str2bool,
    torch_autocast,
)

LRSchedulerType = Union[torch.optim.lr_scheduler._LRScheduler, optim.LRScheduler]

@ -688,7 +694,7 @@ def train_one_epoch(
        batch_size = len(batch["supervisions"]["text"])

        try:
            with torch.cuda.amp.autocast(enabled=params.use_fp16):
            with torch_autocast(enabled=params.use_fp16):
                loss, loss_info = compute_loss(
                    params=params,
                    model=model,
@ -989,7 +995,7 @@ def scan_pessimistic_batches_for_oom(
        # warmup = 0.0 is so that the derivs for the pruned loss stay zero
        # (i.e. are not remembered by the decaying-average in adam), because
        # we want to avoid these params being subject to shrinkage in adam.
        with torch.cuda.amp.autocast(enabled=params.use_fp16):
        with torch_autocast(enabled=params.use_fp16):
            loss, _ = compute_loss(
                params=params,
                model=model,
@ -23,7 +23,7 @@ import torch.nn as nn
from encoder_interface import EncoderInterface
from scaling import ScaledLinear

from icefall.utils import add_sos
from icefall.utils import add_sos, torch_autocast


class Transducer(nn.Module):
@ -184,7 +184,7 @@ class Transducer(nn.Module):
        lm = simple_lm_proj(decoder_out)
        am = simple_am_proj(encoder_out)

        with torch.cuda.amp.autocast(enabled=False):
        with torch_autocast(enabled=False):
            simple_loss, (px_grad, py_grad) = k2.rnnt_loss_smoothed(
                lm=lm.float(),
                am=am.float(),
@ -219,7 +219,7 @@ class Transducer(nn.Module):
        # prior to do_rnnt_pruning (this is an optimization for speed).
        logits = joiner(am_pruned, lm_pruned, project_input=False)

        with torch.cuda.amp.autocast(enabled=False):
        with torch_autocast(enabled=False):
            pruned_loss = k2.rnnt_loss_pruned(
                logits=logits.float(),
                symbols=y_padded,
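Note that the model-level hunks keep autocast disabled around the k2 transducer
losses: activations are cast back with `.float()` and the loss runs in fp32 even
when the surrounding step uses fp16. A minimal runnable illustration of this
fp32-island pattern, with plain torch ops standing in for k2.rnnt_loss_smoothed
and k2.rnnt_loss_pruned:

    # Numerically sensitive reductions stay in float32 under an fp16 region.
    import torch

    if torch.cuda.is_available():
        x = torch.randn(4, 8, device="cuda")
        with torch.amp.autocast("cuda", enabled=True):
            h = x @ x.T  # computed in fp16 under autocast
            with torch.amp.autocast("cuda", enabled=False):
                loss = (h.float() ** 2).mean()  # fp32 island
        print(h.dtype, loss.dtype)  # torch.float16 torch.float32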
@ -94,7 +94,13 @@ from icefall.checkpoint import (
from icefall.dist import cleanup_dist, setup_dist
from icefall.env import get_env_info
from icefall.lexicon import Lexicon
from icefall.utils import AttributeDict, MetricsTracker, setup_logger, str2bool
from icefall.utils import (
    AttributeDict,
    MetricsTracker,
    setup_logger,
    str2bool,
    torch_autocast,
)

LRSchedulerType = Union[torch.optim.lr_scheduler._LRScheduler, optim.LRScheduler]

@ -797,7 +803,7 @@ def train_one_epoch(
        aishell = is_aishell(batch["supervisions"]["cut"][0])

        try:
            with torch.cuda.amp.autocast(enabled=params.use_fp16):
            with torch_autocast(enabled=params.use_fp16):
                loss, loss_info = compute_loss(
                    params=params,
                    model=model,
@ -1202,7 +1208,7 @@ def scan_pessimistic_batches_for_oom(
        # warmup = 0.0 is so that the derivs for the pruned loss stay zero
        # (i.e. are not remembered by the decaying-average in adam), because
        # we want to avoid these params being subject to shrinkage in adam.
        with torch.cuda.amp.autocast(enabled=params.use_fp16):
        with torch_autocast(enabled=params.use_fp16):
            loss, _ = compute_loss(
                params=params,
                model=model,
@ -94,6 +94,7 @@ from icefall.utils import (
    filter_uneven_sized_batch,
    setup_logger,
    str2bool,
    torch_autocast,
)

LRSchedulerType = Union[torch.optim.lr_scheduler._LRScheduler, optim.LRScheduler]
@ -809,7 +810,7 @@ def train_one_epoch(
        batch_size = len(batch["supervisions"]["text"])

        try:
            with torch.cuda.amp.autocast(enabled=params.use_fp16):
            with torch_autocast(enabled=params.use_fp16):
                loss, loss_info = compute_loss(
                    params=params,
                    model=model,
@ -1206,7 +1207,7 @@ def scan_pessimistic_batches_for_oom(
    for criterion, cuts in batches.items():
        batch = train_dl.dataset[cuts]
        try:
            with torch.cuda.amp.autocast(enabled=params.use_fp16):
            with torch_autocast(enabled=params.use_fp16):
                loss, _ = compute_loss(
                    params=params,
                    model=model,
@ -87,6 +87,7 @@ from icefall.utils import (
    setup_logger,
    str2bool,
    tokenize_by_CJK_char,
    torch_autocast,
)

LRSchedulerType = Union[torch.optim.lr_scheduler._LRScheduler, optim.LRScheduler]
@ -802,7 +803,7 @@ def train_one_epoch(
        batch_size = len(batch["supervisions"]["text"])

        try:
            with torch.cuda.amp.autocast(enabled=params.use_fp16):
            with torch_autocast(enabled=params.use_fp16):
                loss, loss_info = compute_loss(
                    params=params,
                    model=model,
@ -1202,7 +1203,7 @@ def scan_pessimistic_batches_for_oom(
    for criterion, cuts in batches.items():
        batch = train_dl.dataset[cuts]
        try:
            with torch.cuda.amp.autocast(enabled=params.use_fp16):
            with torch_autocast(enabled=params.use_fp16):
                loss, _ = compute_loss(
                    params=params,
                    model=model,
@ -81,7 +81,13 @@ from icefall.env import get_env_info
from icefall.err import raise_grad_scale_is_too_small_error
from icefall.hooks import register_inf_check_hooks
from icefall.lexicon import Lexicon
from icefall.utils import AttributeDict, MetricsTracker, setup_logger, str2bool
from icefall.utils import (
    AttributeDict,
    MetricsTracker,
    setup_logger,
    str2bool,
    torch_autocast,
)

LRSchedulerType = Union[torch.optim.lr_scheduler._LRScheduler, optim.LRScheduler]

@ -812,7 +818,7 @@ def train_one_epoch(
        batch_size = len(batch["supervisions"]["text"])

        try:
            with torch.cuda.amp.autocast(enabled=params.use_fp16):
            with torch_autocast(enabled=params.use_fp16):
                loss, loss_info = compute_loss(
                    params=params,
                    model=model,
@ -1202,7 +1208,7 @@ def scan_pessimistic_batches_for_oom(
    for criterion, cuts in batches.items():
        batch = train_dl.dataset[cuts]
        try:
            with torch.cuda.amp.autocast(enabled=params.use_fp16):
            with torch_autocast(enabled=params.use_fp16):
                loss, _ = compute_loss(
                    params=params,
                    model=model,
@ -92,7 +92,7 @@ class AishellAsrDataModule:
        group.add_argument(
            "--num-buckets",
            type=int,
            default=30,
            default=15,
            help="The number of buckets for the DynamicBucketingSampler"
            "(you might want to increase it for larger datasets).",
        )
@ -275,8 +275,7 @@ class AishellAsrDataModule:
                max_duration=self.args.max_duration,
                shuffle=self.args.shuffle,
                num_buckets=self.args.num_buckets,
                buffer_size=self.args.num_buckets * 2000,
                shuffle_buffer_size=self.args.num_buckets * 5000,
                buffer_size=self.args.num_buckets * 5000,
                drop_last=self.args.drop_last,
            )
        else:
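These datamodule hunks halve the default bucket count (30 -> 15) and replace the
separate buffer_size / shuffle_buffer_size pair with a single, larger buffer_size.
A hedged sketch of where those knobs land in lhotse's API; the manifest path and
max_duration below are placeholders, not values taken from this recipe:

    # Illustrative only: the tuned sampler knobs in isolation.
    from lhotse import load_manifest_lazy
    from lhotse.dataset import DynamicBucketingSampler

    cuts_train = load_manifest_lazy("data/fbank/cuts_train.jsonl.gz")  # placeholder
    num_buckets = 15  # new default (was 30)

    sampler = DynamicBucketingSampler(
        cuts_train,
        max_duration=200.0,              # seconds of audio per batch
        shuffle=True,
        num_buckets=num_buckets,
        buffer_size=num_buckets * 5000,  # consolidated buffer (was * 2000 plus a shuffle buffer)
        drop_last=True,
    )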
@ -81,6 +81,7 @@ from icefall.utils import (
    filter_uneven_sized_batch,
    setup_logger,
    str2bool,
    torch_autocast,
)

LRSchedulerType = Union[torch.optim.lr_scheduler._LRScheduler, optim.LRScheduler]
@ -514,7 +515,7 @@ def compute_validation_loss(
    tot_loss = MetricsTracker()

    for batch_idx, batch in enumerate(valid_dl):
        with torch.cuda.amp.autocast(enabled=params.use_fp16):
        with torch_autocast(enabled=params.use_fp16):
            loss, loss_info = compute_loss(
                params=params,
                tokenizer=tokenizer,
@ -608,7 +609,7 @@ def train_one_epoch(
        )

        try:
            with torch.cuda.amp.autocast(enabled=params.use_fp16):
            with torch_autocast(enabled=params.use_fp16):
                loss, loss_info = compute_loss(
                    params=params,
                    tokenizer=tokenizer,
@ -95,6 +95,7 @@ from icefall.utils import (
    get_parameter_groups_with_lrs,
    setup_logger,
    str2bool,
    torch_autocast,
)

LRSchedulerType = Union[torch.optim.lr_scheduler._LRScheduler, optim.LRScheduler]
@ -910,7 +911,7 @@ def train_one_epoch(
        batch_size = len(batch["supervisions"]["text"])

        try:
            with torch.cuda.amp.autocast(enabled=params.use_fp16):
            with torch_autocast(enabled=params.use_fp16):
                loss, loss_info = compute_loss(
                    params=params,
                    model=model,
@ -1302,7 +1303,7 @@ def scan_pessimistic_batches_for_oom(
    for criterion, cuts in batches.items():
        batch = train_dl.dataset[cuts]
        try:
            with torch.cuda.amp.autocast(enabled=params.use_fp16):
            with torch_autocast(enabled=params.use_fp16):
                loss, _ = compute_loss(
                    params=params,
                    model=model,
@ -92,6 +92,7 @@ from icefall.utils import (
    setup_logger,
    str2bool,
    tokenize_by_CJK_char,
    torch_autocast,
)

@ -495,7 +496,7 @@ def train_one_epoch(
        batch_size = len(batch["supervisions"]["text"])

        try:
            with torch.cuda.amp.autocast(enabled=params.use_fp16):
            with torch_autocast(enabled=params.use_fp16):
                loss, loss_info = compute_loss(
                    params=params,
                    model=model,
@ -895,7 +896,7 @@ def scan_pessimistic_batches_for_oom(
    for criterion, cuts in batches.items():
        batch = train_dl.dataset[cuts]
        try:
            with torch.cuda.amp.autocast(enabled=params.use_fp16):
            with torch_autocast(enabled=params.use_fp16):
                loss, _ = compute_loss(
                    params=params,
                    model=model,
@ -104,7 +104,7 @@ class AiShell2AsrDataModule:
        group.add_argument(
            "--num-buckets",
            type=int,
            default=30,
            default=15,
            help="The number of buckets for the DynamicBucketingSampler"
            "(you might want to increase it for larger datasets).",
        )
@ -296,8 +296,7 @@ class AiShell2AsrDataModule:
                max_duration=self.args.max_duration,
                shuffle=self.args.shuffle,
                num_buckets=self.args.num_buckets,
                buffer_size=self.args.num_buckets * 2000,
                shuffle_buffer_size=self.args.num_buckets * 5000,
                buffer_size=self.args.num_buckets * 5000,
                drop_last=self.args.drop_last,
            )
        else:
@ -90,7 +90,13 @@ from icefall.checkpoint import (
from icefall.dist import cleanup_dist, setup_dist
from icefall.env import get_env_info
from icefall.lexicon import Lexicon
from icefall.utils import AttributeDict, MetricsTracker, setup_logger, str2bool
from icefall.utils import (
    AttributeDict,
    MetricsTracker,
    setup_logger,
    str2bool,
    torch_autocast,
)

LRSchedulerType = Union[torch.optim.lr_scheduler._LRScheduler, optim.LRScheduler]

@ -734,7 +740,7 @@ def train_one_epoch(
        batch_size = len(batch["supervisions"]["text"])

        try:
            with torch.cuda.amp.autocast(enabled=params.use_fp16):
            with torch_autocast(enabled=params.use_fp16):
                loss, loss_info = compute_loss(
                    params=params,
                    model=model,
@ -1062,7 +1068,7 @@ def scan_pessimistic_batches_for_oom(
    for criterion, cuts in batches.items():
        batch = train_dl.dataset[cuts]
        try:
            with torch.cuda.amp.autocast(enabled=params.use_fp16):
            with torch_autocast(enabled=params.use_fp16):
                loss, _ = compute_loss(
                    params=params,
                    model=model,
@ -83,7 +83,13 @@ from icefall.checkpoint import (
from icefall.dist import cleanup_dist, setup_dist
from icefall.env import get_env_info
from icefall.lexicon import Lexicon
from icefall.utils import AttributeDict, MetricsTracker, setup_logger, str2bool
from icefall.utils import (
    AttributeDict,
    MetricsTracker,
    setup_logger,
    str2bool,
    torch_autocast,
)

LRSchedulerType = Union[torch.optim.lr_scheduler._LRScheduler, optim.LRScheduler]

@ -727,7 +733,7 @@ def train_one_epoch(
        batch_size = len(batch["supervisions"]["text"])
        # print(batch["supervisions"])

        with torch.cuda.amp.autocast(enabled=params.use_fp16):
        with torch_autocast(enabled=params.use_fp16):
            loss, loss_info = compute_loss(
                params=params,
                model=model,
@ -1034,7 +1040,7 @@ def scan_pessimistic_batches_for_oom(
        # warmup = 0.0 is so that the derivs for the pruned loss stay zero
        # (i.e. are not remembered by the decaying-average in adam), because
        # we want to avoid these params being subject to shrinkage in adam.
        with torch.cuda.amp.autocast(enabled=params.use_fp16):
        with torch_autocast(enabled=params.use_fp16):
            loss, _ = compute_loss(
                params=params,
                model=model,
@ -79,7 +79,13 @@ from icefall.checkpoint import save_checkpoint_with_global_batch_idx
from icefall.dist import cleanup_dist, setup_dist
from icefall.env import get_env_info
from icefall.lexicon import Lexicon
from icefall.utils import AttributeDict, MetricsTracker, setup_logger, str2bool
from icefall.utils import (
    AttributeDict,
    MetricsTracker,
    setup_logger,
    str2bool,
    torch_autocast,
)

LRSchedulerType = Union[torch.optim.lr_scheduler._LRScheduler, optim.LRScheduler]

@ -638,7 +644,7 @@ def train_one_epoch(
        params.batch_idx_train += 1
        batch_size = len(batch["supervisions"]["text"])

        with torch.cuda.amp.autocast(enabled=params.use_fp16):
        with torch_autocast(enabled=params.use_fp16):
            loss, loss_info = compute_loss(
                params=params,
                model=model,
@ -912,7 +918,7 @@ def scan_pessimistic_batches_for_oom(
        # warmup = 0.0 is so that the derivs for the pruned loss stay zero
        # (i.e. are not remembered by the decaying-average in adam), because
        # we want to avoid these params being subject to shrinkage in adam.
        with torch.cuda.amp.autocast(enabled=params.use_fp16):
        with torch_autocast(enabled=params.use_fp16):
            loss, _ = compute_loss(
                params=params,
                model=model,
@ -73,7 +73,13 @@ from icefall.env import get_env_info
from icefall.err import raise_grad_scale_is_too_small_error
from icefall.hooks import register_inf_check_hooks
from icefall.lexicon import Lexicon
from icefall.utils import AttributeDict, MetricsTracker, setup_logger, str2bool
from icefall.utils import (
    AttributeDict,
    MetricsTracker,
    setup_logger,
    str2bool,
    torch_autocast,
)

LRSchedulerType = Union[torch.optim.lr_scheduler._LRScheduler, optim.LRScheduler]

@ -782,7 +788,7 @@ def train_one_epoch(
        batch_size = len(batch["supervisions"]["text"])

        try:
            with torch.cuda.amp.autocast(enabled=params.use_fp16):
            with torch_autocast(enabled=params.use_fp16):
                loss, loss_info = compute_loss(
                    params=params,
                    model=model,
@ -1127,7 +1133,7 @@ def scan_pessimistic_batches_for_oom(
    for criterion, cuts in batches.items():
        batch = train_dl.dataset[cuts]
        try:
            with torch.cuda.amp.autocast(enabled=params.use_fp16):
            with torch_autocast(enabled=params.use_fp16):
                loss, _ = compute_loss(
                    params=params,
                    model=model,
@ -71,7 +71,13 @@ from icefall.dist import cleanup_dist, setup_dist
from icefall.env import get_env_info
from icefall.err import raise_grad_scale_is_too_small_error
from icefall.hooks import register_inf_check_hooks
from icefall.utils import AttributeDict, MetricsTracker, setup_logger, str2bool
from icefall.utils import (
    AttributeDict,
    MetricsTracker,
    setup_logger,
    str2bool,
    torch_autocast,
)

LRSchedulerType = Union[torch.optim.lr_scheduler._LRScheduler, optim.LRScheduler]

@ -773,7 +779,7 @@ def train_one_epoch(
        batch_size = len(batch["supervisions"]["text"])

        try:
            with torch.cuda.amp.autocast(enabled=params.use_fp16):
            with torch_autocast(enabled=params.use_fp16):
                loss, loss_info = compute_loss(
                    params=params,
                    model=model,
@ -1134,7 +1140,7 @@ def scan_pessimistic_batches_for_oom(
    for criterion, cuts in batches.items():
        batch = train_dl.dataset[cuts]
        try:
            with torch.cuda.amp.autocast(enabled=params.use_fp16):
            with torch_autocast(enabled=params.use_fp16):
                loss, _ = compute_loss(
                    params=params,
                    model=model,
@ -76,7 +76,13 @@ from icefall.checkpoint import (
from icefall.dist import cleanup_dist, setup_dist
from icefall.env import get_env_info
from icefall.err import raise_grad_scale_is_too_small_error
from icefall.utils import AttributeDict, MetricsTracker, setup_logger, str2bool
from icefall.utils import (
    AttributeDict,
    MetricsTracker,
    setup_logger,
    str2bool,
    torch_autocast,
)

LRSchedulerType = Union[torch.optim.lr_scheduler._LRScheduler, optim.LRScheduler]

@ -1067,7 +1073,7 @@ def train_one_epoch(
        batch_size = batch["inputs"].shape[0]

        try:
            with torch.cuda.amp.autocast(enabled=params.use_fp16):
            with torch_autocast(enabled=params.use_fp16):
                loss, loss_info = compute_loss(
                    params=params,
                    model=model,
@ -76,7 +76,13 @@ from icefall.checkpoint import (
from icefall.dist import cleanup_dist, setup_dist
from icefall.env import get_env_info
from icefall.err import raise_grad_scale_is_too_small_error
from icefall.utils import AttributeDict, MetricsTracker, setup_logger, str2bool
from icefall.utils import (
    AttributeDict,
    MetricsTracker,
    setup_logger,
    str2bool,
    torch_autocast,
)

LRSchedulerType = Union[torch.optim.lr_scheduler._LRScheduler, optim.LRScheduler]

@ -1058,7 +1064,7 @@ def train_one_epoch(
        batch_size = batch["inputs"].shape[0]

        try:
            with torch.cuda.amp.autocast(enabled=params.use_fp16):
            with torch_autocast(enabled=params.use_fp16):
                loss, loss_info = compute_loss(
                    params=params,
                    model=model,
@ -74,6 +74,7 @@ from icefall.utils import (
    get_parameter_groups_with_lrs,
    setup_logger,
    str2bool,
    torch_autocast,
)

LRSchedulerType = Union[torch.optim.lr_scheduler._LRScheduler, optim.LRScheduler]
@ -799,7 +800,7 @@ def train_one_epoch(
        num_samples += batch_size

        try:
            with torch.cuda.amp.autocast(enabled=params.use_fp16):
            with torch_autocast(enabled=params.use_fp16):
                loss, loss_info = compute_loss(
                    params=params,
                    model=model,
@ -1148,7 +1149,7 @@ def scan_pessimistic_batches_for_oom(
    for criterion, cuts in batches.items():
        batch = train_dl.dataset[cuts]
        try:
            with torch.cuda.amp.autocast(enabled=params.use_fp16):
            with torch_autocast(enabled=params.use_fp16):
                loss, _ = compute_loss(
                    params=params,
                    model=model,
@ -73,6 +73,8 @@ def compute_fbank_baker_zh(num_jobs: int):
        f_min=0,
        f_max=8000,
    )
    if not torch.cuda.is_available():
        config.device = "cpu"

    prefix = "baker_zh"
    suffix = "jsonl.gz"
@ -88,6 +88,7 @@ from icefall.utils import (
    filter_uneven_sized_batch,
    setup_logger,
    str2bool,
    torch_autocast,
)

LRSchedulerType = Union[torch.optim.lr_scheduler._LRScheduler, optim.LRScheduler]
@ -825,7 +826,7 @@ def train_one_epoch(
        batch_size = len(batch["supervisions"]["text"])

        try:
            with torch.cuda.amp.autocast(enabled=params.use_fp16):
            with torch_autocast(enabled=params.use_fp16):
                loss, loss_info = compute_loss(
                    params=params,
                    model=model,
@ -1220,7 +1221,7 @@ def scan_pessimistic_batches_for_oom(
    for criterion, cuts in batches.items():
        batch = train_dl.dataset[cuts]
        try:
            with torch.cuda.amp.autocast(enabled=params.use_fp16):
            with torch_autocast(enabled=params.use_fp16):
                loss, _ = compute_loss(
                    params=params,
                    model=model,
@ -90,6 +90,7 @@ from icefall.utils import (
    filter_uneven_sized_batch,
    setup_logger,
    str2bool,
    torch_autocast,
)

LRSchedulerType = Union[torch.optim.lr_scheduler._LRScheduler, optim.LRScheduler]
@ -895,7 +896,7 @@ def train_one_epoch(
        batch_size = len(batch["supervisions"]["text"])

        try:
            with torch.cuda.amp.autocast(enabled=params.use_fp16):
            with torch_autocast(enabled=params.use_fp16):
                loss, loss_info = compute_loss(
                    params=params,
                    model=model,
@ -1293,7 +1294,7 @@ def scan_pessimistic_batches_for_oom(
    for criterion, cuts in batches.items():
        batch = train_dl.dataset[cuts]
        try:
            with torch.cuda.amp.autocast(enabled=params.use_fp16):
            with torch_autocast(enabled=params.use_fp16):
                loss, _ = compute_loss(
                    params=params,
                    model=model,
@ -81,7 +81,13 @@ from icefall.dist import cleanup_dist, setup_dist
from icefall.env import get_env_info
from icefall.err import raise_grad_scale_is_too_small_error
from icefall.hooks import register_inf_check_hooks
from icefall.utils import AttributeDict, MetricsTracker, setup_logger, str2bool
from icefall.utils import (
    AttributeDict,
    MetricsTracker,
    setup_logger,
    str2bool,
    torch_autocast,
)

LRSchedulerType = Union[torch.optim.lr_scheduler._LRScheduler, optim.LRScheduler]

@ -840,7 +846,7 @@ def train_one_epoch(
        batch_size = len(batch["supervisions"]["text"])

        try:
            with torch.cuda.amp.autocast(enabled=params.use_fp16):
            with torch_autocast(enabled=params.use_fp16):
                loss, loss_info = compute_loss(
                    params=params,
                    model=model,
@ -1237,7 +1243,7 @@ def scan_pessimistic_batches_for_oom(
    for criterion, cuts in batches.items():
        batch = train_dl.dataset[cuts]
        try:
            with torch.cuda.amp.autocast(enabled=params.use_fp16):
            with torch_autocast(enabled=params.use_fp16):
                loss, _ = compute_loss(
                    params=params,
                    model=model,
@ -97,6 +97,7 @@ from icefall.utils import (
    get_parameter_groups_with_lrs,
    setup_logger,
    str2bool,
    torch_autocast,
)

LRSchedulerType = Union[torch.optim.lr_scheduler._LRScheduler, optim.LRScheduler]
@ -969,7 +970,7 @@ def train_one_epoch(
        batch_size = len(batch["supervisions"]["text"])

        try:
            with torch.cuda.amp.autocast(enabled=params.use_fp16):
            with torch_autocast(enabled=params.use_fp16):
                loss, loss_info = compute_loss(
                    params=params,
                    model=model,
@ -1365,7 +1366,7 @@ def scan_pessimistic_batches_for_oom(
    for criterion, cuts in batches.items():
        batch = train_dl.dataset[cuts]
        try:
            with torch.cuda.amp.autocast(enabled=params.use_fp16):
            with torch_autocast(enabled=params.use_fp16):
                loss, _ = compute_loss(
                    params=params,
                    model=model,
@ -97,6 +97,7 @@ from icefall.utils import (
    get_parameter_groups_with_lrs,
    setup_logger,
    str2bool,
    torch_autocast,
)

LRSchedulerType = Union[torch.optim.lr_scheduler._LRScheduler, optim.LRScheduler]
@ -604,7 +605,7 @@ def train_one_epoch(
        batch_size = len(batch["supervisions"]["text"])

        try:
            with torch.cuda.amp.autocast(enabled=params.use_fp16):
            with torch_autocast(enabled=params.use_fp16):
                loss, loss_info = compute_loss(
                    params=params,
                    model=model,
@ -784,7 +785,7 @@ def scan_pessimistic_batches_for_oom(
    for criterion, cuts in batches.items():
        batch = train_dl.dataset[cuts]
        try:
            with torch.cuda.amp.autocast(enabled=params.use_fp16):
            with torch_autocast(enabled=params.use_fp16):
                loss, _ = compute_loss(
                    params=params,
                    model=model,
@ -83,7 +83,13 @@ from icefall.dist import cleanup_dist, setup_dist
from icefall.env import get_env_info
from icefall.err import raise_grad_scale_is_too_small_error
from icefall.hooks import register_inf_check_hooks
from icefall.utils import AttributeDict, MetricsTracker, setup_logger, str2bool
from icefall.utils import (
    AttributeDict,
    MetricsTracker,
    setup_logger,
    str2bool,
    torch_autocast,
)

LRSchedulerType = Union[torch.optim.lr_scheduler._LRScheduler, optim.LRScheduler]
LOG_EPS = math.log(1e-10)
@ -838,7 +844,7 @@ def train_one_epoch(
        batch_size = len(batch["supervisions"]["text"])

        try:
            with torch.cuda.amp.autocast(enabled=params.use_fp16):
            with torch_autocast(enabled=params.use_fp16):
                loss, loss_info = compute_loss(
                    params=params,
                    model=model,
@ -1245,7 +1251,7 @@ def scan_pessimistic_batches_for_oom(
    for criterion, cuts in batches.items():
        batch = train_dl.dataset[cuts]
        try:
            with torch.cuda.amp.autocast(enabled=params.use_fp16):
            with torch_autocast(enabled=params.use_fp16):
                loss, _ = compute_loss(
                    params=params,
                    model=model,
@ -101,7 +101,7 @@ class GigaSpeechAsrDataModule:
        group.add_argument(
            "--num-buckets",
            type=int,
            default=30,
            default=15,
            help="The number of buckets for the DynamicBucketingSampler"
            "(you might want to increase it for larger datasets).",
        )
@ -294,8 +294,7 @@ class GigaSpeechAsrDataModule:
                max_duration=self.args.max_duration,
                shuffle=self.args.shuffle,
                num_buckets=self.args.num_buckets,
                buffer_size=self.args.num_buckets * 2000,
                shuffle_buffer_size=self.args.num_buckets * 5000,
                buffer_size=self.args.num_buckets * 5000,
                drop_last=True,
            )
        else:
@ -77,7 +77,13 @@ from icefall.checkpoint import (
)
from icefall.dist import cleanup_dist, setup_dist
from icefall.env import get_env_info
from icefall.utils import AttributeDict, MetricsTracker, setup_logger, str2bool
from icefall.utils import (
    AttributeDict,
    MetricsTracker,
    setup_logger,
    str2bool,
    torch_autocast,
)

LRSchedulerType = Union[torch.optim.lr_scheduler._LRScheduler, optim.LRScheduler]

@ -675,7 +681,7 @@ def train_one_epoch(
        params.batch_idx_train += 1
        batch_size = len(batch["supervisions"]["text"])

        with torch.cuda.amp.autocast(enabled=params.use_fp16):
        with torch_autocast(enabled=params.use_fp16):
            loss, loss_info = compute_loss(
                params=params,
                model=model,
@ -944,7 +950,7 @@ def scan_pessimistic_batches_for_oom(
        # warmup = 0.0 is so that the derivs for the pruned loss stay zero
        # (i.e. are not remembered by the decaying-average in adam), because
        # we want to avoid these params being subject to shrinkage in adam.
        with torch.cuda.amp.autocast(enabled=params.use_fp16):
        with torch_autocast(enabled=params.use_fp16):
            loss, _ = compute_loss(
                params=params,
                model=model,
@ -105,7 +105,7 @@ class GigaSpeechAsrDataModule:
        group.add_argument(
            "--num-buckets",
            type=int,
            default=100,
            default=15,
            help="The number of buckets for the DynamicBucketingSampler"
            "(you might want to increase it for larger datasets).",
        )
@ -311,8 +311,7 @@ class GigaSpeechAsrDataModule:
                max_duration=self.args.max_duration,
                shuffle=self.args.shuffle,
                num_buckets=self.args.num_buckets,
                buffer_size=self.args.num_buckets * 2000,
                shuffle_buffer_size=self.args.num_buckets * 5000,
                buffer_size=self.args.num_buckets * 5000,
                drop_last=self.args.drop_last,
            )
        else:
@ -369,7 +368,7 @@ class GigaSpeechAsrDataModule:
            cuts_valid,
            max_duration=self.args.max_duration,
            num_buckets=self.args.num_buckets,
            buffer_size=self.args.num_buckets * 2000,
            buffer_size=self.args.num_buckets * 5000,
            shuffle=False,
        )
        logging.info("About to create dev dataloader")
@ -97,6 +97,7 @@ from icefall.utils import (
    get_parameter_groups_with_lrs,
    setup_logger,
    str2bool,
    torch_autocast,
)

LRSchedulerType = Union[torch.optim.lr_scheduler._LRScheduler, optim.LRScheduler]
@ -958,7 +959,7 @@ def train_one_epoch(
        batch_size = len(batch["supervisions"]["text"])

        try:
            with torch.cuda.amp.autocast(enabled=params.use_fp16):
            with torch_autocast(enabled=params.use_fp16):
                loss, loss_info = compute_loss(
                    params=params,
                    model=model,
@ -1317,7 +1318,7 @@ def scan_pessimistic_batches_for_oom(
    for criterion, cuts in batches.items():
        batch = train_dl.dataset[cuts]
        try:
            with torch.cuda.amp.autocast(enabled=params.use_fp16):
            with torch_autocast(enabled=params.use_fp16):
                loss, _ = compute_loss(
                    params=params,
                    model=model,
@ -1,477 +0,0 @@
# Copyright 2021 Piotr Żelasko
# Copyright 2024 Xiaomi Corporation (Author: Wei Kang)
#
# See ../../../../LICENSE for clarification regarding multiple authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


import argparse
import glob
import inspect
import logging
import re
from functools import lru_cache
from pathlib import Path
from typing import Any, Dict, Optional

import lhotse
import torch
from lhotse import CutSet, Fbank, FbankConfig, load_manifest, load_manifest_lazy
from lhotse.dataset import (
    CutConcatenate,
    CutMix,
    DynamicBucketingSampler,
    K2SpeechRecognitionDataset,
    PrecomputedFeatures,
    SimpleCutSampler,
    SpecAugment,
)
from lhotse.dataset.input_strategies import AudioSamples, OnTheFlyFeatures
from lhotse.utils import fix_random_seed
from torch.utils.data import DataLoader

from icefall.utils import str2bool


class _SeedWorkers:
    def __init__(self, seed: int):
        self.seed = seed

    def __call__(self, worker_id: int):
        fix_random_seed(self.seed + worker_id)


class GigaSpeechAsrDataModule:
    """
    DataModule for k2 ASR experiments.
    It assumes there is always one train and valid dataloader,
    but there can be multiple test dataloaders (e.g. LibriSpeech test-clean
    and test-other).

    It contains all the common data pipeline modules used in ASR
    experiments, e.g.:
    - dynamic batch size,
    - bucketing samplers,
    - cut concatenation,
    - augmentation,
    - on-the-fly feature extraction

    This class should be derived for specific corpora used in ASR tasks.
    """

    def __init__(self, args: argparse.Namespace):
        self.args = args

    @classmethod
    def add_arguments(cls, parser: argparse.ArgumentParser):
        group = parser.add_argument_group(
            title="ASR data related options",
            description="These options are used for the preparation of "
            "PyTorch DataLoaders from Lhotse CutSet's -- they control the "
            "effective batch sizes, sampling strategies, applied data "
            "augmentations, etc.",
        )
        group.add_argument(
            "--manifest-dir",
            type=Path,
            default=Path("data/fbank"),
            help="Path to directory with train/valid/test cuts.",
        )
        group.add_argument(
            "--max-duration",
            type=int,
            default=200.0,
            help="Maximum pooled recordings duration (seconds) in a "
            "single batch. You can reduce it if it causes CUDA OOM.",
        )
        group.add_argument(
            "--bucketing-sampler",
            type=str2bool,
            default=True,
            help="When enabled, the batches will come from buckets of "
            "similar duration (saves padding frames).",
        )
        group.add_argument(
            "--num-buckets",
            type=int,
            default=30,
            help="The number of buckets for the DynamicBucketingSampler"
            "(you might want to increase it for larger datasets).",
        )
        group.add_argument(
            "--concatenate-cuts",
            type=str2bool,
            default=False,
            help="When enabled, utterances (cuts) will be concatenated "
            "to minimize the amount of padding.",
        )
        group.add_argument(
            "--duration-factor",
            type=float,
            default=1.0,
            help="Determines the maximum duration of a concatenated cut "
            "relative to the duration of the longest cut in a batch.",
        )
        group.add_argument(
            "--gap",
            type=float,
            default=1.0,
            help="The amount of padding (in seconds) inserted between "
            "concatenated cuts. This padding is filled with noise when "
            "noise augmentation is used.",
        )
        group.add_argument(
            "--on-the-fly-feats",
            type=str2bool,
            default=False,
            help="When enabled, use on-the-fly cut mixing and feature "
            "extraction. Will drop existing precomputed feature manifests "
            "if available.",
        )
        group.add_argument(
            "--shuffle",
            type=str2bool,
            default=True,
            help="When enabled (=default), the examples will be "
            "shuffled for each epoch.",
        )
        group.add_argument(
            "--drop-last",
            type=str2bool,
            default=True,
            help="Whether to drop last batch. Used by sampler.",
        )
        group.add_argument(
            "--return-cuts",
            type=str2bool,
            default=True,
            help="When enabled, each batch will have the "
            "field: batch['supervisions']['cut'] with the cuts that "
            "were used to construct it.",
        )

        group.add_argument(
            "--num-workers",
            type=int,
            default=2,
            help="The number of training dataloader workers that "
            "collect the batches.",
        )

        group.add_argument(
            "--enable-spec-aug",
            type=str2bool,
            default=True,
            help="When enabled, use SpecAugment for training dataset.",
        )

        group.add_argument(
            "--spec-aug-time-warp-factor",
            type=int,
            default=80,
            help="Used only when --enable-spec-aug is True. "
            "It specifies the factor for time warping in SpecAugment. "
            "Larger values mean more warping. "
            "A value less than 1 means to disable time warp.",
        )

        group.add_argument(
            "--enable-musan",
            type=str2bool,
            default=True,
            help="When enabled, select noise from MUSAN and mix it"
            "with training dataset. ",
        )

        group.add_argument(
            "--input-strategy",
            type=str,
            default="PrecomputedFeatures",
            help="AudioSamples or PrecomputedFeatures",
        )

        # GigaSpeech specific arguments
        group.add_argument(
            "--subset",
            type=str,
            default="XL",
            help="Select the GigaSpeech subset (XS|S|M|L|XL)",
        )
        group.add_argument(
            "--small-dev",
            type=str2bool,
            default=False,
            help="Should we use only 1000 utterances for dev (speeds up training)",
        )

    def train_dataloaders(
        self,
        cuts_train: CutSet,
        sampler_state_dict: Optional[Dict[str, Any]] = None,
    ) -> DataLoader:
        """
        Args:
          cuts_train:
            CutSet for training.
          sampler_state_dict:
            The state dict for the training sampler.
        """
        transforms = []
        if self.args.enable_musan:
            logging.info("Enable MUSAN")
            logging.info("About to get Musan cuts")
            cuts_musan = load_manifest(self.args.manifest_dir / "musan_cuts.jsonl.gz")
            transforms.append(
                CutMix(cuts=cuts_musan, p=0.5, snr=(10, 20), preserve_id=True)
            )
        else:
            logging.info("Disable MUSAN")

        if self.args.concatenate_cuts:
            logging.info(
                f"Using cut concatenation with duration factor "
                f"{self.args.duration_factor} and gap {self.args.gap}."
            )
            # Cut concatenation should be the first transform in the list,
            # so that if we e.g. mix noise in, it will fill the gaps between
            # different utterances.
            transforms = [
                CutConcatenate(
                    duration_factor=self.args.duration_factor, gap=self.args.gap
                )
            ] + transforms

        input_transforms = []
        if self.args.enable_spec_aug:
            logging.info("Enable SpecAugment")
            logging.info(f"Time warp factor: {self.args.spec_aug_time_warp_factor}")
            # Set the value of num_frame_masks according to Lhotse's version.
            # In different Lhotse's versions, the default of num_frame_masks is
            # different.
            num_frame_masks = 10
            num_frame_masks_parameter = inspect.signature(
                SpecAugment.__init__
            ).parameters["num_frame_masks"]
            if num_frame_masks_parameter.default == 1:
                num_frame_masks = 2
            logging.info(f"Num frame mask: {num_frame_masks}")
            input_transforms.append(
                SpecAugment(
                    time_warp_factor=self.args.spec_aug_time_warp_factor,
                    num_frame_masks=num_frame_masks,
                    features_mask_size=27,
                    num_feature_masks=2,
                    frames_mask_size=100,
                )
            )
        else:
            logging.info("Disable SpecAugment")

        logging.info("About to create train dataset")
        train = K2SpeechRecognitionDataset(
            input_strategy=eval(self.args.input_strategy)(),
            cut_transforms=transforms,
            input_transforms=input_transforms,
            return_cuts=self.args.return_cuts,
        )

        if self.args.on_the_fly_feats:
            # NOTE: the PerturbSpeed transform should be added only if we
            # remove it from data prep stage.
            # Add on-the-fly speed perturbation; since originally it would
            # have increased epoch size by 3, we will apply prob 2/3 and use
            # 3x more epochs.
            # Speed perturbation probably should come first before
            # concatenation, but in principle the transforms order doesn't have
            # to be strict (e.g. could be randomized)
            # transforms = [PerturbSpeed(factors=[0.9, 1.1], p=2/3)] + transforms # noqa
            # Drop feats to be on the safe side.
            train = K2SpeechRecognitionDataset(
                cut_transforms=transforms,
                input_strategy=OnTheFlyFeatures(Fbank(FbankConfig(num_mel_bins=80))),
                input_transforms=input_transforms,
                return_cuts=self.args.return_cuts,
            )

        if self.args.bucketing_sampler:
            logging.info("Using DynamicBucketingSampler.")
            train_sampler = DynamicBucketingSampler(
                cuts_train,
                max_duration=self.args.max_duration,
                shuffle=self.args.shuffle,
                num_buckets=self.args.num_buckets,
                drop_last=self.args.drop_last,
                buffer_size=self.args.num_buckets * 2000,
                shuffle_buffer_size=self.args.num_buckets * 5000,
            )
        else:
            logging.info("Using SimpleCutSampler.")
            train_sampler = SimpleCutSampler(
                cuts_train,
                max_duration=self.args.max_duration,
                shuffle=self.args.shuffle,
            )
        logging.info("About to create train dataloader")

        if sampler_state_dict is not None:
            logging.info("Loading sampler state dict")
            train_sampler.load_state_dict(sampler_state_dict)

        # 'seed' is derived from the current random state, which will have
        # previously been set in the main process.
        seed = torch.randint(0, 100000, ()).item()
        worker_init_fn = _SeedWorkers(seed)

        train_dl = DataLoader(
            train,
            sampler=train_sampler,
            batch_size=None,
            num_workers=self.args.num_workers,
            persistent_workers=False,
            worker_init_fn=worker_init_fn,
        )

        return train_dl

    def valid_dataloaders(self, cuts_valid: CutSet) -> DataLoader:
        transforms = []
        if self.args.concatenate_cuts:
            transforms = [
                CutConcatenate(
                    duration_factor=self.args.duration_factor, gap=self.args.gap
                )
            ] + transforms

        logging.info("About to create dev dataset")
        if self.args.on_the_fly_feats:
            validate = K2SpeechRecognitionDataset(
                cut_transforms=transforms,
                input_strategy=OnTheFlyFeatures(Fbank(FbankConfig(num_mel_bins=80))),
                return_cuts=self.args.return_cuts,
            )
        else:
            validate = K2SpeechRecognitionDataset(
                cut_transforms=transforms,
                return_cuts=self.args.return_cuts,
            )
        valid_sampler = DynamicBucketingSampler(
            cuts_valid,
            max_duration=self.args.max_duration,
            num_buckets=self.args.num_buckets,
            buffer_size=self.args.num_buckets * 2000,
            shuffle=False,
        )
        logging.info("About to create dev dataloader")
        valid_dl = DataLoader(
            validate,
            sampler=valid_sampler,
            batch_size=None,
            num_workers=2,
            persistent_workers=False,
        )

        return valid_dl

    def test_dataloaders(self, cuts: CutSet) -> DataLoader:
        logging.debug("About to create test dataset")
        test = K2SpeechRecognitionDataset(
            input_strategy=OnTheFlyFeatures(Fbank(FbankConfig(num_mel_bins=80)))
            if self.args.on_the_fly_feats
            else eval(self.args.input_strategy)(),
            return_cuts=self.args.return_cuts,
        )
        sampler = DynamicBucketingSampler(
            cuts,
            max_duration=self.args.max_duration,
            shuffle=False,
        )
        logging.debug("About to create test dataloader")
        test_dl = DataLoader(
            test,
            batch_size=None,
            sampler=sampler,
            num_workers=self.args.num_workers,
        )
        return test_dl

    @lru_cache()
    def train_cuts(self) -> CutSet:
        logging.info(f"About to get train {self.args.subset} cuts")
        if self.args.subset == "XL":
            filenames = glob.glob(
                f"{self.args.manifest_dir}/XL_split/gigaspeech_cuts_XL.*.jsonl.gz"
            )
            pattern = re.compile(r"gigaspeech_cuts_XL.([0-9]+).jsonl.gz")
            idx_filenames = ((int(pattern.search(f).group(1)), f) for f in filenames)
            idx_filenames = sorted(idx_filenames, key=lambda x: x[0])
            sorted_filenames = [f[1] for f in idx_filenames]
            logging.info(
                f"Loading GigaSpeech {len(sorted_filenames)} splits in lazy mode"
            )

            cuts_train = lhotse.combine(
                lhotse.load_manifest_lazy(p) for p in sorted_filenames
            )
        else:
            path = (
                self.args.manifest_dir / f"gigaspeech_cuts_{self.args.subset}.jsonl.gz"
            )
            cuts_train = CutSet.from_jsonl_lazy(path)
        return cuts_train

    @lru_cache()
    def dev_cuts(self) -> CutSet:
        logging.info("About to get dev cuts")
        cuts_valid = load_manifest_lazy(
            self.args.manifest_dir / "gigaspeech_cuts_DEV.jsonl.gz"
        )
        if self.args.small_dev:
            return cuts_valid.subset(first=1000)
        else:
            return cuts_valid

    @lru_cache()
    def test_cuts(self) -> CutSet:
        logging.info("About to get test cuts")
        return load_manifest_lazy(
            self.args.manifest_dir / "gigaspeech_cuts_TEST.jsonl.gz"
        )

    @lru_cache()
    def fsc_train_cuts(self) -> CutSet:
        logging.info("About to get fluent speech commands train cuts")
        return load_manifest_lazy(
            self.args.manifest_dir / "fluent_speech_commands_cuts_train.jsonl.gz"
        )

    @lru_cache()
    def fsc_valid_cuts(self) -> CutSet:
        logging.info("About to get fluent speech commands valid cuts")
        return load_manifest_lazy(
            self.args.manifest_dir / "fluent_speech_commands_cuts_valid.jsonl.gz"
        )

    @lru_cache()
    def fsc_test_small_cuts(self) -> CutSet:
        logging.info("About to get fluent speech commands small test cuts")
        return load_manifest_lazy(
            self.args.manifest_dir / "fluent_speech_commands_cuts_small.jsonl.gz"
        )

    @lru_cache()
    def fsc_test_large_cuts(self) -> CutSet:
        logging.info("About to get fluent speech commands large test cuts")
        return load_manifest_lazy(
            self.args.manifest_dir / "fluent_speech_commands_cuts_large.jsonl.gz"
        )
1
egs/gigaspeech/KWS/zipformer/asr_datamodule.py
Symbolic link
@ -0,0 +1 @@
../../ASR/zipformer/asr_datamodule.py
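The 477-line local copy above is deleted in favor of a one-line symlink to the
shared zipformer datamodule, so call sites keep working unchanged. A sketch of
the typical usage pattern (the argument values below are illustrative, not
taken from any specific recipe):

    # Illustrative driver code; the import resolves via the new symlink.
    import argparse

    from asr_datamodule import GigaSpeechAsrDataModule

    parser = argparse.ArgumentParser()
    GigaSpeechAsrDataModule.add_arguments(parser)
    args = parser.parse_args(["--subset", "XS", "--max-duration", "200"])

    datamodule = GigaSpeechAsrDataModule(args)
    train_dl = datamodule.train_dataloaders(datamodule.train_cuts())
    valid_dl = datamodule.valid_dataloaders(datamodule.dev_cuts())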
@ -97,6 +97,7 @@ from icefall.utils import (
    get_parameter_groups_with_lrs,
    setup_logger,
    str2bool,
    torch_autocast,
)

LRSchedulerType = Union[torch.optim.lr_scheduler._LRScheduler, optim.LRScheduler]
@ -961,7 +962,7 @@ def train_one_epoch(
        batch_size = len(batch["supervisions"]["text"])

        try:
            with torch.cuda.amp.autocast(enabled=params.use_fp16):
            with torch_autocast(enabled=params.use_fp16):
                loss, loss_info = compute_loss(
                    params=params,
                    model=model,
@ -1320,7 +1321,7 @@ def scan_pessimistic_batches_for_oom(
    for criterion, cuts in batches.items():
        batch = train_dl.dataset[cuts]
        try:
            with torch.cuda.amp.autocast(enabled=params.use_fp16):
            with torch_autocast(enabled=params.use_fp16):
                loss, _ = compute_loss(
                    params=params,
                    model=model,
@ -77,7 +77,13 @@ from icefall.dist import cleanup_dist, setup_dist
from icefall.env import get_env_info
from icefall.err import raise_grad_scale_is_too_small_error
from icefall.hooks import register_inf_check_hooks
from icefall.utils import AttributeDict, MetricsTracker, setup_logger, str2bool
from icefall.utils import (
    AttributeDict,
    MetricsTracker,
    setup_logger,
    str2bool,
    torch_autocast,
)

LRSchedulerType = Union[torch.optim.lr_scheduler._LRScheduler, optim.LRScheduler]

@ -805,7 +811,7 @@ def train_one_epoch(
        batch_size = len(batch["supervisions"]["text"])

        try:
            with torch.cuda.amp.autocast(enabled=params.use_fp16):
            with torch_autocast(enabled=params.use_fp16):
                loss, loss_info = compute_loss(
                    params=params,
                    model=model,
@ -1196,7 +1202,7 @@ def scan_pessimistic_batches_for_oom(
    for criterion, cuts in batches.items():
        batch = train_dl.dataset[cuts]
        try:
            with torch.cuda.amp.autocast(enabled=params.use_fp16):
            with torch_autocast(enabled=params.use_fp16):
                loss, _ = compute_loss(
                    params=params,
                    model=model,
@ -92,6 +92,7 @@ from icefall.utils import (
    get_parameter_groups_with_lrs,
    setup_logger,
    str2bool,
    torch_autocast,
)

LRSchedulerType = Union[torch.optim.lr_scheduler._LRScheduler, optim.LRScheduler]
@ -942,7 +943,7 @@ def train_one_epoch(
        batch_size = len(batch["supervisions"]["text"])

        try:
            with torch.cuda.amp.autocast(enabled=params.use_fp16):
            with torch_autocast(enabled=params.use_fp16):
                loss, loss_info = compute_loss(
                    params=params,
                    model=model,
@ -1333,7 +1334,7 @@ def scan_pessimistic_batches_for_oom(
    for criterion, cuts in batches.items():
        batch = train_dl.dataset[cuts]
        try:
            with torch.cuda.amp.autocast(enabled=params.use_fp16):
            with torch_autocast(enabled=params.use_fp16):
                loss, _ = compute_loss(
                    params=params,
                    model=model,
@ -667,7 +667,9 @@ def main():
        H = None
        bpe_model = None
        HLG = k2.Fsa.from_dict(
            torch.load(f"{params.lang_dir}/HLG.pt", map_location=device)
            torch.load(
                f"{params.lang_dir}/HLG.pt", map_location=device, weights_only=False
            )
        )
        assert HLG.requires_grad is False

@ -707,7 +709,9 @@ def main():
            torch.save(G.as_dict(), params.lm_dir / "G_4_gram.pt")
        else:
            logging.info("Loading pre-compiled G_4_gram.pt")
            d = torch.load(params.lm_dir / "G_4_gram.pt", map_location=device)
            d = torch.load(
                params.lm_dir / "G_4_gram.pt", map_location=device, weights_only=False
            )
            G = k2.Fsa.from_dict(d)

        if params.method in [
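The `weights_only=False` additions in these decoding scripts are required because
PyTorch 2.6 changed the default of torch.load's weights_only argument to True,
which restricts unpickling to plain tensor payloads; the HLG/G checkpoints here
store a dict of k2 FSA fields that the weights-only unpickler rejects. A hedged
sketch of the failure mode and the fix ("HLG.pt" is a placeholder path, and
weights_only=False should only be used on checkpoints you trust, since it runs
the full pickle machinery):

    import torch

    try:
        # PyTorch >= 2.6 default is weights_only=True
        d = torch.load("HLG.pt", map_location="cpu")
    except Exception as e:
        # Typically an unpickling error on non-tensor objects in the dict.
        print(f"weights-only load failed: {e}")
        d = torch.load("HLG.pt", map_location="cpu", weights_only=False)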
@ -271,7 +271,7 @@ def main():
        use_feat_batchnorm=params.use_feat_batchnorm,
    )

    checkpoint = torch.load(args.checkpoint, map_location="cpu")
    checkpoint = torch.load(args.checkpoint, map_location="cpu", weights_only=False)
    model.load_state_dict(checkpoint["model"], strict=False)
    model.to(device)
    model.eval()
@ -351,7 +351,9 @@ def main():
        "attention-decoder",
    ]:
        logging.info(f"Loading HLG from {params.HLG}")
        HLG = k2.Fsa.from_dict(torch.load(params.HLG, map_location="cpu"))
        HLG = k2.Fsa.from_dict(
            torch.load(params.HLG, map_location="cpu", weights_only=False)
        )
        HLG = HLG.to(device)
        if not hasattr(HLG, "lm_scores"):
            # For whole-lattice-rescoring and attention-decoder
@ -362,7 +364,9 @@ def main():
        "attention-decoder",
    ]:
        logging.info(f"Loading G from {params.G}")
        G = k2.Fsa.from_dict(torch.load(params.G, map_location="cpu"))
        G = k2.Fsa.from_dict(
            torch.load(params.G, map_location="cpu", weights_only=False)
        )
        # Add epsilon self-loops to G as we will compose
        # it with the whole lattice later
        G = G.to(device)
@ -774,7 +774,9 @@ def main():
        H = None
        bpe_model = None
        HLG = k2.Fsa.from_dict(
            torch.load(f"{params.lang_dir}/HLG.pt", map_location=device)
            torch.load(
                f"{params.lang_dir}/HLG.pt", map_location=device, weights_only=False
            )
        )
        assert HLG.requires_grad is False

@ -814,7 +816,9 @@ def main():
            torch.save(G.as_dict(), params.lm_dir / "G_4_gram.pt")
        else:
            logging.info("Loading pre-compiled G_4_gram.pt")
            d = torch.load(params.lm_dir / "G_4_gram.pt", map_location=device)
            d = torch.load(
                params.lm_dir / "G_4_gram.pt", map_location=device, weights_only=False
            )
            G = k2.Fsa.from_dict(d)

        if params.method in [
@ -65,7 +65,6 @@ from lhotse.dataset.sampling.base import CutSampler
from lhotse.utils import fix_random_seed
from optim import Eden, Eve
from torch import Tensor
from torch.cuda.amp import GradScaler
from torch.nn.parallel import DistributedDataParallel as DDP
from torch.utils.tensorboard import SummaryWriter

@ -84,9 +83,11 @@ from icefall.lexicon import Lexicon
from icefall.utils import (
    AttributeDict,
    MetricsTracker,
    create_grad_scaler,
    encode_supervisions,
    setup_logger,
    str2bool,
    torch_autocast,
)

LRSchedulerType = Union[torch.optim.lr_scheduler._LRScheduler, optim.LRScheduler]
@ -420,7 +421,7 @@ def save_checkpoint(
    optimizer: Optional[torch.optim.Optimizer] = None,
    scheduler: Optional[LRSchedulerType] = None,
    sampler: Optional[CutSampler] = None,
    scaler: Optional[GradScaler] = None,
    scaler: Optional["GradScaler"] = None,
    rank: int = 0,
) -> None:
    """Save model, optimizer, scheduler and training stats to file.
@ -629,7 +630,7 @@ def train_one_epoch(
    scheduler: LRSchedulerType,
    train_dl: torch.utils.data.DataLoader,
    valid_dl: torch.utils.data.DataLoader,
    scaler: GradScaler,
    scaler: "GradScaler",
    model_avg: Optional[nn.Module] = None,
    tb_writer: Optional[SummaryWriter] = None,
    world_size: int = 1,
@ -676,7 +677,7 @@ def train_one_epoch(
        params.batch_idx_train += 1
        batch_size = len(batch["supervisions"]["text"])

        with torch.cuda.amp.autocast(enabled=params.use_fp16):
        with torch_autocast(enabled=params.use_fp16):
            loss, loss_info = compute_loss(
                params=params,
                model=model,
@ -965,7 +966,7 @@ def run(rank, world_size, args):
            params=params,
        )

    scaler = GradScaler(enabled=params.use_fp16)
    scaler = create_grad_scaler(enabled=params.use_fp16)
    if checkpoints and "grad_scaler" in checkpoints:
        logging.info("Loading grad scaler state dict")
        scaler.load_state_dict(checkpoints["grad_scaler"])
@ -1036,7 +1037,7 @@ def scan_pessimistic_batches_for_oom(
        # warmup = 0.0 is so that the derivs for the pruned loss stay zero
        # (i.e. are not remembered by the decaying-average in adam), because
        # we want to avoid these params being subject to shrinkage in adam.
        with torch.cuda.amp.autocast(enabled=params.use_fp16):
        with torch_autocast(enabled=params.use_fp16):
            loss, _ = compute_loss(
                params=params,
                model=model,
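The training scripts in this diff all route mixed-precision calls through two indirection points in `icefall.utils`: `torch_autocast` and `create_grad_scaler`. Newer PyTorch deprecates `torch.cuda.amp.autocast(...)` and `torch.cuda.amp.GradScaler(...)` in favor of `torch.amp.autocast("cuda", ...)` and `torch.amp.GradScaler("cuda", ...)`, so funneling every call site through one wrapper keeps the recipes working across versions. A plausible sketch of such shims, assuming version-based dispatch (the real definitions live in `icefall/utils.py` and may differ in detail):

```python
import torch
from packaging import version

# Plausible sketches of the compat shims this diff switches to; the actual
# implementations in icefall/utils.py may differ.
_NEW_AMP_API = version.parse(torch.__version__.split("+")[0]) >= version.parse("2.3.0")


def torch_autocast(device_type: str = "cuda", **kwargs):
    # Newer PyTorch deprecates torch.cuda.amp.autocast(...) in favor of
    # torch.amp.autocast("cuda", ...); dispatch on the installed version.
    if _NEW_AMP_API:
        return torch.amp.autocast(device_type=device_type, **kwargs)
    return torch.cuda.amp.autocast(**kwargs)


def create_grad_scaler(device: str = "cuda", **kwargs):
    # Same idea for GradScaler: torch.amp.GradScaler("cuda", ...) replaces
    # the deprecated torch.cuda.amp.GradScaler(...); kwargs such as
    # enabled= and init_scale= pass straight through.
    if _NEW_AMP_API:
        return torch.amp.GradScaler(device, **kwargs)
    return torch.cuda.amp.GradScaler(**kwargs)
```

The related annotation change from `scaler: Optional[GradScaler]` to `scaler: Optional["GradScaler"]` turns the type into a string literal, so the deprecated `from torch.cuda.amp import GradScaler` import can be dropped without breaking the signatures.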
@ -868,7 +868,9 @@ def main():
        H = None
        bpe_model = None
        HLG = k2.Fsa.from_dict(
            torch.load(f"{params.lang_dir}/HLG.pt", map_location=device)
            torch.load(
                f"{params.lang_dir}/HLG.pt", map_location=device, weights_only=False
            )
        )
        assert HLG.requires_grad is False

@ -907,7 +909,9 @@ def main():
        torch.save(G.as_dict(), params.lm_dir / "G_4_gram.pt")
    else:
        logging.info("Loading pre-compiled G_4_gram.pt")
        d = torch.load(params.lm_dir / "G_4_gram.pt", map_location=device)
        d = torch.load(
            params.lm_dir / "G_4_gram.pt", map_location=device, weights_only=False
        )
        G = k2.Fsa.from_dict(d)

    if params.decoding_method == "whole-lattice-rescoring":
@ -334,7 +334,9 @@ def main():
        "whole-lattice-rescoring",
    ]:
        logging.info(f"Loading HLG from {params.HLG}")
        HLG = k2.Fsa.from_dict(torch.load(params.HLG, map_location="cpu"))
        HLG = k2.Fsa.from_dict(
            torch.load(params.HLG, map_location="cpu", weights_only=False)
        )
        HLG = HLG.to(device)
        if not hasattr(HLG, "lm_scores"):
            # For whole-lattice-rescoring and attention-decoder
@ -345,7 +347,9 @@ def main():
        "whole-lattice-rescoring",
    ]:
        logging.info(f"Loading G from {params.G}")
        G = k2.Fsa.from_dict(torch.load(params.G, map_location="cpu"))
        G = k2.Fsa.from_dict(
            torch.load(params.G, map_location="cpu", weights_only=False)
        )
        G = G.to(device)
        if params.method == "whole-lattice-rescoring":
            # Add epsilon self-loops to G as we will compose
@ -290,7 +290,7 @@ def main():
    num_param = sum([p.numel() for p in model.parameters()])
    logging.info(f"Number of model parameters: {num_param}")

    checkpoint = torch.load(args.checkpoint, map_location="cpu")
    checkpoint = torch.load(args.checkpoint, map_location="cpu", weights_only=False)
    model.load_state_dict(checkpoint["model"], strict=False)
    model.to(device)
    model.eval()
@ -386,7 +386,9 @@ def main():
        "whole-lattice-rescoring",
    ]:
        logging.info(f"Loading HLG from {params.HLG}")
        HLG = k2.Fsa.from_dict(torch.load(params.HLG, map_location="cpu"))
        HLG = k2.Fsa.from_dict(
            torch.load(params.HLG, map_location="cpu", weights_only=False)
        )
        HLG = HLG.to(device)
        if not hasattr(HLG, "lm_scores"):
            # For whole-lattice-rescoring and attention-decoder
@ -397,7 +399,9 @@ def main():
        "whole-lattice-rescoring",
    ]:
        logging.info(f"Loading G from {params.G}")
        G = k2.Fsa.from_dict(torch.load(params.G, map_location="cpu"))
        G = k2.Fsa.from_dict(
            torch.load(params.G, map_location="cpu", weights_only=False)
        )
        G = G.to(device)
        if params.method == "whole-lattice-rescoring":
            # Add epsilon self-loops to G as we will compose
@ -76,7 +76,6 @@ from lhotse.utils import fix_random_seed
from model import CTCModel
from optim import Eden, Eve
from torch import Tensor
from torch.cuda.amp import GradScaler
from torch.nn.parallel import DistributedDataParallel as DDP
from torch.utils.tensorboard import SummaryWriter

@ -95,9 +94,11 @@ from icefall.lexicon import Lexicon
from icefall.utils import (
    AttributeDict,
    MetricsTracker,
    create_grad_scaler,
    encode_supervisions,
    setup_logger,
    str2bool,
    torch_autocast,
)

LRSchedulerType = Union[torch.optim.lr_scheduler._LRScheduler, optim.LRScheduler]
@ -493,7 +494,7 @@ def save_checkpoint(
    optimizer: Optional[torch.optim.Optimizer] = None,
    scheduler: Optional[LRSchedulerType] = None,
    sampler: Optional[CutSampler] = None,
    scaler: Optional[GradScaler] = None,
    scaler: Optional["GradScaler"] = None,
    rank: int = 0,
) -> None:
    """Save model, optimizer, scheduler and training stats to file.
@ -694,7 +695,7 @@ def train_one_epoch(
    graph_compiler: Union[BpeCtcTrainingGraphCompiler, CtcTrainingGraphCompiler],
    train_dl: torch.utils.data.DataLoader,
    valid_dl: torch.utils.data.DataLoader,
    scaler: GradScaler,
    scaler: "GradScaler",
    model_avg: Optional[nn.Module] = None,
    tb_writer: Optional[SummaryWriter] = None,
    world_size: int = 1,
@ -743,7 +744,7 @@ def train_one_epoch(
        params.batch_idx_train += 1
        batch_size = len(batch["supervisions"]["text"])

        with torch.cuda.amp.autocast(enabled=params.use_fp16):
        with torch_autocast(enabled=params.use_fp16):
            loss, loss_info = compute_loss(
                params=params,
                model=model,
@ -1004,7 +1005,7 @@ def run(rank, world_size, args):
            warmup=0.0 if params.start_epoch == 1 else 1.0,
        )

    scaler = GradScaler(enabled=params.use_fp16)
    scaler = create_grad_scaler(enabled=params.use_fp16)
    if checkpoints and "grad_scaler" in checkpoints:
        logging.info("Loading grad scaler state dict")
        scaler.load_state_dict(checkpoints["grad_scaler"])
@ -1073,7 +1074,7 @@ def scan_pessimistic_batches_for_oom(
    for criterion, cuts in batches.items():
        batch = train_dl.dataset[cuts]
        try:
            with torch.cuda.amp.autocast(enabled=params.use_fp16):
            with torch_autocast(enabled=params.use_fp16):
                loss, _ = compute_loss(
                    params=params,
                    model=model,
@ -574,7 +574,9 @@ def main():
        H = None
        bpe_model = None
        HLG = k2.Fsa.from_dict(
            torch.load(f"{params.lang_dir}/HLG.pt", map_location="cpu")
            torch.load(
                f"{params.lang_dir}/HLG.pt", map_location="cpu", weights_only=False
            )
        )
        HLG = HLG.to(device)
        assert HLG.requires_grad is False
@ -609,7 +611,9 @@ def main():
        torch.save(G.as_dict(), params.lm_dir / "G_4_gram.pt")
    else:
        logging.info("Loading pre-compiled G_4_gram.pt")
        d = torch.load(params.lm_dir / "G_4_gram.pt", map_location="cpu")
        d = torch.load(
            params.lm_dir / "G_4_gram.pt", map_location="cpu", weights_only=False
        )
        G = k2.Fsa.from_dict(d).to(device)

    if params.method in ["whole-lattice-rescoring", "attention-decoder"]:
@ -80,7 +80,6 @@ from lhotse.utils import fix_random_seed
from model import Transducer
from optim import Eden, Eve
from torch import Tensor
from torch.cuda.amp import GradScaler
from torch.nn.parallel import DistributedDataParallel as DDP
from torch.utils.tensorboard import SummaryWriter

@ -93,7 +92,14 @@ from icefall.checkpoint import (
)
from icefall.dist import cleanup_dist, setup_dist
from icefall.env import get_env_info
from icefall.utils import AttributeDict, MetricsTracker, setup_logger, str2bool
from icefall.utils import (
    AttributeDict,
    MetricsTracker,
    create_grad_scaler,
    setup_logger,
    str2bool,
    torch_autocast,
)

LRSchedulerType = Union[torch.optim.lr_scheduler._LRScheduler, optim.LRScheduler]

@ -560,7 +566,7 @@ def save_checkpoint(
    optimizer: Optional[torch.optim.Optimizer] = None,
    scheduler: Optional[LRSchedulerType] = None,
    sampler: Optional[CutSampler] = None,
    scaler: Optional[GradScaler] = None,
    scaler: Optional["GradScaler"] = None,
    rank: int = 0,
) -> None:
    """Save model, optimizer, scheduler and training stats to file.
@ -727,7 +733,7 @@ def train_one_epoch(
    sp: spm.SentencePieceProcessor,
    train_dl: torch.utils.data.DataLoader,
    valid_dl: torch.utils.data.DataLoader,
    scaler: GradScaler,
    scaler: "GradScaler",
    model_avg: Optional[nn.Module] = None,
    tb_writer: Optional[SummaryWriter] = None,
    world_size: int = 1,
@ -772,7 +778,7 @@ def train_one_epoch(
        params.batch_idx_train += 1
        batch_size = len(batch["supervisions"]["text"])

        with torch.cuda.amp.autocast(enabled=params.use_fp16):
        with torch_autocast(enabled=params.use_fp16):
            loss, loss_info = compute_loss(
                params=params,
                model=model,
@ -1002,7 +1008,7 @@ def run(rank, world_size, args):
            warmup=0.0 if params.start_epoch == 1 else 1.0,
        )

    scaler = GradScaler(enabled=params.use_fp16)
    scaler = create_grad_scaler(enabled=params.use_fp16)
    if checkpoints and "grad_scaler" in checkpoints:
        logging.info("Loading grad scaler state dict")
        scaler.load_state_dict(checkpoints["grad_scaler"])
@ -1071,7 +1077,7 @@ def scan_pessimistic_batches_for_oom(
    for criterion, cuts in batches.items():
        batch = train_dl.dataset[cuts]
        try:
            with torch.cuda.amp.autocast(enabled=params.use_fp16):
            with torch_autocast(enabled=params.use_fp16):
                loss, _ = compute_loss(
                    params=params,
                    model=model,
@ -80,7 +80,6 @@ from lhotse.utils import fix_random_seed
from model import Transducer
from optim import Eden, Eve
from torch import Tensor
from torch.cuda.amp import GradScaler
from torch.nn.parallel import DistributedDataParallel as DDP
from torch.utils.tensorboard import SummaryWriter

@ -93,7 +92,14 @@ from icefall.checkpoint import (
)
from icefall.dist import cleanup_dist, setup_dist
from icefall.env import get_env_info
from icefall.utils import AttributeDict, MetricsTracker, setup_logger, str2bool
from icefall.utils import (
    AttributeDict,
    MetricsTracker,
    create_grad_scaler,
    setup_logger,
    str2bool,
    torch_autocast,
)

LRSchedulerType = Union[torch.optim.lr_scheduler._LRScheduler, optim.LRScheduler]

@ -560,7 +566,7 @@ def save_checkpoint(
    optimizer: Optional[torch.optim.Optimizer] = None,
    scheduler: Optional[LRSchedulerType] = None,
    sampler: Optional[CutSampler] = None,
    scaler: Optional[GradScaler] = None,
    scaler: Optional["GradScaler"] = None,
    rank: int = 0,
) -> None:
    """Save model, optimizer, scheduler and training stats to file.
@ -727,7 +733,7 @@ def train_one_epoch(
    sp: spm.SentencePieceProcessor,
    train_dl: torch.utils.data.DataLoader,
    valid_dl: torch.utils.data.DataLoader,
    scaler: GradScaler,
    scaler: "GradScaler",
    model_avg: Optional[nn.Module] = None,
    tb_writer: Optional[SummaryWriter] = None,
    world_size: int = 1,
@ -772,7 +778,7 @@ def train_one_epoch(
        params.batch_idx_train += 1
        batch_size = len(batch["supervisions"]["text"])

        with torch.cuda.amp.autocast(enabled=params.use_fp16):
        with torch_autocast(enabled=params.use_fp16):
            loss, loss_info = compute_loss(
                params=params,
                model=model,
@ -1001,7 +1007,7 @@ def run(rank, world_size, args):
            params=params,
        )

    scaler = GradScaler(enabled=params.use_fp16)
    scaler = create_grad_scaler(enabled=params.use_fp16)
    if checkpoints and "grad_scaler" in checkpoints:
        logging.info("Loading grad scaler state dict")
        scaler.load_state_dict(checkpoints["grad_scaler"])
@ -1072,7 +1078,7 @@ def scan_pessimistic_batches_for_oom(
        # warmup = 0.0 is so that the derivs for the pruned loss stay zero
        # (i.e. are not remembered by the decaying-average in adam), because
        # we want to avoid these params being subject to shrinkage in adam.
        with torch.cuda.amp.autocast(enabled=params.use_fp16):
        with torch_autocast(enabled=params.use_fp16):
            loss, _ = compute_loss(
                params=params,
                model=model,
@ -72,11 +72,11 @@ def compile_HLG(lang_dir: str, lm: str = "G_3_gram") -> k2.Fsa:
    max_token_id = max(lexicon.tokens)
    logging.info(f"Building ctc_topo. max_token_id: {max_token_id}")
    H = k2.ctc_topo(max_token_id)
    L = k2.Fsa.from_dict(torch.load(f"{lang_dir}/L_disambig.pt"))
    L = k2.Fsa.from_dict(torch.load(f"{lang_dir}/L_disambig.pt", weights_only=False))

    if Path(f"data/lm/{lm}.pt").is_file():
        logging.info(f"Loading pre-compiled {lm}")
        d = torch.load(f"data/lm/{lm}.pt")
        d = torch.load(f"data/lm/{lm}.pt", weights_only=False)
        G = k2.Fsa.from_dict(d)
    else:
        logging.info(f"Loading {lm}.fst.txt")
@ -66,11 +66,11 @@ def compile_LG(lang_dir: str, lm: str = "G_3_gram") -> k2.Fsa:
        An FSA representing LG.
    """
    lexicon = Lexicon(lang_dir)
    L = k2.Fsa.from_dict(torch.load(f"{lang_dir}/L_disambig.pt"))
    L = k2.Fsa.from_dict(torch.load(f"{lang_dir}/L_disambig.pt", weights_only=False))

    if Path(f"data/lm/{lm}.pt").is_file():
        logging.info(f"Loading pre-compiled {lm}")
        d = torch.load(f"data/lm/{lm}.pt")
        d = torch.load(f"data/lm/{lm}.pt", weights_only=False)
        G = k2.Fsa.from_dict(d)
    else:
        logging.info(f"Loading {lm}.fst.txt")
@ -750,7 +750,7 @@ def main():
        lg_filename = params.lang_dir / "LG.pt"
        logging.info(f"Loading {lg_filename}")
        decoding_graph = k2.Fsa.from_dict(
            torch.load(lg_filename, map_location=device)
            torch.load(lg_filename, map_location=device, weights_only=False)
        )
        decoding_graph.scores *= params.ngram_lm_scale
    else:
@ -23,7 +23,7 @@ import torch.nn as nn
from encoder_interface import EncoderInterface
from scaling import ScaledLinear

from icefall.utils import add_sos
from icefall.utils import add_sos, torch_autocast


class Transducer(nn.Module):
@ -156,7 +156,7 @@ class Transducer(nn.Module):
        lm = self.simple_lm_proj(decoder_out)
        am = self.simple_am_proj(encoder_out)

        with torch.cuda.amp.autocast(enabled=False):
        with torch_autocast(enabled=False):
            simple_loss, (px_grad, py_grad) = k2.rnnt_loss_smoothed(
                lm=lm.float(),
                am=am.float(),
@ -192,7 +192,7 @@ class Transducer(nn.Module):
        # prior to do_rnnt_pruning (this is an optimization for speed).
        logits = self.joiner(am_pruned, lm_pruned, project_input=False)

        with torch.cuda.amp.autocast(enabled=False):
        with torch_autocast(enabled=False):
            pruned_loss = k2.rnnt_loss_pruned(
                logits=logits.float(),
                symbols=y_padded,
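Note that in the model files the two k2 RNN-T loss calls are wrapped in `torch_autocast(enabled=False)` rather than the fp16 context used elsewhere: the loss is numerically sensitive, so autocast is disabled and the inputs are explicitly cast with `.float()` so the loss is computed in float32 even when the surrounding forward pass runs under mixed precision. An illustrative, generic version of the pattern (not icefall code):

```python
import torch
import torch.nn.functional as F


def stable_loss_under_amp(logits: torch.Tensor, targets: torch.Tensor) -> torch.Tensor:
    # Inside an fp16 autocast region, temporarily disable autocast and cast
    # inputs to float32 before a numerically sensitive reduction, mirroring
    # how the diff wraps k2.rnnt_loss_smoothed / k2.rnnt_loss_pruned.
    with torch.amp.autocast("cuda", enabled=False):
        return F.cross_entropy(logits.float(), targets)
```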
@ -238,7 +238,7 @@ def main():
    num_param = sum([p.numel() for p in model.parameters()])
    logging.info(f"Number of model parameters: {num_param}")

    checkpoint = torch.load(args.checkpoint, map_location="cpu")
    checkpoint = torch.load(args.checkpoint, map_location="cpu", weights_only=False)
    model.load_state_dict(checkpoint["model"], strict=False)
    model.to(device)
    model.eval()
@ -66,7 +66,6 @@ from lstm import RNN
from model import Transducer
from optim import Eden, Eve
from torch import Tensor
from torch.cuda.amp import GradScaler
from torch.nn.parallel import DistributedDataParallel as DDP
from torch.utils.tensorboard import SummaryWriter

@ -82,9 +81,11 @@ from icefall.env import get_env_info
from icefall.utils import (
    AttributeDict,
    MetricsTracker,
    create_grad_scaler,
    display_and_save_batch,
    setup_logger,
    str2bool,
    torch_autocast,
)

LRSchedulerType = Union[torch.optim.lr_scheduler._LRScheduler, optim.LRScheduler]
@ -521,7 +522,7 @@ def save_checkpoint(
    optimizer: Optional[torch.optim.Optimizer] = None,
    scheduler: Optional[LRSchedulerType] = None,
    sampler: Optional[CutSampler] = None,
    scaler: Optional[GradScaler] = None,
    scaler: Optional["GradScaler"] = None,
    rank: int = 0,
) -> None:
    """Save model, optimizer, scheduler and training stats to file.
@ -717,7 +718,7 @@ def train_one_epoch(
    sp: spm.SentencePieceProcessor,
    train_dl: torch.utils.data.DataLoader,
    valid_dl: torch.utils.data.DataLoader,
    scaler: GradScaler,
    scaler: "GradScaler",
    model_avg: Optional[nn.Module] = None,
    tb_writer: Optional[SummaryWriter] = None,
    world_size: int = 1,
@ -763,7 +764,7 @@ def train_one_epoch(
        batch_size = len(batch["supervisions"]["text"])

        try:
            with torch.cuda.amp.autocast(enabled=params.use_fp16):
            with torch_autocast(enabled=params.use_fp16):
                loss, loss_info = compute_loss(
                    params=params,
                    model=model,
@ -1023,7 +1024,7 @@ def run(rank, world_size, args):
            warmup=0.0 if params.start_epoch == 1 else 1.0,
        )

    scaler = GradScaler(enabled=params.use_fp16)
    scaler = create_grad_scaler(enabled=params.use_fp16)
    if checkpoints and "grad_scaler" in checkpoints:
        logging.info("Loading grad scaler state dict")
        scaler.load_state_dict(checkpoints["grad_scaler"])
@ -1092,7 +1093,7 @@ def scan_pessimistic_batches_for_oom(
    for criterion, cuts in batches.items():
        batch = train_dl.dataset[cuts]
        try:
            with torch.cuda.amp.autocast(enabled=params.use_fp16):
            with torch_autocast(enabled=params.use_fp16):
                loss, _ = compute_loss(
                    params=params,
                    model=model,
@ -935,7 +935,7 @@ def main():
        lg_filename = params.lang_dir / "LG.pt"
        logging.info(f"Loading {lg_filename}")
        decoding_graph = k2.Fsa.from_dict(
            torch.load(lg_filename, map_location=device)
            torch.load(lg_filename, map_location=device, weights_only=False)
        )
        decoding_graph.scores *= params.ngram_lm_scale
    else:
@ -23,7 +23,7 @@ import torch.nn as nn
from encoder_interface import EncoderInterface
from scaling import ScaledLinear

from icefall.utils import add_sos
from icefall.utils import add_sos, torch_autocast


class Transducer(nn.Module):
@ -195,7 +195,7 @@ class Transducer(nn.Module):
        lm = simple_lm_proj(decoder_out)
        am = simple_am_proj(encoder_out)

        with torch.cuda.amp.autocast(enabled=False):
        with torch_autocast(enabled=False):
            simple_loss, (px_grad, py_grad) = k2.rnnt_loss_smoothed(
                lm=lm.float(),
                am=am.float(),
@ -231,7 +231,7 @@ class Transducer(nn.Module):
        # prior to do_rnnt_pruning (this is an optimization for speed).
        logits = joiner(am_pruned, lm_pruned, project_input=False)

        with torch.cuda.amp.autocast(enabled=False):
        with torch_autocast(enabled=False):
            pruned_loss = k2.rnnt_loss_pruned(
                logits=logits.float(),
                symbols=y_padded,
@ -241,7 +241,7 @@ def main():
    num_param = sum([p.numel() for p in model.parameters()])
    logging.info(f"Number of model parameters: {num_param}")

    checkpoint = torch.load(args.checkpoint, map_location="cpu")
    checkpoint = torch.load(args.checkpoint, map_location="cpu", weights_only=False)
    model.load_state_dict(checkpoint["model"], strict=False)
    model.to(device)
    model.eval()
@ -74,7 +74,6 @@ from lstm import RNN
from model import Transducer
from optim import Eden, Eve
from torch import Tensor
from torch.cuda.amp import GradScaler
from torch.nn.parallel import DistributedDataParallel as DDP
from torch.utils.tensorboard import SummaryWriter

@ -90,9 +89,11 @@ from icefall.env import get_env_info
from icefall.utils import (
    AttributeDict,
    MetricsTracker,
    create_grad_scaler,
    display_and_save_batch,
    setup_logger,
    str2bool,
    torch_autocast,
)

LRSchedulerType = Union[torch.optim.lr_scheduler._LRScheduler, optim.LRScheduler]
@ -560,7 +561,7 @@ def save_checkpoint(
    optimizer: Optional[torch.optim.Optimizer] = None,
    scheduler: Optional[LRSchedulerType] = None,
    sampler: Optional[CutSampler] = None,
    scaler: Optional[GradScaler] = None,
    scaler: Optional["GradScaler"] = None,
    rank: int = 0,
) -> None:
    """Save model, optimizer, scheduler and training stats to file.
@ -772,7 +773,7 @@ def train_one_epoch(
    giga_train_dl: torch.utils.data.DataLoader,
    valid_dl: torch.utils.data.DataLoader,
    rng: random.Random,
    scaler: GradScaler,
    scaler: "GradScaler",
    model_avg: Optional[nn.Module] = None,
    tb_writer: Optional[SummaryWriter] = None,
    world_size: int = 1,
@ -848,7 +849,7 @@ def train_one_epoch(
        libri = is_libri(batch["supervisions"]["cut"][0])

        try:
            with torch.cuda.amp.autocast(enabled=params.use_fp16):
            with torch_autocast(enabled=params.use_fp16):
                loss, loss_info = compute_loss(
                    params=params,
                    model=model,
@ -1176,7 +1177,7 @@ def run(rank, world_size, args):
    else:
        logging.info("Skip scan_pessimistic_batches_for_oom")

    scaler = GradScaler(enabled=params.use_fp16)
    scaler = create_grad_scaler(enabled=params.use_fp16)
    if checkpoints and "grad_scaler" in checkpoints:
        logging.info("Loading grad scaler state dict")
        scaler.load_state_dict(checkpoints["grad_scaler"])
@ -1247,7 +1248,7 @@ def scan_pessimistic_batches_for_oom(
    for criterion, cuts in batches.items():
        batch = train_dl.dataset[cuts]
        try:
            with torch.cuda.amp.autocast(enabled=params.use_fp16):
            with torch_autocast(enabled=params.use_fp16):
                loss, _ = compute_loss(
                    params=params,
                    model=model,
@ -815,7 +815,7 @@ def main():
        lg_filename = params.lang_dir / "LG.pt"
        logging.info(f"Loading {lg_filename}")
        decoding_graph = k2.Fsa.from_dict(
            torch.load(lg_filename, map_location=device)
            torch.load(lg_filename, map_location=device, weights_only=False)
        )
        decoding_graph.scores *= params.ngram_lm_scale
    else:
@ -239,7 +239,7 @@ def main():
    num_param = sum([p.numel() for p in model.parameters()])
    logging.info(f"Number of model parameters: {num_param}")

    checkpoint = torch.load(args.checkpoint, map_location="cpu")
    checkpoint = torch.load(args.checkpoint, map_location="cpu", weights_only=False)
    model.load_state_dict(checkpoint["model"], strict=False)
    model.to(device)
    model.eval()
@ -66,7 +66,6 @@ from lstm import RNN
from model import Transducer
from optim import Eden, Eve
from torch import Tensor
from torch.cuda.amp import GradScaler
from torch.nn.parallel import DistributedDataParallel as DDP
from torch.utils.tensorboard import SummaryWriter

@ -82,9 +81,11 @@ from icefall.env import get_env_info
from icefall.utils import (
    AttributeDict,
    MetricsTracker,
    create_grad_scaler,
    display_and_save_batch,
    setup_logger,
    str2bool,
    torch_autocast,
)

LRSchedulerType = Union[torch.optim.lr_scheduler._LRScheduler, optim.LRScheduler]
@ -551,7 +552,7 @@ def save_checkpoint(
    optimizer: Optional[torch.optim.Optimizer] = None,
    scheduler: Optional[LRSchedulerType] = None,
    sampler: Optional[CutSampler] = None,
    scaler: Optional[GradScaler] = None,
    scaler: Optional["GradScaler"] = None,
    rank: int = 0,
) -> None:
    """Save model, optimizer, scheduler and training stats to file.
@ -747,7 +748,7 @@ def train_one_epoch(
    sp: spm.SentencePieceProcessor,
    train_dl: torch.utils.data.DataLoader,
    valid_dl: torch.utils.data.DataLoader,
    scaler: GradScaler,
    scaler: "GradScaler",
    model_avg: Optional[nn.Module] = None,
    tb_writer: Optional[SummaryWriter] = None,
    world_size: int = 1,
@ -793,7 +794,7 @@ def train_one_epoch(
        batch_size = len(batch["supervisions"]["text"])

        try:
            with torch.cuda.amp.autocast(enabled=params.use_fp16):
            with torch_autocast(enabled=params.use_fp16):
                loss, loss_info = compute_loss(
                    params=params,
                    model=model,
@ -1067,7 +1068,7 @@ def run(rank, world_size, args):
            warmup=0.0 if params.start_epoch == 1 else 1.0,
        )

    scaler = GradScaler(enabled=params.use_fp16)
    scaler = create_grad_scaler(enabled=params.use_fp16)
    if checkpoints and "grad_scaler" in checkpoints:
        logging.info("Loading grad scaler state dict")
        scaler.load_state_dict(checkpoints["grad_scaler"])
@ -1136,7 +1137,7 @@ def scan_pessimistic_batches_for_oom(
    for criterion, cuts in batches.items():
        batch = train_dl.dataset[cuts]
        try:
            with torch.cuda.amp.autocast(enabled=params.use_fp16):
            with torch_autocast(enabled=params.use_fp16):
                loss, _ = compute_loss(
                    params=params,
                    model=model,
@ -21,7 +21,7 @@ import torch.nn as nn
from encoder_interface import EncoderInterface
from scaling import ScaledLinear

from icefall.utils import add_sos
from icefall.utils import add_sos, torch_autocast


class Transducer(nn.Module):
@ -141,7 +141,7 @@ class Transducer(nn.Module):
        lm = self.simple_lm_proj(decoder_out)
        am = self.simple_am_proj(encoder_out)

        with torch.cuda.amp.autocast(enabled=False):
        with torch_autocast(enabled=False):
            simple_loss, (px_grad, py_grad) = k2.rnnt_loss_smoothed(
                lm=lm.float(),
                am=am.float(),
@ -176,7 +176,7 @@ class Transducer(nn.Module):
        # prior to do_rnnt_pruning (this is an optimization for speed).
        logits = self.joiner(am_pruned, lm_pruned, project_input=False)

        with torch.cuda.amp.autocast(enabled=False):
        with torch_autocast(enabled=False):
            pruned_loss = k2.rnnt_loss_pruned(
                logits=logits.float(),
                symbols=y_padded,
@ -10,9 +10,11 @@ from typing import Optional, Tuple
import torch
from scaling import ScaledLinear
from torch import Tensor, nn
from torch.cuda.amp import GradScaler, custom_bwd, custom_fwd
from torch.cuda.amp import custom_bwd, custom_fwd
from torch_scheduled_sampling import sample_combined

from icefall.utils import create_grad_scaler, torch_autocast

# The main exports of this file are the module KnowledgeBaseLookup and the
# function create_knowledge_base.

@ -330,14 +332,14 @@ def _test_knowledge_base_lookup_autocast():
    optimizer = Eve(m.parameters(), lr=0.005, eps=1.0e-04)
    m = m.to(device)

    scaler = GradScaler(enabled=True)
    scaler = create_grad_scaler(enabled=True)

    start = timeit.default_timer()

    for epoch in range(150):
        for n, (x, y) in enumerate(train_pairs):
            y_out = m(x)
            with torch.cuda.amp.autocast(enabled=True):
            with torch_autocast(enabled=True):
                loss = ((y_out - y) ** 2).mean() * 100.0
            if n % 10 == 0 and epoch % 10 == 0:
                print(f"Epoch {epoch}, batch {n}, loss {loss.item()}")
@ -66,7 +66,6 @@ from lhotse.utils import fix_random_seed
from model import Transducer
from optim import Eden, Eve
from torch import Tensor
from torch.cuda.amp import GradScaler
from torch.nn.parallel import DistributedDataParallel as DDP
from torch.utils.tensorboard import SummaryWriter

@ -76,7 +75,14 @@ from icefall.checkpoint import save_checkpoint as save_checkpoint_impl
from icefall.checkpoint import save_checkpoint_with_global_batch_idx
from icefall.dist import cleanup_dist, setup_dist
from icefall.env import get_env_info
from icefall.utils import AttributeDict, MetricsTracker, setup_logger, str2bool
from icefall.utils import (
    create_grad_scaler,
    AttributeDict,
    MetricsTracker,
    setup_logger,
    str2bool,
    torch_autocast,
)

LRSchedulerType = Union[torch.optim.lr_scheduler._LRScheduler, optim.LRScheduler]

@ -453,7 +459,7 @@ def save_checkpoint(
    optimizer: Optional[torch.optim.Optimizer] = None,
    scheduler: Optional[LRSchedulerType] = None,
    sampler: Optional[CutSampler] = None,
    scaler: Optional[GradScaler] = None,
    scaler: Optional["GradScaler"] = None,
    rank: int = 0,
) -> None:
    """Save model, optimizer, scheduler and training stats to file.
@ -608,7 +614,7 @@ def train_one_epoch(
    sp: spm.SentencePieceProcessor,
    train_dl: torch.utils.data.DataLoader,
    valid_dl: torch.utils.data.DataLoader,
    scaler: GradScaler,
    scaler: "GradScaler",
    tb_writer: Optional[SummaryWriter] = None,
    world_size: int = 1,
    rank: int = 0,
@ -650,7 +656,7 @@ def train_one_epoch(
        params.batch_idx_train += 1
        batch_size = len(batch["supervisions"]["text"])

        with torch.cuda.amp.autocast(enabled=params.use_fp16):
        with torch_autocast(enabled=params.use_fp16):
            loss, loss_info = compute_loss(
                params=params,
                model=model,
@ -868,7 +874,7 @@ def run(rank, world_size, args):
            params=params,
        )

    scaler = GradScaler(enabled=params.use_fp16)
    scaler = create_grad_scaler(enabled=params.use_fp16)
    if checkpoints and "grad_scaler" in checkpoints:
        logging.info("Loading grad scaler state dict")
        scaler.load_state_dict(checkpoints["grad_scaler"])
@ -937,7 +943,7 @@ def scan_pessimistic_batches_for_oom(
        # warmup = 0.0 is so that the derivs for the pruned loss stay zero
        # (i.e. are not remembered by the decaying-average in adam), because
        # we want to avoid these params being subject to shrinkage in adam.
        with torch.cuda.amp.autocast(enabled=params.use_fp16):
        with torch_autocast(enabled=params.use_fp16):
            loss, _ = compute_loss(
                params=params,
                model=model,
@ -55,7 +55,6 @@ from lhotse.utils import fix_random_seed
from model import Transducer
from noam import Noam
from torch import Tensor
from torch.cuda.amp import GradScaler
from torch.nn.parallel import DistributedDataParallel as DDP
from torch.utils.tensorboard import SummaryWriter

@ -68,7 +67,14 @@ from icefall.checkpoint import (
)
from icefall.dist import cleanup_dist, setup_dist
from icefall.env import get_env_info
from icefall.utils import AttributeDict, MetricsTracker, setup_logger, str2bool
from icefall.utils import (
    AttributeDict,
    MetricsTracker,
    create_grad_scaler,
    setup_logger,
    str2bool,
    torch_autocast,
)


def add_model_arguments(parser: argparse.ArgumentParser):
@ -496,7 +502,7 @@ def save_checkpoint(
    model_avg: Optional[nn.Module] = None,
    optimizer: Optional[torch.optim.Optimizer] = None,
    sampler: Optional[CutSampler] = None,
    scaler: Optional[GradScaler] = None,
    scaler: Optional["GradScaler"] = None,
    rank: int = 0,
) -> None:
    """Save model, optimizer, and training stats to file.
@ -650,7 +656,7 @@ def train_one_epoch(
    sp: spm.SentencePieceProcessor,
    train_dl: torch.utils.data.DataLoader,
    valid_dl: torch.utils.data.DataLoader,
    scaler: GradScaler,
    scaler: "GradScaler",
    model_avg: Optional[nn.Module] = None,
    tb_writer: Optional[SummaryWriter] = None,
    world_size: int = 1,
@ -693,7 +699,7 @@ def train_one_epoch(
        params.batch_idx_train += 1
        batch_size = len(batch["supervisions"]["text"])

        with torch.cuda.amp.autocast(enabled=params.use_fp16):
        with torch_autocast(enabled=params.use_fp16):
            loss, loss_info = compute_loss(
                params=params,
                model=model,
@ -939,7 +945,7 @@ def run(rank, world_size, args):
            params=params,
        )

    scaler = GradScaler(enabled=params.use_fp16)
    scaler = create_grad_scaler(enabled=params.use_fp16)
    if checkpoints and "grad_scaler" in checkpoints:
        logging.info("Loading grad scaler state dict")
        scaler.load_state_dict(checkpoints["grad_scaler"])
@ -1004,7 +1010,7 @@ def scan_pessimistic_batches_for_oom(
    for criterion, cuts in batches.items():
        batch = train_dl.dataset[cuts]
        try:
            with torch.cuda.amp.autocast(enabled=params.use_fp16):
            with torch_autocast(enabled=params.use_fp16):
                loss, _ = compute_loss(
                    params=params,
                    model=model,
@ -741,7 +741,7 @@ def main():
        lg_filename = params.lang_dir / "LG.pt"
        logging.info(f"Loading {lg_filename}")
        decoding_graph = k2.Fsa.from_dict(
            torch.load(lg_filename, map_location=device)
            torch.load(lg_filename, map_location=device, weights_only=False)
        )
        decoding_graph.scores *= params.ngram_lm_scale
    else:
@ -1347,7 +1347,10 @@ def modified_beam_search(
                    (
                        context_score,
                        new_context_state,
                    ) = context_graph.forward_one_step(hyp.context_state, new_token)
                        _,
                    ) = context_graph.forward_one_step(
                        hyp.context_state, new_token, strict_mode=False
                    )

                new_log_prob = topk_log_probs[k] + context_score

@ -2853,7 +2856,10 @@ def modified_beam_search_LODR(
                    (
                        context_score,
                        new_context_state,
                    ) = context_graph.forward_one_step(hyp.context_state, new_token)
                        _,
                    ) = context_graph.forward_one_step(
                        hyp.context_state, new_token, strict_mode=False
                    )

                ys.append(new_token)
                state_cost = hyp.state_cost.forward_one_step(new_token)
@ -754,7 +754,7 @@ def main():
        lg_filename = params.lang_dir / "LG.pt"
        logging.info(f"Loading {lg_filename}")
        decoding_graph = k2.Fsa.from_dict(
            torch.load(lg_filename, map_location=device)
            torch.load(lg_filename, map_location=device, weights_only=False)
        )
        decoding_graph.scores *= params.ngram_lm_scale
    else:
@ -23,7 +23,7 @@ import torch.nn as nn
from encoder_interface import EncoderInterface
from scaling import ScaledLinear

from icefall.utils import add_sos
from icefall.utils import add_sos, torch_autocast


class Transducer(nn.Module):
@ -157,7 +157,7 @@ class Transducer(nn.Module):
        lm = self.simple_lm_proj(decoder_out)
        am = self.simple_am_proj(encoder_out)

        with torch.cuda.amp.autocast(enabled=False):
        with torch_autocast(enabled=False):
            simple_loss, (px_grad, py_grad) = k2.rnnt_loss_smoothed(
                lm=lm.float(),
                am=am.float(),
@ -193,7 +193,7 @@ class Transducer(nn.Module):
        # prior to do_rnnt_pruning (this is an optimization for speed).
        logits = self.joiner(am_pruned, lm_pruned, project_input=False)

        with torch.cuda.amp.autocast(enabled=False):
        with torch_autocast(enabled=False):
            pruned_loss = k2.rnnt_loss_pruned(
                logits=logits.float(),
                symbols=y_padded,
@ -265,7 +265,7 @@ def main():
    num_param = sum([p.numel() for p in model.parameters()])
    logging.info(f"Number of model parameters: {num_param}")

    checkpoint = torch.load(args.checkpoint, map_location="cpu")
    checkpoint = torch.load(args.checkpoint, map_location="cpu", weights_only=False)
    model.load_state_dict(checkpoint["model"], strict=False)
    model.to(device)
    model.eval()
@ -78,7 +78,6 @@ from lhotse.utils import fix_random_seed
from model import Transducer
from optim import Eden, Eve
from torch import Tensor
from torch.cuda.amp import GradScaler
from torch.nn.parallel import DistributedDataParallel as DDP
from torch.utils.tensorboard import SummaryWriter

@ -91,9 +90,11 @@ from icefall.env import get_env_info
from icefall.utils import (
    AttributeDict,
    MetricsTracker,
    create_grad_scaler,
    display_and_save_batch,
    setup_logger,
    str2bool,
    torch_autocast,
)

LRSchedulerType = Union[torch.optim.lr_scheduler._LRScheduler, optim.LRScheduler]
@ -523,7 +524,7 @@ def save_checkpoint(
    optimizer: Optional[torch.optim.Optimizer] = None,
    scheduler: Optional[LRSchedulerType] = None,
    sampler: Optional[CutSampler] = None,
    scaler: Optional[GradScaler] = None,
    scaler: Optional["GradScaler"] = None,
    rank: int = 0,
) -> None:
    """Save model, optimizer, scheduler and training stats to file.
@ -716,7 +717,7 @@ def train_one_epoch(
    sp: spm.SentencePieceProcessor,
    train_dl: torch.utils.data.DataLoader,
    valid_dl: torch.utils.data.DataLoader,
    scaler: GradScaler,
    scaler: "GradScaler",
    tb_writer: Optional[SummaryWriter] = None,
    world_size: int = 1,
    rank: int = 0,
@ -759,7 +760,7 @@ def train_one_epoch(
        batch_size = len(batch["supervisions"]["text"])

        try:
            with torch.cuda.amp.autocast(enabled=params.use_fp16):
            with torch_autocast(enabled=params.use_fp16):
                loss, loss_info = compute_loss(
                    params=params,
                    model=model,
@ -1000,7 +1001,7 @@ def run(rank, world_size, args):
            warmup=0.0 if params.start_epoch == 0 else 1.0,
        )

    scaler = GradScaler(enabled=params.use_fp16)
    scaler = create_grad_scaler(enabled=params.use_fp16)
    if checkpoints and "grad_scaler" in checkpoints:
        logging.info("Loading grad scaler state dict")
        scaler.load_state_dict(checkpoints["grad_scaler"])
@ -1067,7 +1068,7 @@ def scan_pessimistic_batches_for_oom(
    for criterion, cuts in batches.items():
        batch = train_dl.dataset[cuts]
        try:
            with torch.cuda.amp.autocast(enabled=params.use_fp16):
            with torch_autocast(enabled=params.use_fp16):
                loss, _ = compute_loss(
                    params=params,
                    model=model,
@ -921,7 +921,7 @@ def load_ngram_LM(

    if pt_file.is_file():
        logging.info(f"Loading pre-compiled {pt_file}")
        d = torch.load(pt_file, map_location=device)
        d = torch.load(pt_file, map_location=device, weights_only=False)
        G = k2.Fsa.from_dict(d)
        G = k2.add_epsilon_self_loops(G)
        G = k2.arc_sort(G)
@ -1101,7 +1101,7 @@ def main():
        lg_filename = params.lang_dir / "LG.pt"
        logging.info(f"Loading {lg_filename}")
        decoding_graph = k2.Fsa.from_dict(
            torch.load(lg_filename, map_location=device)
            torch.load(lg_filename, map_location=device, weights_only=False)
        )
        decoding_graph.scores *= params.ngram_lm_scale
    elif params.decoding_method in [
@ -23,7 +23,7 @@ import torch.nn as nn
from encoder_interface import EncoderInterface
from scaling import ScaledLinear

from icefall.utils import add_sos
from icefall.utils import add_sos, torch_autocast


class Transducer(nn.Module):
@ -195,7 +195,7 @@ class Transducer(nn.Module):
        lm = simple_lm_proj(decoder_out)
        am = simple_am_proj(encoder_out)

        with torch.cuda.amp.autocast(enabled=False):
        with torch_autocast(enabled=False):
            simple_loss, (px_grad, py_grad) = k2.rnnt_loss_smoothed(
                lm=lm.float(),
                am=am.float(),
@ -231,7 +231,7 @@ class Transducer(nn.Module):
        # prior to do_rnnt_pruning (this is an optimization for speed).
        logits = joiner(am_pruned, lm_pruned, project_input=False)

        with torch.cuda.amp.autocast(enabled=False):
        with torch_autocast(enabled=False):
            pruned_loss = k2.rnnt_loss_pruned(
                logits=logits.float(),
                symbols=y_padded,
@ -274,7 +274,7 @@ def main():
    num_param = sum([p.numel() for p in model.parameters()])
    logging.info(f"Number of model parameters: {num_param}")

    checkpoint = torch.load(args.checkpoint, map_location="cpu")
    checkpoint = torch.load(args.checkpoint, map_location="cpu", weights_only=False)
    model.load_state_dict(checkpoint["model"], strict=False)
    model.to(device)
    model.eval()
@ -74,7 +74,6 @@ from librispeech import LibriSpeech
from model import Transducer
from optim import Eden, Eve
from torch import Tensor
from torch.cuda.amp import GradScaler
from torch.nn.parallel import DistributedDataParallel as DDP
from torch.utils.tensorboard import SummaryWriter

@ -87,9 +86,11 @@ from icefall.env import get_env_info
from icefall.utils import (
    AttributeDict,
    MetricsTracker,
    create_grad_scaler,
    display_and_save_batch,
    setup_logger,
    str2bool,
    torch_autocast,
)

LRSchedulerType = Union[torch.optim.lr_scheduler._LRScheduler, optim.LRScheduler]
@ -546,7 +547,7 @@ def save_checkpoint(
    optimizer: Optional[torch.optim.Optimizer] = None,
    scheduler: Optional[LRSchedulerType] = None,
    sampler: Optional[CutSampler] = None,
    scaler: Optional[GradScaler] = None,
    scaler: Optional["GradScaler"] = None,
    rank: int = 0,
) -> None:
    """Save model, optimizer, scheduler and training stats to file.
@ -755,7 +756,7 @@ def train_one_epoch(
    giga_train_dl: torch.utils.data.DataLoader,
    valid_dl: torch.utils.data.DataLoader,
    rng: random.Random,
    scaler: GradScaler,
    scaler: "GradScaler",
    tb_writer: Optional[SummaryWriter] = None,
    world_size: int = 1,
    rank: int = 0,
@ -827,7 +828,7 @@ def train_one_epoch(

        libri = is_libri(batch["supervisions"]["cut"][0])

        with torch.cuda.amp.autocast(enabled=params.use_fp16):
        with torch_autocast(enabled=params.use_fp16):
            loss, loss_info = compute_loss(
                params=params,
                model=model,
@ -1126,7 +1127,7 @@ def run(rank, world_size, args):
            warmup=0.0 if params.start_epoch == 0 else 1.0,
        )

    scaler = GradScaler(enabled=params.use_fp16)
    scaler = create_grad_scaler(enabled=params.use_fp16)
    if checkpoints and "grad_scaler" in checkpoints:
        logging.info("Loading grad scaler state dict")
        scaler.load_state_dict(checkpoints["grad_scaler"])
@ -1195,7 +1196,7 @@ def scan_pessimistic_batches_for_oom(
    for criterion, cuts in batches.items():
        batch = train_dl.dataset[cuts]
        try:
            with torch.cuda.amp.autocast(enabled=params.use_fp16):
            with torch_autocast(enabled=params.use_fp16):
                loss, _ = compute_loss(
                    params=params,
                    model=model,
@ -913,7 +913,7 @@ def main():
        lg_filename = params.lang_dir / "LG.pt"
        logging.info(f"Loading {lg_filename}")
        decoding_graph = k2.Fsa.from_dict(
            torch.load(lg_filename, map_location=device)
            torch.load(lg_filename, map_location=device, weights_only=False)
        )
        decoding_graph.scores *= params.ngram_lm_scale
    else:
@ -80,7 +80,6 @@ from lhotse.utils import fix_random_seed
from model import Transducer
from optim import Eden, Eve
from torch import Tensor
from torch.cuda.amp import GradScaler
from torch.nn.parallel import DistributedDataParallel as DDP
from torch.utils.tensorboard import SummaryWriter

@ -96,9 +95,11 @@ from icefall.env import get_env_info
from icefall.utils import (
    AttributeDict,
    MetricsTracker,
    create_grad_scaler,
    display_and_save_batch,
    setup_logger,
    str2bool,
    torch_autocast,
)

LRSchedulerType = Union[torch.optim.lr_scheduler._LRScheduler, optim.LRScheduler]
@ -548,7 +549,7 @@ def save_checkpoint(
    optimizer: Optional[torch.optim.Optimizer] = None,
    scheduler: Optional[LRSchedulerType] = None,
    sampler: Optional[CutSampler] = None,
    scaler: Optional[GradScaler] = None,
    scaler: Optional["GradScaler"] = None,
    rank: int = 0,
) -> None:
    """Save model, optimizer, scheduler and training stats to file.
@ -744,7 +745,7 @@ def train_one_epoch(
    sp: spm.SentencePieceProcessor,
    train_dl: torch.utils.data.DataLoader,
    valid_dl: torch.utils.data.DataLoader,
    scaler: GradScaler,
    scaler: "GradScaler",
    model_avg: Optional[nn.Module] = None,
    tb_writer: Optional[SummaryWriter] = None,
    world_size: int = 1,
@ -789,7 +790,7 @@ def train_one_epoch(
        params.batch_idx_train += 1
        batch_size = len(batch["supervisions"]["text"])

        with torch.cuda.amp.autocast(enabled=params.use_fp16):
        with torch_autocast(enabled=params.use_fp16):
            loss, loss_info = compute_loss(
                params=params,
                model=model,
@ -1047,7 +1048,7 @@ def run(rank, world_size, args):
            warmup=0.0 if params.start_epoch == 1 else 1.0,
        )

    scaler = GradScaler(enabled=params.use_fp16)
    scaler = create_grad_scaler(enabled=params.use_fp16)
    if checkpoints and "grad_scaler" in checkpoints:
        logging.info("Loading grad scaler state dict")
        scaler.load_state_dict(checkpoints["grad_scaler"])
@ -1116,7 +1117,7 @@ def scan_pessimistic_batches_for_oom(
    for criterion, cuts in batches.items():
        batch = train_dl.dataset[cuts]
        try:
            with torch.cuda.amp.autocast(enabled=params.use_fp16):
            with torch_autocast(enabled=params.use_fp16):
                loss, _ = compute_loss(
                    params=params,
                    model=model,
@ -972,7 +972,7 @@ def main():
        lg_filename = params.lang_dir / "LG.pt"
        logging.info(f"Loading {lg_filename}")
        decoding_graph = k2.Fsa.from_dict(
            torch.load(lg_filename, map_location=device)
            torch.load(lg_filename, map_location=device, weights_only=False)
        )
        decoding_graph.scores *= params.ngram_lm_scale
    else:
@ -238,7 +238,7 @@ def main():
    num_param = sum([p.numel() for p in model.parameters()])
    logging.info(f"Number of model parameters: {num_param}")

    checkpoint = torch.load(args.checkpoint, map_location="cpu")
    checkpoint = torch.load(args.checkpoint, map_location="cpu", weights_only=False)
    model.load_state_dict(checkpoint["model"], strict=False)
    model.to(device)
    model.eval()
@ -68,7 +68,6 @@ from lhotse.utils import fix_random_seed
from model import Transducer
from optim import Eden, Eve
from torch import Tensor
from torch.cuda.amp import GradScaler
from torch.nn.parallel import DistributedDataParallel as DDP
from torch.utils.tensorboard import SummaryWriter

@ -84,9 +83,11 @@ from icefall.env import get_env_info
from icefall.utils import (
    AttributeDict,
    MetricsTracker,
    create_grad_scaler,
    display_and_save_batch,
    setup_logger,
    str2bool,
    torch_autocast,
)

LRSchedulerType = Union[torch.optim.lr_scheduler._LRScheduler, optim.LRScheduler]
@ -571,7 +572,7 @@ def save_checkpoint(
    optimizer: Optional[torch.optim.Optimizer] = None,
    scheduler: Optional[LRSchedulerType] = None,
    sampler: Optional[CutSampler] = None,
    scaler: Optional[GradScaler] = None,
    scaler: Optional["GradScaler"] = None,
    rank: int = 0,
) -> None:
    """Save model, optimizer, scheduler and training stats to file.
@ -768,7 +769,7 @@ def train_one_epoch(
    sp: spm.SentencePieceProcessor,
    train_dl: torch.utils.data.DataLoader,
    valid_dl: torch.utils.data.DataLoader,
    scaler: GradScaler,
    scaler: "GradScaler",
    model_avg: Optional[nn.Module] = None,
    tb_writer: Optional[SummaryWriter] = None,
    world_size: int = 1,
@ -814,7 +815,7 @@ def train_one_epoch(
        batch_size = len(batch["supervisions"]["text"])

        try:
            with torch.cuda.amp.autocast(enabled=params.use_fp16):
            with torch_autocast(enabled=params.use_fp16):
                loss, loss_info = compute_loss(
                    params=params,
                    model=model,
@ -1078,7 +1079,7 @@ def run(rank, world_size, args):
            warmup=0.0 if params.start_epoch == 1 else 1.0,
        )

    scaler = GradScaler(enabled=params.use_fp16)
    scaler = create_grad_scaler(enabled=params.use_fp16)
    if checkpoints and "grad_scaler" in checkpoints:
        logging.info("Loading grad scaler state dict")
        scaler.load_state_dict(checkpoints["grad_scaler"])
@ -1147,7 +1148,7 @@ def scan_pessimistic_batches_for_oom(
    for criterion, cuts in batches.items():
        batch = train_dl.dataset[cuts]
        try:
            with torch.cuda.amp.autocast(enabled=params.use_fp16):
            with torch_autocast(enabled=params.use_fp16):
                loss, _ = compute_loss(
                    params=params,
                    model=model,
@ -23,7 +23,7 @@ import torch.nn as nn
from encoder_interface import EncoderInterface
from scaling import ScaledLinear

from icefall.utils import add_sos
from icefall.utils import add_sos, torch_autocast


class Transducer(nn.Module):
@ -185,7 +185,7 @@ class Transducer(nn.Module):
        lm = self.simple_lm_proj(decoder_out)
        am = self.simple_am_proj(encoder_out)

        with torch.cuda.amp.autocast(enabled=False):
        with torch_autocast(enabled=False):
            simple_loss, (px_grad, py_grad) = k2.rnnt_loss_smoothed(
                lm=lm.float(),
                am=am.float(),
@ -220,7 +220,7 @@ class Transducer(nn.Module):
        # prior to do_rnnt_pruning (this is an optimization for speed).
        logits = self.joiner(am_pruned, lm_pruned, project_input=False)

        with torch.cuda.amp.autocast(enabled=False):
        with torch_autocast(enabled=False):
            pruned_loss = k2.rnnt_loss_pruned(
                logits=logits.float(),
                symbols=y_padded,
@ -80,7 +80,6 @@ from lhotse.utils import fix_random_seed
from model import Transducer
from optim import Eden, Eve
from torch import Tensor
from torch.cuda.amp import GradScaler
from torch.nn.parallel import DistributedDataParallel as DDP
from torch.utils.tensorboard import SummaryWriter

@ -96,9 +95,11 @@ from icefall.env import get_env_info
from icefall.utils import (
    AttributeDict,
    MetricsTracker,
    create_grad_scaler,
    display_and_save_batch,
    setup_logger,
    str2bool,
    torch_autocast,
)

LRSchedulerType = Union[torch.optim.lr_scheduler._LRScheduler, optim.LRScheduler]
@ -519,7 +520,7 @@ def save_checkpoint(
    optimizer: Optional[torch.optim.Optimizer] = None,
    scheduler: Optional[LRSchedulerType] = None,
    sampler: Optional[CutSampler] = None,
    scaler: Optional[GradScaler] = None,
    scaler: Optional["GradScaler"] = None,
    rank: int = 0,
) -> None:
    """Save model, optimizer, scheduler and training stats to file.
@ -736,7 +737,7 @@ def train_one_epoch(
    sp: spm.SentencePieceProcessor,
    train_dl: torch.utils.data.DataLoader,
    valid_dl: torch.utils.data.DataLoader,
    scaler: GradScaler,
    scaler: "GradScaler",
    model_avg: Optional[nn.Module] = None,
    tb_writer: Optional[SummaryWriter] = None,
    world_size: int = 1,
@ -781,7 +782,7 @@ def train_one_epoch(
        params.batch_idx_train += 1
        batch_size = len(batch["supervisions"]["text"])

        with torch.cuda.amp.autocast(enabled=params.use_fp16):
        with torch_autocast(enabled=params.use_fp16):
            loss, loss_info = compute_loss(
                params=params,
                model=model,
@ -1039,7 +1040,7 @@ def run(rank, world_size, args):
            warmup=0.0 if params.start_epoch == 1 else 1.0,
        )

    scaler = GradScaler(enabled=params.use_fp16)
    scaler = create_grad_scaler(enabled=params.use_fp16)
    if checkpoints and "grad_scaler" in checkpoints:
        logging.info("Loading grad scaler state dict")
        scaler.load_state_dict(checkpoints["grad_scaler"])
@ -1108,7 +1109,7 @@ def scan_pessimistic_batches_for_oom(
    for criterion, cuts in batches.items():
        batch = train_dl.dataset[cuts]
        try:
            with torch.cuda.amp.autocast(enabled=params.use_fp16):
            with torch_autocast(enabled=params.use_fp16):
                loss, _ = compute_loss(
                    params=params,
                    model=model,
@ -348,7 +348,9 @@ class CodebookIndexExtractor:
            num_codebooks=self.params.num_codebooks,
            codebook_size=256,
        )
        quantizer.load_state_dict(torch.load(self.quantizer_file_path))
        quantizer.load_state_dict(
            torch.load(self.quantizer_file_path, weights_only=False)
        )
        quantizer.to(self.params.device)
        return quantizer
@ -289,7 +289,7 @@ def main():
    logging.info("About to create model")
    model = get_transducer_model(params)

    checkpoint = torch.load(args.checkpoint, map_location="cpu")
    checkpoint = torch.load(args.checkpoint, map_location="cpu", weights_only=False)
    model.load_state_dict(checkpoint["model"], strict=False)
    model.to(device)
    model.eval()
@ -910,7 +910,7 @@ def main():
        lg_filename = params.lang_dir / "LG.pt"
        logging.info(f"Loading {lg_filename}")
        decoding_graph = k2.Fsa.from_dict(
            torch.load(lg_filename, map_location=device)
            torch.load(lg_filename, map_location=device, weights_only=False)
        )
        decoding_graph.scores *= params.ngram_lm_scale
    else:
@ -813,7 +813,7 @@ def main():
        lg_filename = params.lang_dir / "LG.pt"
        logging.info(f"Loading {lg_filename}")
        decoding_graph = k2.Fsa.from_dict(
            torch.load(lg_filename, map_location=device)
            torch.load(lg_filename, map_location=device, weights_only=False)
        )
        decoding_graph.scores *= params.ngram_lm_scale
    else:
@ -66,7 +66,6 @@ from lhotse.utils import fix_random_seed
from model import Transducer
from optim import Eden, ScaledAdam
from torch import Tensor
from torch.cuda.amp import GradScaler
from torch.nn.parallel import DistributedDataParallel as DDP
from torch.utils.tensorboard import SummaryWriter
from zipformer import Zipformer
@ -85,9 +84,11 @@ from icefall.hooks import register_inf_check_hooks
from icefall.utils import (
    AttributeDict,
    MetricsTracker,
    create_grad_scaler,
    filter_uneven_sized_batch,
    setup_logger,
    str2bool,
    torch_autocast,
)

LRSchedulerType = Union[torch.optim.lr_scheduler._LRScheduler, optim.LRScheduler]
@ -635,7 +636,7 @@ def load_model_params(

    """
    logging.info(f"Loading checkpoint from {ckpt}")
    checkpoint = torch.load(ckpt, map_location="cpu")
    checkpoint = torch.load(ckpt, map_location="cpu", weights_only=False)

    # if module list is empty, load the whole model from ckpt
    if not init_modules:
@ -678,7 +679,7 @@ def save_checkpoint(
    optimizer: Optional[torch.optim.Optimizer] = None,
    scheduler: Optional[LRSchedulerType] = None,
    sampler: Optional[CutSampler] = None,
    scaler: Optional[GradScaler] = None,
    scaler: Optional["GradScaler"] = None,
    rank: int = 0,
) -> None:
    """Save model, optimizer, scheduler and training stats to file.
@ -857,7 +858,7 @@ def train_one_epoch(
    sp: spm.SentencePieceProcessor,
    train_dl: torch.utils.data.DataLoader,
    valid_dl: torch.utils.data.DataLoader,
    scaler: GradScaler,
    scaler: "GradScaler",
    model_avg: Optional[nn.Module] = None,
    tb_writer: Optional[SummaryWriter] = None,
    world_size: int = 1,
@ -903,7 +904,7 @@ def train_one_epoch(
        batch_size = len(batch["supervisions"]["text"])

        try:
            with torch.cuda.amp.autocast(enabled=params.use_fp16):
            with torch_autocast(enabled=params.use_fp16):
                loss, loss_info = compute_loss(
                    params=params,
                    model=model,
@ -1219,7 +1220,7 @@ def run(rank, world_size, args):
            params=params,
        )

    scaler = GradScaler(enabled=params.use_fp16, init_scale=1.0)
    scaler = create_grad_scaler(enabled=params.use_fp16, init_scale=1.0)
    if checkpoints and "grad_scaler" in checkpoints:
        logging.info("Loading grad scaler state dict")
        scaler.load_state_dict(checkpoints["grad_scaler"])
@ -1319,7 +1320,7 @@ def scan_pessimistic_batches_for_oom(
    for criterion, cuts in batches.items():
        batch = train_dl.dataset[cuts]
        try:
            with torch.cuda.amp.autocast(enabled=params.use_fp16):
            with torch_autocast(enabled=params.use_fp16):
                loss, _ = compute_loss(
                    params=params,
                    model=model,
Some files were not shown because too many files have changed in this diff.