Mirror of https://github.com/k2-fsa/icefall.git (synced 2025-09-10 17:44:20 +00:00)

Commit 5f23026b65: Merge branch 'k2-fsa:master' into tiny
@@ -15,5 +15,5 @@ mkdir -p data
 cd data
 [ ! -e fbank ] && ln -s ~/tmp/fbank-libri fbank
 cd ..
-./local/compute_fbank_librispeech.py
+./local/compute_fbank_librispeech.py --dataset 'test-clean test-other'
 ls -lh data/fbank/
@@ -25,7 +25,6 @@ repo=$(basename $repo_url)

 log "Display test files"
 tree $repo/
-soxi $repo/test_wavs/*.wav
 ls -lh $repo/test_wavs/*.wav

 pushd $repo/exp
@@ -18,7 +18,6 @@ repo=$(basename $repo_url)

 log "Display test files"
 tree $repo/
-soxi $repo/test_wavs/*.wav
 ls -lh $repo/test_wavs/*.wav

 pushd $repo/exp
@@ -20,7 +20,6 @@ abs_repo=$(realpath $repo)

 log "Display test files"
 tree $repo/
-soxi $repo/test_wavs/*.wav
 ls -lh $repo/test_wavs/*.wav

 pushd $repo/exp
@@ -19,7 +19,6 @@ repo=$(basename $repo_url)

 log "Display test files"
 tree $repo/
-soxi $repo/test_wavs/*.wav
 ls -lh $repo/test_wavs/*.wav

 for sym in 1 2 3; do
@@ -23,7 +23,6 @@ popd

 log "Display test files"
 tree $repo/
-soxi $repo/test_wavs/*.wav
 ls -lh $repo/test_wavs/*.wav

 pushd $repo/exp
@@ -22,7 +22,6 @@ popd

 log "Display test files"
 tree $repo/
-soxi $repo/test_wavs/*.wav
 ls -lh $repo/test_wavs/*.wav

 pushd $repo/exp
@@ -19,7 +19,6 @@ repo=$(basename $repo_url)

 log "Display test files"
 tree $repo/
-soxi $repo/test_wavs/*.wav
 ls -lh $repo/test_wavs/*.wav

 pushd $repo/exp
@@ -19,7 +19,6 @@ repo=$(basename $repo_url)

 log "Display test files"
 tree $repo/
-soxi $repo/test_wavs/*.wav
 ls -lh $repo/test_wavs/*.wav

 pushd $repo/exp
@@ -19,7 +19,6 @@ repo=$(basename $repo_url)

 log "Display test files"
 tree $repo/
-soxi $repo/test_wavs/*.wav
 ls -lh $repo/test_wavs/*.wav

 pushd $repo/exp
@@ -18,7 +18,6 @@ repo=$(basename $repo_url)

 log "Display test files"
 tree $repo/
-soxi $repo/test_wavs/*.wav
 ls -lh $repo/test_wavs/*.wav

 pushd $repo/exp
@@ -10,7 +10,7 @@ log() {

 cd egs/librispeech/ASR

-repo_url=https://huggingface.co/yfyeung/icefall-asr-librispeech-pruned_transducer_stateless7_ctc_bs-2022-12-14
+repo_url=https://huggingface.co/yfyeung/icefall-asr-librispeech-pruned_transducer_stateless7_ctc_bs-2023-01-29

 log "Downloading pre-trained model from $repo_url"
 GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url
@@ -18,7 +18,6 @@ repo=$(basename $repo_url)

 log "Display test files"
 tree $repo/
-soxi $repo/test_wavs/*.wav
 ls -lh $repo/test_wavs/*.wav

 pushd $repo/exp
@@ -19,7 +19,6 @@ repo=$(basename $repo_url)

 log "Display test files"
 tree $repo/
-soxi $repo/test_wavs/*.wav
 ls -lh $repo/test_wavs/*.wav

 pushd $repo
@@ -19,7 +19,6 @@ repo=$(basename $repo_url)

 log "Display test files"
 tree $repo/
-soxi $repo/test_wavs/*.wav
 ls -lh $repo/test_wavs/*.wav

 pushd $repo/exp
@@ -19,7 +19,6 @@ repo=$(basename $repo_url)

 log "Display test files"
 tree $repo/
-soxi $repo/test_wavs/*.wav
 ls -lh $repo/test_wavs/*.wav

 pushd $repo/exp
@@ -19,7 +19,6 @@ repo=$(basename $repo_url)

 log "Display test files"
 tree $repo/
-soxi $repo/test_wavs/*.wav
 ls -lh $repo/test_wavs/*.wav

 for sym in 1 2 3; do
@@ -18,7 +18,6 @@ repo=$(basename $repo_url)

 log "Display test files"
 tree $repo/
-soxi $repo/test_wavs/*.wav
 ls -lh $repo/test_wavs/*.wav

 pushd $repo/exp
@@ -19,7 +19,6 @@ repo=$(basename $repo_url)

 log "Display test files"
 tree $repo/
-soxi $repo/test_wavs/*.flac
 ls -lh $repo/test_wavs/*.flac

 log "CTC decoding"
@@ -19,7 +19,6 @@ repo=$(basename $repo_url)

 log "Display test files"
 tree $repo/
-soxi $repo/test_wavs/*.wav
 ls -lh $repo/test_wavs/*.wav

 for sym in 1 2 3; do
@@ -19,7 +19,6 @@ repo=$(basename $repo_url)

 log "Display test files"
 tree $repo/
-soxi $repo/test_wavs/*.wav
 ls -lh $repo/test_wavs/*.wav

 for sym in 1 2 3; do
@@ -19,7 +19,6 @@ repo=$(basename $repo_url)

 log "Display test files"
 tree $repo/
-soxi $repo/test_wavs/*.wav
 ls -lh $repo/test_wavs/*.wav

 for sym in 1 2 3; do
@@ -19,7 +19,6 @@ repo=$(basename $repo_url)

 log "Display test files"
 tree $repo/
-soxi $repo/test_wavs/*.wav
 ls -lh $repo/test_wavs/*.wav

 for sym in 1 2 3; do
@@ -19,7 +19,6 @@ repo=$(basename $repo_url)

 log "Display test files"
 tree $repo/
-soxi $repo/test_wavs/*.wav
 ls -lh $repo/test_wavs/*.wav

 for sym in 1 2 3; do
@@ -19,7 +19,6 @@ repo=$(basename $repo_url)

 log "Display test files"
 tree $repo/
-soxi $repo/test_wavs/*.wav
 ls -lh $repo/test_wavs/*.wav

 log "Beam search decoding"
@@ -20,7 +20,6 @@ repo=$(basename $repo_url)

 log "Display test files"
 tree $repo/
-soxi $repo/test_wavs/*.wav
 ls -lh $repo/test_wavs/*.wav

 pushd $repo/exp
.github/scripts/test-ncnn-export.sh (vendored): 67 changed lines
@@ -232,70 +232,3 @@ python3 ./pruned_transducer_stateless7_streaming/streaming-ncnn-decode.py \

 rm -rf $repo
 log "--------------------------------------------------------------------------"
-
-# Go back to the root directory of icefall repo
-popd
-
-pushd egs/csj/ASR
-
-log "=========================================================================="
-repo_url=https://huggingface.co/TeoWenShen/icefall-asr-csj-pruned-transducer-stateless7-streaming-230208
-GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url
-repo=$(basename $repo_url)
-
-pushd $repo
-git lfs pull --include "exp_fluent/pretrained.pt"
-git lfs pull --include "exp_disfluent/pretrained.pt"
-
-cd exp_fluent
-ln -s pretrained.pt epoch-99.pt
-
-cd ../exp_disfluent
-ln -s pretrained.pt epoch-99.pt
-
-cd ../test_wavs
-git lfs pull --include "*.wav"
-popd
-
-log "Export via torch.jit.trace()"
-
-for exp in exp_fluent exp_disfluent; do
-  ./pruned_transducer_stateless7_streaming/export-for-ncnn.py \
-    --exp-dir $repo/$exp/ \
-    --lang $repo/data/lang_char \
-    --epoch 99 \
-    --avg 1 \
-    --use-averaged-model 0 \
-    \
-    --decode-chunk-len 32 \
-    --num-left-chunks 4 \
-    --num-encoder-layers "2,4,3,2,4" \
-    --feedforward-dims "1024,1024,2048,2048,1024" \
-    --nhead "8,8,8,8,8" \
-    --encoder-dims "384,384,384,384,384" \
-    --attention-dims "192,192,192,192,192" \
-    --encoder-unmasked-dims "256,256,256,256,256" \
-    --zipformer-downsampling-factors "1,2,4,8,2" \
-    --cnn-module-kernels "31,31,31,31,31" \
-    --decoder-dim 512 \
-    --joiner-dim 512
-
-  pnnx $repo/$exp/encoder_jit_trace-pnnx.pt
-  pnnx $repo/$exp/decoder_jit_trace-pnnx.pt
-  pnnx $repo/$exp/joiner_jit_trace-pnnx.pt
-
-  for wav in aps-smp.wav interview_aps-smp.wav reproduction-smp.wav sps-smp.wav; do
-    python3 ./pruned_transducer_stateless7_streaming/streaming-ncnn-decode.py \
-      --tokens $repo/data/lang_char/tokens.txt \
-      --encoder-param-filename $repo/$exp/encoder_jit_trace-pnnx.ncnn.param \
-      --encoder-bin-filename $repo/$exp/encoder_jit_trace-pnnx.ncnn.bin \
-      --decoder-param-filename $repo/$exp/decoder_jit_trace-pnnx.ncnn.param \
-      --decoder-bin-filename $repo/$exp/decoder_jit_trace-pnnx.ncnn.bin \
-      --joiner-param-filename $repo/$exp/joiner_jit_trace-pnnx.ncnn.param \
-      --joiner-bin-filename $repo/$exp/joiner_jit_trace-pnnx.ncnn.bin \
-      $repo/test_wavs/$wav
-  done
-done
-
-rm -rf $repo
-log "--------------------------------------------------------------------------"
.github/workflows/run-aishell-2022-06-20.yml (vendored): 4 changed lines
@@ -65,7 +65,7 @@ jobs:
       run: |
         grep -v '^#' ./requirements-ci.txt | xargs -n 1 -L 1 pip install
         pip uninstall -y protobuf
-        pip install --no-binary protobuf protobuf
+        pip install --no-binary protobuf protobuf==3.20.*

     - name: Cache kaldifeat
       id: my-cache
@@ -87,7 +87,7 @@ jobs:
       GITHUB_EVENT_NAME: ${{ github.event_name }}
       GITHUB_EVENT_LABEL_NAME: ${{ github.event.label.name }}
       run: |
-        sudo apt-get -qq install git-lfs tree sox
+        sudo apt-get -qq install git-lfs tree
         export PYTHONPATH=$PWD:$PYTHONPATH
         export PYTHONPATH=~/tmp/kaldifeat/kaldifeat/python:$PYTHONPATH
         export PYTHONPATH=~/tmp/kaldifeat/build/lib:$PYTHONPATH
@@ -64,7 +64,7 @@ jobs:
       run: |
         grep -v '^#' ./requirements-ci.txt | xargs -n 1 -L 1 pip install
         pip uninstall -y protobuf
-        pip install --no-binary protobuf protobuf
+        pip install --no-binary protobuf protobuf==3.20.*

     - name: Cache kaldifeat
       id: my-cache
@@ -64,7 +64,7 @@ jobs:
       run: |
         grep -v '^#' ./requirements-ci.txt | xargs -n 1 -L 1 pip install
         pip uninstall -y protobuf
-        pip install --no-binary protobuf protobuf
+        pip install --no-binary protobuf protobuf==3.20.*

     - name: Cache kaldifeat
       id: my-cache
@@ -123,7 +123,7 @@ jobs:
         ln -sfv ~/tmp/fbank-libri egs/librispeech/ASR/data/fbank
         ls -lh egs/librispeech/ASR/data/*

-        sudo apt-get -qq install git-lfs tree sox
+        sudo apt-get -qq install git-lfs tree
         export PYTHONPATH=$PWD:$PYTHONPATH
         export PYTHONPATH=~/tmp/kaldifeat/kaldifeat/python:$PYTHONPATH
         export PYTHONPATH=~/tmp/kaldifeat/build/lib:$PYTHONPATH
@@ -64,7 +64,7 @@ jobs:
       run: |
         grep -v '^#' ./requirements-ci.txt | xargs -n 1 -L 1 pip install
         pip uninstall -y protobuf
-        pip install --no-binary protobuf protobuf
+        pip install --no-binary protobuf protobuf==3.20.*

     - name: Cache kaldifeat
       id: my-cache
@@ -123,7 +123,7 @@ jobs:
         ln -sfv ~/tmp/fbank-libri egs/librispeech/ASR/data/fbank
         ls -lh egs/librispeech/ASR/data/*

-        sudo apt-get -qq install git-lfs tree sox
+        sudo apt-get -qq install git-lfs tree
         export PYTHONPATH=$PWD:$PYTHONPATH
         export PYTHONPATH=~/tmp/kaldifeat/kaldifeat/python:$PYTHONPATH
         export PYTHONPATH=~/tmp/kaldifeat/build/lib:$PYTHONPATH
@@ -64,7 +64,7 @@ jobs:
       run: |
         grep -v '^#' ./requirements-ci.txt | xargs -n 1 -L 1 pip install
         pip uninstall -y protobuf
-        pip install --no-binary protobuf protobuf
+        pip install --no-binary protobuf protobuf==3.20.*

     - name: Cache kaldifeat
       id: my-cache
@@ -123,7 +123,7 @@ jobs:
         ln -sfv ~/tmp/fbank-libri egs/librispeech/ASR/data/fbank
         ls -lh egs/librispeech/ASR/data/*

-        sudo apt-get -qq install git-lfs tree sox
+        sudo apt-get -qq install git-lfs tree
         export PYTHONPATH=$PWD:$PYTHONPATH
         export PYTHONPATH=~/tmp/kaldifeat/kaldifeat/python:$PYTHONPATH
         export PYTHONPATH=~/tmp/kaldifeat/build/lib:$PYTHONPATH
@@ -64,7 +64,7 @@ jobs:
       run: |
         grep -v '^#' ./requirements-ci.txt | xargs -n 1 -L 1 pip install
         pip uninstall -y protobuf
-        pip install --no-binary protobuf protobuf
+        pip install --no-binary protobuf protobuf==3.20.*

     - name: Cache kaldifeat
       id: my-cache
@@ -123,7 +123,7 @@ jobs:
         ln -sfv ~/tmp/fbank-libri egs/librispeech/ASR/data/fbank
         ls -lh egs/librispeech/ASR/data/*

-        sudo apt-get -qq install git-lfs tree sox
+        sudo apt-get -qq install git-lfs tree
         export PYTHONPATH=$PWD:$PYTHONPATH
         export PYTHONPATH=~/tmp/kaldifeat/kaldifeat/python:$PYTHONPATH
         export PYTHONPATH=~/tmp/kaldifeat/build/lib:$PYTHONPATH
@@ -64,7 +64,7 @@ jobs:
       run: |
         grep -v '^#' ./requirements-ci.txt | xargs -n 1 -L 1 pip install
         pip uninstall -y protobuf
-        pip install --no-binary protobuf protobuf
+        pip install --no-binary protobuf protobuf==3.20.*

     - name: Cache kaldifeat
       id: my-cache
@@ -123,7 +123,7 @@ jobs:
         ln -sfv ~/tmp/fbank-libri egs/librispeech/ASR/data/fbank
         ls -lh egs/librispeech/ASR/data/*

-        sudo apt-get -qq install git-lfs tree sox
+        sudo apt-get -qq install git-lfs tree
         export PYTHONPATH=$PWD:$PYTHONPATH
         export PYTHONPATH=~/tmp/kaldifeat/kaldifeat/python:$PYTHONPATH
         export PYTHONPATH=~/tmp/kaldifeat/build/lib:$PYTHONPATH
@@ -60,7 +60,7 @@ jobs:
       run: |
         grep -v '^#' ./requirements-ci.txt | xargs -n 1 -L 1 pip install
         pip uninstall -y protobuf
-        pip install --no-binary protobuf protobuf
+        pip install --no-binary protobuf protobuf==3.20.*

     - name: Cache kaldifeat
       id: my-cache
@@ -119,7 +119,7 @@ jobs:
         ln -sfv ~/tmp/fbank-libri egs/librispeech/ASR/data/fbank
         ls -lh egs/librispeech/ASR/data/*

-        sudo apt-get -qq install git-lfs tree sox
+        sudo apt-get -qq install git-lfs tree
         export PYTHONPATH=$PWD:$PYTHONPATH
         export PYTHONPATH=~/tmp/kaldifeat/kaldifeat/python:$PYTHONPATH
         export PYTHONPATH=~/tmp/kaldifeat/build/lib:$PYTHONPATH
@@ -64,7 +64,7 @@ jobs:
       run: |
         grep -v '^#' ./requirements-ci.txt | xargs -n 1 -L 1 pip install
         pip uninstall -y protobuf
-        pip install --no-binary protobuf protobuf
+        pip install --no-binary protobuf protobuf==3.20.*

     - name: Cache kaldifeat
       id: my-cache
@@ -123,7 +123,7 @@ jobs:
         ln -sfv ~/tmp/fbank-libri egs/librispeech/ASR/data/fbank
         ls -lh egs/librispeech/ASR/data/*

-        sudo apt-get -qq install git-lfs tree sox
+        sudo apt-get -qq install git-lfs tree
         export PYTHONPATH=$PWD:$PYTHONPATH
         export PYTHONPATH=~/tmp/kaldifeat/kaldifeat/python:$PYTHONPATH
         export PYTHONPATH=~/tmp/kaldifeat/build/lib:$PYTHONPATH
@@ -35,7 +35,7 @@ on:

 jobs:
   run_librispeech_2022_12_15_zipformer_ctc_bs:
-    if: github.event.label.name == 'ready' || github.event.label.name == 'run-decode' || github.event.label.name == 'blank-skip' || github.event_name == 'push' || github.event_name == 'schedule'
+    if: github.event.label.name == 'run-decode' || github.event.label.name == 'blank-skip' || github.event_name == 'push' || github.event_name == 'schedule'
     runs-on: ${{ matrix.os }}
     strategy:
       matrix:
@@ -60,7 +60,7 @@ jobs:
       run: |
         grep -v '^#' ./requirements-ci.txt | xargs -n 1 -L 1 pip install
         pip uninstall -y protobuf
-        pip install --no-binary protobuf protobuf
+        pip install --no-binary protobuf protobuf==3.20.*

     - name: Cache kaldifeat
       id: my-cache
@@ -119,7 +119,7 @@ jobs:
         ln -sfv ~/tmp/fbank-libri egs/librispeech/ASR/data/fbank
         ls -lh egs/librispeech/ASR/data/*

-        sudo apt-get -qq install git-lfs tree sox
+        sudo apt-get -qq install git-lfs tree
         export PYTHONPATH=$PWD:$PYTHONPATH
         export PYTHONPATH=~/tmp/kaldifeat/kaldifeat/python:$PYTHONPATH
         export PYTHONPATH=~/tmp/kaldifeat/build/lib:$PYTHONPATH
@@ -64,7 +64,7 @@ jobs:
       run: |
         grep -v '^#' ./requirements-ci.txt | xargs -n 1 -L 1 pip install
         pip uninstall -y protobuf
-        pip install --no-binary protobuf protobuf
+        pip install --no-binary protobuf protobuf==3.20.*

     - name: Cache kaldifeat
       id: my-cache
@@ -123,7 +123,7 @@ jobs:
         ln -sfv ~/tmp/fbank-libri egs/librispeech/ASR/data/fbank
         ls -lh egs/librispeech/ASR/data/*

-        sudo apt-get -qq install git-lfs tree sox
+        sudo apt-get -qq install git-lfs tree
         export PYTHONPATH=$PWD:$PYTHONPATH
         export PYTHONPATH=~/tmp/kaldifeat/kaldifeat/python:$PYTHONPATH
         export PYTHONPATH=~/tmp/kaldifeat/build/lib:$PYTHONPATH
@@ -64,7 +64,7 @@ jobs:
       run: |
         grep -v '^#' ./requirements-ci.txt | xargs -n 1 -L 1 pip install
         pip uninstall -y protobuf
-        pip install --no-binary protobuf protobuf
+        pip install --no-binary protobuf protobuf==3.20.*

     - name: Cache kaldifeat
       id: my-cache
@@ -123,7 +123,7 @@ jobs:
         ln -sfv ~/tmp/fbank-libri egs/librispeech/ASR/data/fbank
         ls -lh egs/librispeech/ASR/data/*

-        sudo apt-get -qq install git-lfs tree sox
+        sudo apt-get -qq install git-lfs tree
         export PYTHONPATH=$PWD:$PYTHONPATH
         export PYTHONPATH=~/tmp/kaldifeat/kaldifeat/python:$PYTHONPATH
         export PYTHONPATH=~/tmp/kaldifeat/build/lib:$PYTHONPATH
@@ -47,7 +47,7 @@ jobs:
       run: |
         grep -v '^#' ./requirements-ci.txt | xargs -n 1 -L 1 pip install
         pip uninstall -y protobuf
-        pip install --no-binary protobuf protobuf
+        pip install --no-binary protobuf protobuf==3.20.*

     - name: Cache kaldifeat
       id: my-cache
@@ -106,7 +106,7 @@ jobs:
         ln -sfv ~/tmp/fbank-libri egs/librispeech/ASR/data/fbank
         ls -lh egs/librispeech/ASR/data/*

-        sudo apt-get -qq install git-lfs tree sox
+        sudo apt-get -qq install git-lfs tree
         export PYTHONPATH=$PWD:$PYTHONPATH
         export PYTHONPATH=~/tmp/kaldifeat/kaldifeat/python:$PYTHONPATH
         export PYTHONPATH=~/tmp/kaldifeat/build/lib:$PYTHONPATH
@@ -64,7 +64,7 @@ jobs:
       run: |
         grep -v '^#' ./requirements-ci.txt | xargs -n 1 -L 1 pip install
         pip uninstall -y protobuf
-        pip install --no-binary protobuf protobuf
+        pip install --no-binary protobuf protobuf==3.20.*

     - name: Cache kaldifeat
       id: my-cache
@@ -123,7 +123,7 @@ jobs:
         ln -sfv ~/tmp/fbank-libri egs/librispeech/ASR/data/fbank
         ls -lh egs/librispeech/ASR/data/*

-        sudo apt-get -qq install git-lfs tree sox
+        sudo apt-get -qq install git-lfs tree
         export PYTHONPATH=$PWD:$PYTHONPATH
         export PYTHONPATH=~/tmp/kaldifeat/kaldifeat/python:$PYTHONPATH
         export PYTHONPATH=~/tmp/kaldifeat/build/lib:$PYTHONPATH
@@ -64,7 +64,7 @@ jobs:
       run: |
         grep -v '^#' ./requirements-ci.txt | xargs -n 1 -L 1 pip install
         pip uninstall -y protobuf
-        pip install --no-binary protobuf protobuf
+        pip install --no-binary protobuf protobuf==3.20.*

     - name: Cache kaldifeat
       id: my-cache
@@ -123,7 +123,7 @@ jobs:
         ln -sfv ~/tmp/fbank-libri egs/librispeech/ASR/data/fbank
         ls -lh egs/librispeech/ASR/data/*

-        sudo apt-get -qq install git-lfs tree sox
+        sudo apt-get -qq install git-lfs tree
         export PYTHONPATH=$PWD:$PYTHONPATH
         export PYTHONPATH=~/tmp/kaldifeat/kaldifeat/python:$PYTHONPATH
         export PYTHONPATH=~/tmp/kaldifeat/build/lib:$PYTHONPATH
@@ -64,7 +64,7 @@ jobs:
       run: |
         grep -v '^#' ./requirements-ci.txt | xargs -n 1 -L 1 pip install
         pip uninstall -y protobuf
-        pip install --no-binary protobuf protobuf
+        pip install --no-binary protobuf protobuf==3.20.*

     - name: Cache kaldifeat
       id: my-cache
@@ -123,7 +123,7 @@ jobs:
         ln -sfv ~/tmp/fbank-libri egs/librispeech/ASR/data/fbank
         ls -lh egs/librispeech/ASR/data/*

-        sudo apt-get -qq install git-lfs tree sox
+        sudo apt-get -qq install git-lfs tree
         export PYTHONPATH=$PWD:$PYTHONPATH
         export PYTHONPATH=~/tmp/kaldifeat/kaldifeat/python:$PYTHONPATH
         export PYTHONPATH=~/tmp/kaldifeat/build/lib:$PYTHONPATH
@@ -54,7 +54,7 @@ jobs:
       run: |
         grep -v '^#' ./requirements-ci.txt | xargs -n 1 -L 1 pip install
         pip uninstall -y protobuf
-        pip install --no-binary protobuf protobuf
+        pip install --no-binary protobuf protobuf==3.20.*

     - name: Cache kaldifeat
       id: my-cache
@@ -73,7 +73,7 @@ jobs:
     - name: Inference with pre-trained model
       shell: bash
       run: |
-        sudo apt-get -qq install git-lfs tree sox
+        sudo apt-get -qq install git-lfs tree
         export PYTHONPATH=$PWD:$PYTHONPATH
         export PYTHONPATH=~/tmp/kaldifeat/kaldifeat/python:$PYTHONPATH
         export PYTHONPATH=~/tmp/kaldifeat/build/lib:$PYTHONPATH
@@ -63,7 +63,7 @@ jobs:
       run: |
         grep -v '^#' ./requirements-ci.txt | xargs -n 1 -L 1 pip install
         pip uninstall -y protobuf
-        pip install --no-binary protobuf protobuf
+        pip install --no-binary protobuf protobuf==3.20.*

     - name: Cache kaldifeat
       id: my-cache
@@ -122,7 +122,7 @@ jobs:
         ln -sfv ~/tmp/fbank-libri egs/librispeech/ASR/data/fbank
         ls -lh egs/librispeech/ASR/data/*

-        sudo apt-get -qq install git-lfs tree sox
+        sudo apt-get -qq install git-lfs tree
         export PYTHONPATH=$PWD:$PYTHONPATH
         export PYTHONPATH=~/tmp/kaldifeat/kaldifeat/python:$PYTHONPATH
         export PYTHONPATH=~/tmp/kaldifeat/build/lib:$PYTHONPATH
@@ -63,7 +63,7 @@ jobs:
       run: |
         grep -v '^#' ./requirements-ci.txt | xargs -n 1 -L 1 pip install
         pip uninstall -y protobuf
-        pip install --no-binary protobuf protobuf
+        pip install --no-binary protobuf protobuf==3.20.*

     - name: Cache kaldifeat
       id: my-cache
@@ -122,7 +122,7 @@ jobs:
         ln -sfv ~/tmp/fbank-libri egs/librispeech/ASR/data/fbank
         ls -lh egs/librispeech/ASR/data/*

-        sudo apt-get -qq install git-lfs tree sox
+        sudo apt-get -qq install git-lfs tree
         export PYTHONPATH=$PWD:$PYTHONPATH
         export PYTHONPATH=~/tmp/kaldifeat/kaldifeat/python:$PYTHONPATH
         export PYTHONPATH=~/tmp/kaldifeat/build/lib:$PYTHONPATH
@@ -54,7 +54,7 @@ jobs:
       run: |
         grep -v '^#' ./requirements-ci.txt | xargs -n 1 -L 1 pip install
         pip uninstall -y protobuf
-        pip install --no-binary protobuf protobuf
+        pip install --no-binary protobuf protobuf==3.20.*

     - name: Cache kaldifeat
       id: my-cache
@@ -73,7 +73,7 @@ jobs:
     - name: Inference with pre-trained model
       shell: bash
       run: |
-        sudo apt-get -qq install git-lfs tree sox
+        sudo apt-get -qq install git-lfs tree
         export PYTHONPATH=$PWD:$PYTHONPATH
         export PYTHONPATH=~/tmp/kaldifeat/kaldifeat/python:$PYTHONPATH
         export PYTHONPATH=~/tmp/kaldifeat/build/lib:$PYTHONPATH
@@ -54,7 +54,7 @@ jobs:
       run: |
         grep -v '^#' ./requirements-ci.txt | xargs -n 1 -L 1 pip install
         pip uninstall -y protobuf
-        pip install --no-binary protobuf protobuf
+        pip install --no-binary protobuf protobuf==3.20.*

     - name: Cache kaldifeat
       id: my-cache
@@ -73,7 +73,7 @@ jobs:
     - name: Inference with pre-trained model
       shell: bash
       run: |
-        sudo apt-get -qq install git-lfs tree sox
+        sudo apt-get -qq install git-lfs tree
         export PYTHONPATH=$PWD:$PYTHONPATH
         export PYTHONPATH=~/tmp/kaldifeat/kaldifeat/python:$PYTHONPATH
         export PYTHONPATH=~/tmp/kaldifeat/build/lib:$PYTHONPATH
@@ -63,7 +63,7 @@ jobs:
       run: |
         grep -v '^#' ./requirements-ci.txt | xargs -n 1 -L 1 pip install
         pip uninstall -y protobuf
-        pip install --no-binary protobuf protobuf
+        pip install --no-binary protobuf protobuf==3.20.*

     - name: Cache kaldifeat
       id: my-cache
@@ -122,7 +122,7 @@ jobs:
         ln -sfv ~/tmp/fbank-libri egs/librispeech/ASR/data/fbank
         ls -lh egs/librispeech/ASR/data/*

-        sudo apt-get -qq install git-lfs tree sox
+        sudo apt-get -qq install git-lfs tree
         export PYTHONPATH=$PWD:$PYTHONPATH
         export PYTHONPATH=~/tmp/kaldifeat/kaldifeat/python:$PYTHONPATH
         export PYTHONPATH=~/tmp/kaldifeat/build/lib:$PYTHONPATH
@@ -54,7 +54,7 @@ jobs:
       run: |
         grep -v '^#' ./requirements-ci.txt | xargs -n 1 -L 1 pip install
         pip uninstall -y protobuf
-        pip install --no-binary protobuf protobuf
+        pip install --no-binary protobuf protobuf==3.20.*

     - name: Cache kaldifeat
       id: my-cache
@@ -73,7 +73,7 @@ jobs:
     - name: Inference with pre-trained model
       shell: bash
       run: |
-        sudo apt-get -qq install git-lfs tree sox
+        sudo apt-get -qq install git-lfs tree
         export PYTHONPATH=$PWD:$PYTHONPATH
         export PYTHONPATH=~/tmp/kaldifeat/kaldifeat/python:$PYTHONPATH
         export PYTHONPATH=~/tmp/kaldifeat/build/lib:$PYTHONPATH
.github/workflows/run-ptb-rnn-lm.yml (vendored): 2 changed lines
@@ -47,7 +47,7 @@ jobs:
       run: |
         grep -v '^#' ./requirements-ci.txt | grep -v kaldifst | xargs -n 1 -L 1 pip install
         pip uninstall -y protobuf
-        pip install --no-binary protobuf protobuf
+        pip install --no-binary protobuf protobuf==3.20.*

     - name: Prepare data
       shell: bash
@@ -54,7 +54,7 @@ jobs:
       run: |
         grep -v '^#' ./requirements-ci.txt | xargs -n 1 -L 1 pip install
         pip uninstall -y protobuf
-        pip install --no-binary protobuf protobuf
+        pip install --no-binary protobuf protobuf==3.20.*

     - name: Cache kaldifeat
       id: my-cache
@@ -76,7 +76,7 @@ jobs:
       GITHUB_EVENT_NAME: ${{ github.event_name }}
       GITHUB_EVENT_LABEL_NAME: ${{ github.event.label.name }}
       run: |
-        sudo apt-get -qq install git-lfs tree sox
+        sudo apt-get -qq install git-lfs tree
         export PYTHONPATH=$PWD:$PYTHONPATH
         export PYTHONPATH=~/tmp/kaldifeat/kaldifeat/python:$PYTHONPATH
         export PYTHONPATH=~/tmp/kaldifeat/build/lib:$PYTHONPATH
.github/workflows/run-yesno-recipe.yml (vendored): 2 changed lines
@@ -67,7 +67,7 @@ jobs:
       run: |
         grep -v '^#' ./requirements-ci.txt | grep -v kaldifst | xargs -n 1 -L 1 pip install
         pip uninstall -y protobuf
-        pip install --no-binary protobuf protobuf
+        pip install --no-binary protobuf protobuf==3.20.*

     - name: Run yesno recipe
       shell: bash
.github/workflows/test-ncnn-export.yml (vendored): 2 changed lines
@@ -46,7 +46,7 @@ jobs:
       run: |
         grep -v '^#' ./requirements-ci.txt | xargs -n 1 -L 1 pip install
         pip uninstall -y protobuf
-        pip install --no-binary protobuf protobuf
+        pip install --no-binary protobuf protobuf==3.20.*

     - name: Cache kaldifeat
       id: my-cache
.github/workflows/test-onnx-export.yml (vendored): 2 changed lines
@@ -46,7 +46,7 @@ jobs:
       run: |
         grep -v '^#' ./requirements-ci.txt | xargs -n 1 -L 1 pip install
         pip uninstall -y protobuf
-        pip install --no-binary protobuf protobuf
+        pip install --no-binary protobuf protobuf==3.20.*

     - name: Cache kaldifeat
       id: my-cache
.github/workflows/test.yml (vendored): 12 changed lines
@@ -56,7 +56,7 @@ jobs:
       run: |
         sudo apt update
         sudo apt install -q -y libsndfile1-dev libsndfile1 ffmpeg
-        sudo apt install -q -y --fix-missing sox libsox-dev libsox-fmt-all
+        sudo apt install -q -y --fix-missing libsox-dev libsox-fmt-all

     - name: Install Python dependencies
       run: |
@@ -70,7 +70,7 @@ jobs:
         pip install git+https://github.com/lhotse-speech/lhotse
         # icefall requirements
         pip uninstall -y protobuf
-        pip install --no-binary protobuf protobuf
+        pip install --no-binary protobuf protobuf==3.20.*

         pip install kaldifst
         pip install onnxruntime
@@ -119,8 +119,8 @@ jobs:
         cd ../transducer_stateless
         pytest -v -s

-        cd ../transducer
-        pytest -v -s
+        # cd ../transducer
+        # pytest -v -s

         cd ../transducer_stateless2
         pytest -v -s
@@ -157,8 +157,8 @@ jobs:
         cd ../transducer_stateless
         pytest -v -s

-        cd ../transducer
-        pytest -v -s
+        # cd ../transducer
+        # pytest -v -s

         cd ../transducer_stateless2
         pytest -v -s
@@ -81,6 +81,7 @@ todo_include_todos = True

 rst_epilog = """
 .. _sherpa-ncnn: https://github.com/k2-fsa/sherpa-ncnn
 .. _sherpa-onnx: https://github.com/k2-fsa/sherpa-onnx
 .. _icefall: https://github.com/k2-fsa/icefall
+.. _git-lfs: https://git-lfs.com/
 .. _ncnn: https://github.com/tencent/ncnn
@@ -0,0 +1,74 @@
2023-02-27 20:23:07,473 INFO [export-for-ncnn.py:246] device: cpu
2023-02-27 20:23:07,477 INFO [export-for-ncnn.py:255] {'best_train_loss': inf, 'best_valid_loss': inf, 'best_train_epoch': -1, 'best_valid_epoch': -1, 'batch_idx_train': 0, 'log_interval': 50, 'reset_interval': 200, 'valid_interval': 3000, 'feature_dim': 80, 'subsampling_factor': 4, 'warm_step': 2000, 'env_info': {'k2-version': '1.23.4', 'k2-build-type': 'Release', 'k2-with-cuda': True, 'k2-git-sha1': '62e404dd3f3a811d73e424199b3408e309c06e1a', 'k2-git-date': 'Mon Jan 30 10:26:16 2023', 'lhotse-version': '1.12.0.dev+missing.version.file', 'torch-version': '1.10.0+cu102', 'torch-cuda-available': True, 'torch-cuda-version': '10.2', 'python-version': '3.8', 'icefall-git-branch': 'master', 'icefall-git-sha1': '6d7a559-clean', 'icefall-git-date': 'Thu Feb 16 19:47:54 2023', 'icefall-path': '/star-fj/fangjun/open-source/icefall-2', 'k2-path': '/star-fj/fangjun/open-source/k2/k2/python/k2/__init__.py', 'lhotse-path': '/star-fj/fangjun/open-source/lhotse/lhotse/__init__.py', 'hostname': 'de-74279-k2-train-3-1220120619-7695ff496b-s9n4w', 'IP address': '10.177.6.147'}, 'epoch': 99, 'iter': 0, 'avg': 1, 'exp_dir': PosixPath('icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29/exp'), 'bpe_model': './icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29/data/lang_bpe_500/bpe.model', 'context_size': 2, 'use_averaged_model': False, 'num_encoder_layers': '2,4,3,2,4', 'feedforward_dims': '1024,1024,2048,2048,1024', 'nhead': '8,8,8,8,8', 'encoder_dims': '384,384,384,384,384', 'attention_dims': '192,192,192,192,192', 'encoder_unmasked_dims': '256,256,256,256,256', 'zipformer_downsampling_factors': '1,2,4,8,2', 'cnn_module_kernels': '31,31,31,31,31', 'decoder_dim': 512, 'joiner_dim': 512, 'short_chunk_size': 50, 'num_left_chunks': 4, 'decode_chunk_len': 32, 'blank_id': 0, 'vocab_size': 500}
2023-02-27 20:23:07,477 INFO [export-for-ncnn.py:257] About to create model
2023-02-27 20:23:08,023 INFO [zipformer2.py:419] At encoder stack 4, which has downsampling_factor=2, we will combine the outputs of layers 1 and 3, with downsampling_factors=2 and 8.
2023-02-27 20:23:08,037 INFO [checkpoint.py:112] Loading checkpoint from icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29/exp/epoch-99.pt
2023-02-27 20:23:08,655 INFO [export-for-ncnn.py:346] encoder parameters: 68944004
2023-02-27 20:23:08,655 INFO [export-for-ncnn.py:347] decoder parameters: 260096
2023-02-27 20:23:08,655 INFO [export-for-ncnn.py:348] joiner parameters: 716276
2023-02-27 20:23:08,656 INFO [export-for-ncnn.py:349] total parameters: 69920376
2023-02-27 20:23:08,656 INFO [export-for-ncnn.py:351] Using torch.jit.trace()
2023-02-27 20:23:08,656 INFO [export-for-ncnn.py:353] Exporting encoder
2023-02-27 20:23:08,656 INFO [export-for-ncnn.py:174] decode_chunk_len: 32
2023-02-27 20:23:08,656 INFO [export-for-ncnn.py:175] T: 39
/star-fj/fangjun/open-source/icefall-2/egs/librispeech/ASR/pruned_transducer_stateless7_streaming/zipformer2.py:1344: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!
  assert cached_len.size(0) == self.num_layers, (
/star-fj/fangjun/open-source/icefall-2/egs/librispeech/ASR/pruned_transducer_stateless7_streaming/zipformer2.py:1348: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!
  assert cached_avg.size(0) == self.num_layers, (
/star-fj/fangjun/open-source/icefall-2/egs/librispeech/ASR/pruned_transducer_stateless7_streaming/zipformer2.py:1352: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!
  assert cached_key.size(0) == self.num_layers, (
/star-fj/fangjun/open-source/icefall-2/egs/librispeech/ASR/pruned_transducer_stateless7_streaming/zipformer2.py:1356: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!
  assert cached_val.size(0) == self.num_layers, (
/star-fj/fangjun/open-source/icefall-2/egs/librispeech/ASR/pruned_transducer_stateless7_streaming/zipformer2.py:1360: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!
  assert cached_val2.size(0) == self.num_layers, (
/star-fj/fangjun/open-source/icefall-2/egs/librispeech/ASR/pruned_transducer_stateless7_streaming/zipformer2.py:1364: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!
  assert cached_conv1.size(0) == self.num_layers, (
/star-fj/fangjun/open-source/icefall-2/egs/librispeech/ASR/pruned_transducer_stateless7_streaming/zipformer2.py:1368: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!
  assert cached_conv2.size(0) == self.num_layers, (
/star-fj/fangjun/open-source/icefall-2/egs/librispeech/ASR/pruned_transducer_stateless7_streaming/zipformer2.py:1373: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!
  assert self.left_context_len == cached_key.shape[1], (
/star-fj/fangjun/open-source/icefall-2/egs/librispeech/ASR/pruned_transducer_stateless7_streaming/zipformer2.py:1884: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!
  assert self.x_size == x.size(0), (self.x_size, x.size(0))
/star-fj/fangjun/open-source/icefall-2/egs/librispeech/ASR/pruned_transducer_stateless7_streaming/zipformer2.py:2442: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!
  assert cached_key.shape[0] == self.left_context_len, (
/star-fj/fangjun/open-source/icefall-2/egs/librispeech/ASR/pruned_transducer_stateless7_streaming/zipformer2.py:2449: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!
  assert cached_key.shape[0] == cached_val.shape[0], (
/star-fj/fangjun/open-source/icefall-2/egs/librispeech/ASR/pruned_transducer_stateless7_streaming/zipformer2.py:2469: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!
  assert cached_key.shape[0] == left_context_len, (
/star-fj/fangjun/open-source/icefall-2/egs/librispeech/ASR/pruned_transducer_stateless7_streaming/zipformer2.py:2473: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!
  assert cached_val.shape[0] == left_context_len, (
/star-fj/fangjun/open-source/icefall-2/egs/librispeech/ASR/pruned_transducer_stateless7_streaming/zipformer2.py:2483: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!
  assert kv_len == k.shape[0], (kv_len, k.shape)
/star-fj/fangjun/open-source/icefall-2/egs/librispeech/ASR/pruned_transducer_stateless7_streaming/zipformer2.py:2570: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!
  assert list(attn_output.size()) == [bsz * num_heads, seq_len, head_dim // 2]
/star-fj/fangjun/open-source/icefall-2/egs/librispeech/ASR/pruned_transducer_stateless7_streaming/zipformer2.py:2926: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!
  assert cache.shape == (x.size(0), x.size(1), self.lorder), (
/star-fj/fangjun/open-source/icefall-2/egs/librispeech/ASR/pruned_transducer_stateless7_streaming/zipformer2.py:2652: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!
  assert x.shape[0] == self.x_size, (x.shape[0], self.x_size)
/star-fj/fangjun/open-source/icefall-2/egs/librispeech/ASR/pruned_transducer_stateless7_streaming/zipformer2.py:2653: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!
  assert x.shape[2] == self.embed_dim, (x.shape[2], self.embed_dim)
/star-fj/fangjun/open-source/icefall-2/egs/librispeech/ASR/pruned_transducer_stateless7_streaming/zipformer2.py:2666: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!
  assert cached_val.shape[0] == self.left_context_len, (
/star-fj/fangjun/open-source/icefall-2/egs/librispeech/ASR/pruned_transducer_stateless7_streaming/zipformer2.py:1543: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!
  assert src.shape[0] == self.in_x_size, (src.shape[0], self.in_x_size)
/star-fj/fangjun/open-source/icefall-2/egs/librispeech/ASR/pruned_transducer_stateless7_streaming/zipformer2.py:1637: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!
  assert src.shape[0] == self.in_x_size, (
/star-fj/fangjun/open-source/icefall-2/egs/librispeech/ASR/pruned_transducer_stateless7_streaming/zipformer2.py:1643: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!
  assert src.shape[2] == self.in_channels, (src.shape[2], self.in_channels)
/star-fj/fangjun/open-source/icefall-2/egs/librispeech/ASR/pruned_transducer_stateless7_streaming/zipformer2.py:1571: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!
  if src.shape[0] != self.in_x_size:
/star-fj/fangjun/open-source/icefall-2/egs/librispeech/ASR/pruned_transducer_stateless7_streaming/zipformer2.py:1763: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!
  assert src1.shape[:-1] == src2.shape[:-1], (src1.shape, src2.shape)
/star-fj/fangjun/open-source/icefall-2/egs/librispeech/ASR/pruned_transducer_stateless7_streaming/zipformer2.py:1779: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!
  assert src1.shape[-1] == self.dim1, (src1.shape[-1], self.dim1)
/star-fj/fangjun/open-source/icefall-2/egs/librispeech/ASR/pruned_transducer_stateless7_streaming/zipformer2.py:1780: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!
  assert src2.shape[-1] == self.dim2, (src2.shape[-1], self.dim2)
/star-fj/fangjun/py38/lib/python3.8/site-packages/torch/jit/_trace.py:958: TracerWarning: Encountering a list at the output of the tracer might cause the trace to be incorrect, this is only valid if the container structure does not change based on the module's inputs. Consider using a constant container instead (e.g. for `list`, use a `tuple` instead. for `dict`, use a `NamedTuple` instead). If you absolutely need this and know the side effects, pass strict=False to trace() to allow this behavior.
  module._c._create_method_from_trace(
2023-02-27 20:23:19,640 INFO [export-for-ncnn.py:182] Saved to icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29/exp/encoder_jit_trace-pnnx.pt
2023-02-27 20:23:19,646 INFO [export-for-ncnn.py:357] Exporting decoder
/star-fj/fangjun/open-source/icefall-2/egs/librispeech/ASR/pruned_transducer_stateless7_streaming/decoder.py:102: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!
  assert embedding_out.size(-1) == self.context_size
2023-02-27 20:23:19,686 INFO [export-for-ncnn.py:204] Saved to icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29/exp/decoder_jit_trace-pnnx.pt
2023-02-27 20:23:19,686 INFO [export-for-ncnn.py:361] Exporting joiner
2023-02-27 20:23:19,735 INFO [export-for-ncnn.py:231] Saved to icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29/exp/joiner_jit_trace-pnnx.pt
@@ -0,0 +1,7 @@
2023-02-27 20:43:40,283 INFO [streaming-ncnn-decode.py:349] {'tokens': './icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29/data/lang_bpe_500/tokens.txt', 'encoder_param_filename': './icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29/exp/encoder_jit_trace-pnnx.ncnn.param', 'encoder_bin_filename': './icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29/exp/encoder_jit_trace-pnnx.ncnn.bin', 'decoder_param_filename': './icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29/exp/decoder_jit_trace-pnnx.ncnn.param', 'decoder_bin_filename': './icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29/exp/decoder_jit_trace-pnnx.ncnn.bin', 'joiner_param_filename': './icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29/exp/joiner_jit_trace-pnnx.ncnn.param', 'joiner_bin_filename': './icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29/exp/joiner_jit_trace-pnnx.ncnn.bin', 'sound_filename': './icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29/test_wavs/1089-134686-0001.wav'}
2023-02-27 20:43:41,260 INFO [streaming-ncnn-decode.py:357] Constructing Fbank computer
2023-02-27 20:43:41,264 INFO [streaming-ncnn-decode.py:360] Reading sound files: ./icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29/test_wavs/1089-134686-0001.wav
2023-02-27 20:43:41,269 INFO [streaming-ncnn-decode.py:365] torch.Size([106000])
2023-02-27 20:43:41,280 INFO [streaming-ncnn-decode.py:372] number of states: 35
2023-02-27 20:43:45,026 INFO [streaming-ncnn-decode.py:410] ./icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29/test_wavs/1089-134686-0001.wav
2023-02-27 20:43:45,026 INFO [streaming-ncnn-decode.py:411] AFTER EARLY NIGHTFALL THE YELLOW LAMPS WOULD LIGHT UP HERE AND THERE THE SQUALID QUARTER OF THE BROTHELS
@@ -166,6 +166,10 @@ Next, we use the following code to export our model:
     --memory-size 32 \
     --encoder-dim 512

+.. caution::
+
+   If your model has different configuration parameters, please change them accordingly.
+
 .. hint::

    We have renamed our model to ``epoch-30.pt`` so that we can use ``--epoch 30``.
docs/source/model-export/export-ncnn-zipformer.rst (new file): 383 lines
@@ -0,0 +1,383 @@
.. _export_streaming_zipformer_transducer_models_to_ncnn:

Export streaming Zipformer transducer models to ncnn
----------------------------------------------------

We use the pre-trained model from the following repository as an example:

`<https://huggingface.co/Zengwei/icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29>`_

We will show you step by step how to export it to `ncnn`_ and run it with `sherpa-ncnn`_.

.. hint::

   We use ``Ubuntu 18.04``, ``torch 1.13``, and ``Python 3.8`` for testing.

.. caution::

   Please use a more recent version of PyTorch. For instance, ``torch 1.8``
   may ``not`` work.

1. Download the pre-trained model
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

.. hint::

   You have to install `git-lfs`_ before you continue.

.. code-block:: bash

   cd egs/librispeech/ASR
   GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/Zengwei/icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29
   cd icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29

   git lfs pull --include "exp/pretrained.pt"
   git lfs pull --include "data/lang_bpe_500/bpe.model"

   cd ..

.. note::

   We downloaded ``exp/pretrained-xxx.pt``, not ``exp/cpu-jit_xxx.pt``.

In the above code, we downloaded the pre-trained model into the directory
``egs/librispeech/ASR/icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29``.

2. Install ncnn and pnnx
^^^^^^^^^^^^^^^^^^^^^^^^

Please refer to :ref:`export_for_ncnn_install_ncnn_and_pnnx`.

3. Export the model via torch.jit.trace()
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

First, let us rename our pre-trained model:

.. code-block:: bash

   cd egs/librispeech/ASR

   cd icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29/exp

   ln -s pretrained.pt epoch-99.pt

   cd ../..

Next, we use the following code to export our model:

.. code-block:: bash

   dir=./icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29

   ./pruned_transducer_stateless7_streaming/export-for-ncnn.py \
     --bpe-model $dir/data/lang_bpe_500/bpe.model \
     --exp-dir $dir/exp \
     --use-averaged-model 0 \
     --epoch 99 \
     --avg 1 \
     \
     --decode-chunk-len 32 \
     --num-left-chunks 4 \
     --num-encoder-layers "2,4,3,2,4" \
     --feedforward-dims "1024,1024,2048,2048,1024" \
     --nhead "8,8,8,8,8" \
     --encoder-dims "384,384,384,384,384" \
     --attention-dims "192,192,192,192,192" \
     --encoder-unmasked-dims "256,256,256,256,256" \
     --zipformer-downsampling-factors "1,2,4,8,2" \
     --cnn-module-kernels "31,31,31,31,31" \
     --decoder-dim 512 \
     --joiner-dim 512

.. caution::

   If your model has different configuration parameters, please change them accordingly.

.. hint::

   We have renamed our model to ``epoch-99.pt`` so that we can use ``--epoch 99``.
   There is only one pre-trained model, so we use ``--avg 1 --use-averaged-model 0``.

   If you have trained a model by yourself and have all checkpoints
   available, please first use ``decode.py`` to tune ``--epoch --avg``
   and select the best combination with ``--use-averaged-model 1``.
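
   For example, a tuning run might look like the sketch below. The flag names
   follow this recipe's ``decode.py``; the ``--epoch``/``--avg`` values are
   hypothetical and should come from your own experiments:

   .. code-block:: bash

      ./pruned_transducer_stateless7_streaming/decode.py \
        --epoch 30 \
        --avg 9 \
        --use-averaged-model 1 \
        --exp-dir ./pruned_transducer_stateless7_streaming/exp \
        --decoding-method greedy_search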

.. note::

   You will see the following log output:

   .. literalinclude:: ./code/export-zipformer-transducer-for-ncnn-output.txt

The log shows the model has ``69920376`` parameters, i.e., ``~69.9 M``.

.. code-block:: bash

   ls -lh icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29/exp/pretrained.pt

   -rw-r--r-- 1 kuangfangjun root 269M Jan 12 12:53 icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29/exp/pretrained.pt

You can see that the file size of the pre-trained model is ``269 MB``, which
is roughly equal to ``69920376*4/1024/1024 = 266.725 MB``.
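
You can double-check that arithmetic directly; the one-liner below simply
evaluates the expression from the sentence above:

.. code-block:: bash

   python3 -c 'print(69920376 * 4 / 1024 / 1024)'
   # ~266.725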

After running ``pruned_transducer_stateless7_streaming/export-for-ncnn.py``,
we will get the following files:

.. code-block:: bash

   ls -lh icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29/exp/*pnnx.pt

   -rw-r--r-- 1 kuangfangjun root 1022K Feb 27 20:23 icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29/exp/decoder_jit_trace-pnnx.pt
   -rw-r--r-- 1 kuangfangjun root  266M Feb 27 20:23 icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29/exp/encoder_jit_trace-pnnx.pt
   -rw-r--r-- 1 kuangfangjun root  2.8M Feb 27 20:23 icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29/exp/joiner_jit_trace-pnnx.pt

.. _zipformer-transducer-step-4-export-torchscript-model-via-pnnx:

4. Export torchscript model via pnnx
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

.. hint::

   Make sure you have set up the ``PATH`` environment variable
   in :ref:`export_for_ncnn_install_ncnn_and_pnnx`. Otherwise,
   it will throw an error saying that ``pnnx`` could not be found.

Now, it's time to export our models to `ncnn`_ via ``pnnx``.

.. code-block:: bash

   cd icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29/exp/

   pnnx ./encoder_jit_trace-pnnx.pt
   pnnx ./decoder_jit_trace-pnnx.pt
   pnnx ./joiner_jit_trace-pnnx.pt

It will generate the following files:

.. code-block:: bash

   ls -lh icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29/exp/*ncnn*{bin,param}

   -rw-r--r-- 1 kuangfangjun root 509K Feb 27 20:31 icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29/exp/decoder_jit_trace-pnnx.ncnn.bin
   -rw-r--r-- 1 kuangfangjun root  437 Feb 27 20:31 icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29/exp/decoder_jit_trace-pnnx.ncnn.param
   -rw-r--r-- 1 kuangfangjun root 133M Feb 27 20:30 icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29/exp/encoder_jit_trace-pnnx.ncnn.bin
   -rw-r--r-- 1 kuangfangjun root 152K Feb 27 20:30 icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29/exp/encoder_jit_trace-pnnx.ncnn.param
   -rw-r--r-- 1 kuangfangjun root 1.4M Feb 27 20:31 icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29/exp/joiner_jit_trace-pnnx.ncnn.bin
   -rw-r--r-- 1 kuangfangjun root  488 Feb 27 20:31 icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29/exp/joiner_jit_trace-pnnx.ncnn.param

There are two types of files:

- ``param``: a text file containing the model architecture. You can
  use a text editor to view its content; see the sketch after this list.
- ``bin``: a binary file containing the model parameters.
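
For instance, you can peek at the start of a ``param`` file with standard
shell tools; the file name below assumes the encoder generated above:

.. code-block:: bash

   head -n 3 encoder_jit_trace-pnnx.ncnn.param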

We compare the file sizes of the models below before and after converting via ``pnnx``:

.. see https://tableconvert.com/restructuredtext-generator

+----------------------------------+------------+
| File name                        | File size  |
+==================================+============+
| encoder_jit_trace-pnnx.pt        | 266 MB     |
+----------------------------------+------------+
| decoder_jit_trace-pnnx.pt        | 1022 KB    |
+----------------------------------+------------+
| joiner_jit_trace-pnnx.pt         | 2.8 MB     |
+----------------------------------+------------+
| encoder_jit_trace-pnnx.ncnn.bin  | 133 MB     |
+----------------------------------+------------+
| decoder_jit_trace-pnnx.ncnn.bin  | 509 KB     |
+----------------------------------+------------+
| joiner_jit_trace-pnnx.ncnn.bin   | 1.4 MB     |
+----------------------------------+------------+

You can see that the file sizes of the models after conversion are about half
those of the models before conversion:

- encoder: 266 MB vs 133 MB
- decoder: 1022 KB vs 509 KB
- joiner: 2.8 MB vs 1.4 MB

The reason is that by default ``pnnx`` converts ``float32`` parameters
to ``float16``. A ``float32`` parameter occupies 4 bytes, while a ``float16``
parameter occupies 2 bytes. Thus, each file is roughly half its original size
after conversion.
|
||||
|
||||
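
The arithmetic is easy to check. Below is a tiny illustrative sketch (not part
of icefall) that reproduces the halving from the sizes listed above:

.. code-block:: python

   # Each float32 parameter takes 4 bytes; float16 takes 2, so sizes halve.
   fp32_mb = {"encoder": 266.0, "decoder": 1022.0 / 1024, "joiner": 2.8}
   for name, size in fp32_mb.items():
       print(f"{name}: {size:.2f} MB (float32) -> ~{size / 2:.2f} MB (float16)")
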
.. hint::

   If you use ``pnnx ./encoder_jit_trace-pnnx.pt fp16=0``, then ``pnnx``
   won't convert ``float32`` to ``float16``.
5. Test the exported models in icefall
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

.. note::

   We assume you have set up the environment variable ``PYTHONPATH`` when
   building `ncnn`_.

Now we have successfully converted our pre-trained model to `ncnn`_ format.
The 6 generated files are exactly what we need. You can use the following
command to test the converted models:

.. code-block:: bash

   python3 ./pruned_transducer_stateless7_streaming/streaming-ncnn-decode.py \
     --tokens ./icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29/data/lang_bpe_500/tokens.txt \
     --encoder-param-filename ./icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29/exp/encoder_jit_trace-pnnx.ncnn.param \
     --encoder-bin-filename ./icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29/exp/encoder_jit_trace-pnnx.ncnn.bin \
     --decoder-param-filename ./icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29/exp/decoder_jit_trace-pnnx.ncnn.param \
     --decoder-bin-filename ./icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29/exp/decoder_jit_trace-pnnx.ncnn.bin \
     --joiner-param-filename ./icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29/exp/joiner_jit_trace-pnnx.ncnn.param \
     --joiner-bin-filename ./icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29/exp/joiner_jit_trace-pnnx.ncnn.bin \
     ./icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29/test_wavs/1089-134686-0001.wav

.. hint::

   `ncnn`_ supports only ``batch size == 1``, so ``streaming-ncnn-decode.py`` accepts
   only 1 wave file as input.

The output is given below:

.. literalinclude:: ./code/test-streaming-ncnn-decode-zipformer-transducer-libri.txt

Congratulations! You have successfully exported a model from PyTorch to `ncnn`_!
.. _zipformer-modify-the-exported-encoder-for-sherpa-ncnn:

6. Modify the exported encoder for sherpa-ncnn
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

In order to use the exported models in `sherpa-ncnn`_, we have to modify
``encoder_jit_trace-pnnx.ncnn.param``.

Let us have a look at the first few lines of ``encoder_jit_trace-pnnx.ncnn.param``:

.. code-block::

   7767517
   2028 2547
   Input in0 0 1 in0

**Explanation** of the above three lines:

1. ``7767517``: it is a magic number and should not be changed.
2. ``2028 2547``: the first number, ``2028``, specifies the number of layers
   in this file, while ``2547`` specifies the number of intermediate outputs
   of this file.
3. ``Input in0 0 1 in0``: ``Input`` is the layer type of this layer; ``in0``
   is the layer name of this layer; ``0`` means this layer has no input;
   ``1`` means this layer has one output; ``in0`` is the output name of
   this layer.

We need to add 1 extra line and also increment the number of layers.
The result looks like below:

.. code-block:: bash

   7767517
   2029 2547
   SherpaMetaData sherpa_meta_data1 0 0 0=2 1=32 2=4 3=7 -23316=5,2,4,3,2,4 -23317=5,384,384,384,384,384 -23318=5,192,192,192,192,192 -23319=5,1,2,4,8,2 -23320=5,31,31,31,31,31
   Input in0 0 1 in0

**Explanation**

1. ``7767517``: it is still the same.
2. ``2029 2547``: we have added an extra layer, so we need to update ``2028`` to ``2029``.
   We don't need to change ``2547`` since the newly added layer has no inputs or outputs.
3. ``SherpaMetaData sherpa_meta_data1 0 0 0=2 1=32 2=4 3=7 -23316=5,2,4,3,2,4 -23317=5,384,384,384,384,384 -23318=5,192,192,192,192,192 -23319=5,1,2,4,8,2 -23320=5,31,31,31,31,31``:
   this line is newly added. Its explanation is given below:

   - ``SherpaMetaData`` is the type of this layer. Must be ``SherpaMetaData``.
   - ``sherpa_meta_data1`` is the name of this layer. Must be ``sherpa_meta_data1``.
   - ``0 0`` means this layer has no inputs and no outputs. Must be ``0 0``.
   - ``0=2``: 0 is the key and 2 is the value. MUST be ``0=2``.
   - ``1=32``: 1 is the key and 32 is the value of the
     parameter ``--decode-chunk-len`` that you provided when running
     ``./pruned_transducer_stateless7_streaming/export-for-ncnn.py``.
   - ``2=4``: 2 is the key and 4 is the value of the
     parameter ``--num-left-chunks`` that you provided when running
     ``./pruned_transducer_stateless7_streaming/export-for-ncnn.py``.
   - ``3=7``: 3 is the key and 7 is the value for the amount of padding
     used in the Conv2DSubsampling layer. It should be 7 for zipformer
     if you don't change zipformer.py.
   - ``-23316=5,2,4,3,2,4``: attribute 16; this is an array attribute.
     It is attribute 16 since -23300 - (-23316) = 16.
     The first element of the array is the length of the array, which is 5 in our case.
     ``2,4,3,2,4`` is the value of ``--num-encoder-layers`` that you provided
     when running ``./pruned_transducer_stateless7_streaming/export-for-ncnn.py``.
   - ``-23317=5,384,384,384,384,384``: attribute 17.
     The first element of the array is the length of the array, which is 5 in our case.
     ``384,384,384,384,384`` is the value of ``--encoder-dims`` that you provided
     when running ``./pruned_transducer_stateless7_streaming/export-for-ncnn.py``.
   - ``-23318=5,192,192,192,192,192``: attribute 18.
     The first element of the array is the length of the array, which is 5 in our case.
     ``192,192,192,192,192`` is the value of ``--attention-dims`` that you provided
     when running ``./pruned_transducer_stateless7_streaming/export-for-ncnn.py``.
   - ``-23319=5,1,2,4,8,2``: attribute 19.
     The first element of the array is the length of the array, which is 5 in our case.
     ``1,2,4,8,2`` is the value of ``--zipformer-downsampling-factors`` that you provided
     when running ``./pruned_transducer_stateless7_streaming/export-for-ncnn.py``.
   - ``-23320=5,31,31,31,31,31``: attribute 20.
     The first element of the array is the length of the array, which is 5 in our case.
     ``31,31,31,31,31`` is the value of ``--cnn-module-kernels`` that you provided
     when running ``./pruned_transducer_stateless7_streaming/export-for-ncnn.py``.

   For ease of reference, we list the key-value pairs that you need to add
   in the following table. If your model has a different setting, please
   change the values for ``SherpaMetaData`` accordingly. Otherwise, you
   will be ``SAD``.

   +----------+--------------------------------------------+
   | key      | value                                      |
   +==========+============================================+
   | 0        | 2 (fixed)                                  |
   +----------+--------------------------------------------+
   | 1        | ``--decode-chunk-len``                     |
   +----------+--------------------------------------------+
   | 2        | ``--num-left-chunks``                      |
   +----------+--------------------------------------------+
   | 3        | 7 (if you don't change code)               |
   +----------+--------------------------------------------+
   | -23316   | ``--num-encoder-layers``                   |
   +----------+--------------------------------------------+
   | -23317   | ``--encoder-dims``                         |
   +----------+--------------------------------------------+
   | -23318   | ``--attention-dims``                       |
   +----------+--------------------------------------------+
   | -23319   | ``--zipformer-downsampling-factors``       |
   +----------+--------------------------------------------+
   | -23320   | ``--cnn-module-kernels``                   |
   +----------+--------------------------------------------+

4. ``Input in0 0 1 in0``: no need to change it.

.. caution::

   When you add a new layer ``SherpaMetaData``, please remember to update the
   number of layers. In our case, update ``2028`` to ``2029``. Otherwise,
   you will be SAD later.

.. hint::

   After adding the new layer ``SherpaMetaData``, you cannot use this model
   with ``streaming-ncnn-decode.py`` anymore since ``SherpaMetaData`` is
   supported only in `sherpa-ncnn`_.

.. hint::

   `ncnn`_ is very flexible. You can add new layers to it just by text-editing
   the ``param`` file! You don't need to change the ``bin`` file.
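
If you would rather script this one-line edit than do it in a text editor,
the following is a minimal sketch (it is not part of icefall; the
``SherpaMetaData`` values are the ones used in this tutorial, so adjust them
if your model was trained with different options):

.. code-block:: python

   #!/usr/bin/env python3
   from pathlib import Path

   # Values below match this tutorial's model; edit them to match yours.
   META = (
       "SherpaMetaData sherpa_meta_data1 0 0 0=2 1=32 2=4 3=7 "
       "-23316=5,2,4,3,2,4 -23317=5,384,384,384,384,384 "
       "-23318=5,192,192,192,192,192 -23319=5,1,2,4,8,2 "
       "-23320=5,31,31,31,31,31"
   )

   param = Path("encoder_jit_trace-pnnx.ncnn.param")
   lines = param.read_text().splitlines()

   # lines[0] is the magic number 7767517 and must stay unchanged.
   num_layers, num_outputs = map(int, lines[1].split())

   # One extra layer (SherpaMetaData); the number of intermediate outputs
   # stays the same since the new layer has no inputs or outputs.
   lines[1] = f"{num_layers + 1} {num_outputs}"
   lines.insert(2, META)

   param.write_text("\n".join(lines) + "\n")
   print(f"Updated {param}: {num_layers} -> {num_layers + 1} layers")
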
Now you can use this model in `sherpa-ncnn`_.
Please refer to the following documentation:

- Linux/macOS/Windows/arm/aarch64: `<https://k2-fsa.github.io/sherpa/ncnn/install/index.html>`_
- ``Android``: `<https://k2-fsa.github.io/sherpa/ncnn/android/index.html>`_
- ``iOS``: `<https://k2-fsa.github.io/sherpa/ncnn/ios/index.html>`_
- Python: `<https://k2-fsa.github.io/sherpa/ncnn/python/index.html>`_

We have a list of pre-trained models that have been exported for `sherpa-ncnn`_:

- `<https://k2-fsa.github.io/sherpa/ncnn/pretrained_models/index.html>`_

You can find more usages there.
@ -21,6 +21,7 @@ It has been tested on the following platforms:

- ``iOS``
- ``Raspberry Pi``
- `爱芯派 <https://wiki.sipeed.com/hardware/zh/>`_ (`MAIX-III AXera-Pi <https://wiki.sipeed.com/hardware/en/maixIII/ax-pi/axpi.html>`_).
- `RV1126 <https://www.rock-chips.com/a/en/products/RV11_Series/2020/0427/1076.html>`_

`sherpa-ncnn`_ is self-contained and can be statically linked to produce
a binary containing everything needed. Please refer
@ -31,5 +32,6 @@ to its documentation for details:

.. toctree::

   export-ncnn-zipformer
   export-ncnn-conv-emformer
   export-ncnn-lstm
@ -9,6 +9,22 @@ to export trained models to `ONNX`_.

There is also a file named ``onnx_pretrained.py``, which shows how to use
the exported `ONNX`_ model in Python with `onnxruntime`_ to decode sound files.

sherpa-onnx
-----------

We have a separate repository `sherpa-onnx`_ for deploying your exported models
on various platforms such as:

- iOS
- Android
- Raspberry Pi
- Linux/macOS/Windows


Please see the documentation of `sherpa-onnx`_ for details:

`<https://k2-fsa.github.io/sherpa/onnx/index.html>`_

Example
-------
@ -391,18 +391,14 @@ def save_results(
):
    test_set_wers = dict()
    for key, results in results_dict.items():
        recog_path = (
            params.res_dir / f"recogs-{test_set_name}-{key}-{params.suffix}.txt"
        )
        recog_path = params.res_dir / f"recogs-{test_set_name}-{params.suffix}.txt"
        results = sorted(results)
        store_transcripts(filename=recog_path, texts=results)
        logging.info(f"The transcripts are stored in {recog_path}")

        # The following prints out WERs, per-word error statistics and aligned
        # ref/hyp pairs.
        errs_filename = (
            params.res_dir / f"errs-{test_set_name}-{key}-{params.suffix}.txt"
        )
        errs_filename = params.res_dir / f"errs-{test_set_name}-{params.suffix}.txt"
        with open(errs_filename, "w") as f:
            wer = write_error_stats(
                f, f"{test_set_name}-{key}", results, enable_log=True
@ -412,9 +408,7 @@ def save_results(
        logging.info("Wrote detailed error stats to {}".format(errs_filename))

    test_set_wers = sorted(test_set_wers.items(), key=lambda x: x[1])
    errs_info = (
        params.res_dir / f"wer-summary-{test_set_name}-{key}-{params.suffix}.txt"
    )
    errs_info = params.res_dir / f"wer-summary-{test_set_name}-{params.suffix}.txt"
    with open(errs_info, "w") as f:
        print("settings\tWER", file=f)
        for key, val in test_set_wers:
egs/aishell/ASR/local/prepare_char_lm_training_data.py (new file, 164 lines)

@ -0,0 +1,164 @@
#!/usr/bin/env python3

# Copyright (c)  2021  Xiaomi Corporation (authors: Daniel Povey
#                                                   Fangjun Kuang)
#
# See ../../../../LICENSE for clarification regarding multiple authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""
This script takes a `tokens.txt` and a text file such as
./download/lm/aishell-transcript.txt
and outputs the LM training data to a supplied directory such
as data/lm_training_char. The format is as follows: it creates
a PyTorch archive (.pt file), say data/lm_training.pt, which is a
representation of a dict with the same format as in the librispeech recipe.
"""

import argparse
import logging
from pathlib import Path

import k2
import torch


def get_args():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--lang-char",
        type=str,
        help="""Lang dir of asr model, e.g. data/lang_char""",
    )
    parser.add_argument(
        "--lm-data",
        type=str,
        help="""Input LM training data as text, e.g.
        download/lm/aishell-train-word.txt""",
    )
    parser.add_argument(
        "--lm-archive",
        type=str,
        help="""Path to output archive, e.g. data/lm_training_char/lm_data.pt;
        look at the source of this script to see the format.""",
    )

    return parser.parse_args()


def main():
    args = get_args()

    if Path(args.lm_archive).exists():
        logging.warning(f"{args.lm_archive} exists - skipping")
        return

    # make token_dict from tokens.txt in order to map characters to tokens.
    token_dict = {}
    token_file = args.lang_char + "/tokens.txt"

    with open(token_file, "r") as f:
        for line in f.readlines():
            line_list = line.split()
            token_dict[line_list[0]] = int(line_list[1])

    # word2index is a dictionary from words to integer ids. No need to reserve
    # space for epsilon, etc.; the words are just used as a convenient way to
    # compress the sequences of tokens.
    word2index = dict()

    word2token = []  # Will be a list-of-list-of-int, representing tokens.
    sentences = []  # Will be a list-of-list-of-int, representing word-ids.

    if "aishell-lm" in args.lm_data:
        num_lines_in_total = 120098.0
        step = 50000
    elif "valid" in args.lm_data:
        num_lines_in_total = 14326.0
        step = 3000
    elif "test" in args.lm_data:
        num_lines_in_total = 7176.0
        step = 3000
    else:
        num_lines_in_total = None
        step = None

    processed = 0

    with open(args.lm_data) as f:
        while True:
            line = f.readline()
            if line == "":
                break

            if step and processed % step == 0:
                logging.info(
                    f"Processed number of lines: {processed} "
                    f"({processed / num_lines_in_total * 100: .3f}%)"
                )
            processed += 1

            line_words = line.split()
            for w in line_words:
                if w not in word2index:
                    w_token = []
                    for t in w:
                        if t in token_dict:
                            w_token.append(token_dict[t])
                        else:
                            w_token.append(token_dict["<unk>"])
                    word2index[w] = len(word2token)
                    word2token.append(w_token)
            sentences.append([word2index[w] for w in line_words])

    logging.info("Constructing ragged tensors")
    words = k2.ragged.RaggedTensor(word2token)
    sentences = k2.ragged.RaggedTensor(sentences)

    output = dict(words=words, sentences=sentences)

    num_sentences = sentences.dim0
    logging.info(f"Computing sentence lengths, num_sentences: {num_sentences}")
    sentence_lengths = [0] * num_sentences
    for i in range(num_sentences):
        if step and i % step == 0:
            logging.info(
                f"Processed number of lines: {i} ({i / num_sentences * 100: .3f}%)"
            )

        word_ids = sentences[i]

        # NOTE: If word_ids is a tensor with only 1 entry,
        # token_ids is a torch.Tensor
        token_ids = words[word_ids]
        if isinstance(token_ids, k2.RaggedTensor):
            token_ids = token_ids.values

        # token_ids is a 1-D tensor containing the BPE tokens
        # of the current sentence

        sentence_lengths[i] = token_ids.numel()

    output["sentence_lengths"] = torch.tensor(sentence_lengths, dtype=torch.int32)

    torch.save(output, args.lm_archive)
    logging.info(f"Saved to {args.lm_archive}")


if __name__ == "__main__":
    formatter = "%(asctime)s %(levelname)s [%(filename)s:%(lineno)d] %(message)s"

    logging.basicConfig(format=formatter, level=logging.INFO)

    main()
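
For a quick sanity check of the archive produced by this script, you can load
it back and inspect the three entries built above (a sketch; the path is the
one used in this recipe):

```python
import k2  # needed so torch.load can unpickle k2.RaggedTensor objects
import torch

data = torch.load("data/lm_training_char/lm_data.pt")
print(data["words"].dim0)             # number of distinct words
print(data["sentences"].dim0)         # number of sentences
print(data["sentence_lengths"][:10])  # token counts of the first 10 sentences
```
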
@ -7,7 +7,7 @@ set -eou pipefail

nj=15
stage=-1
stop_stage=10
stop_stage=11

# We assume dl_dir (download dir) contains the following
# directories and files. If not, they will be downloaded
@ -219,3 +219,93 @@ if [ $stage -le 8 ] && [ $stop_stage -ge 8 ]; then
  ./local/compile_hlg.py --lang-dir $lang_phone_dir
  ./local/compile_hlg.py --lang-dir $lang_char_dir
fi

if [ $stage -le 9 ] && [ $stop_stage -ge 9 ]; then
  log "Stage 9: Generate LM training data"

  log "Processing char based data"
  out_dir=data/lm_training_char
  mkdir -p $out_dir $dl_dir/lm

  if [ ! -f $dl_dir/lm/aishell-train-word.txt ]; then
    cp $lang_phone_dir/transcript_words.txt $dl_dir/lm/aishell-train-word.txt
  fi

  ./local/prepare_char_lm_training_data.py \
    --lang-char data/lang_char \
    --lm-data $dl_dir/lm/aishell-train-word.txt \
    --lm-archive $out_dir/lm_data.pt

  if [ ! -f $dl_dir/lm/aishell-valid-word.txt ]; then
    aishell_text=$dl_dir/aishell/data_aishell/transcript/aishell_transcript_v0.8.txt
    aishell_valid_uid=$dl_dir/aishell/data_aishell/transcript/aishell_valid_uid
    find $dl_dir/aishell/data_aishell/wav/dev -name "*.wav" | sed 's/\.wav//g' | awk -F '/' '{print $NF}' > $aishell_valid_uid
    awk 'NR==FNR{uid[$1]=$1} NR!=FNR{if($1 in uid) print $0}' $aishell_valid_uid $aishell_text |
      cut -d " " -f 2- > $dl_dir/lm/aishell-valid-word.txt
  fi

  ./local/prepare_char_lm_training_data.py \
    --lang-char data/lang_char \
    --lm-data $dl_dir/lm/aishell-valid-word.txt \
    --lm-archive $out_dir/lm_data_valid.pt

  if [ ! -f $dl_dir/lm/aishell-test-word.txt ]; then
    aishell_text=$dl_dir/aishell/data_aishell/transcript/aishell_transcript_v0.8.txt
    aishell_test_uid=$dl_dir/aishell/data_aishell/transcript/aishell_test_uid
    find $dl_dir/aishell/data_aishell/wav/test -name "*.wav" | sed 's/\.wav//g' | awk -F '/' '{print $NF}' > $aishell_test_uid
    awk 'NR==FNR{uid[$1]=$1} NR!=FNR{if($1 in uid) print $0}' $aishell_test_uid $aishell_text |
      cut -d " " -f 2- > $dl_dir/lm/aishell-test-word.txt
  fi

  ./local/prepare_char_lm_training_data.py \
    --lang-char data/lang_char \
    --lm-data $dl_dir/lm/aishell-test-word.txt \
    --lm-archive $out_dir/lm_data_test.pt
fi


if [ $stage -le 10 ] && [ $stop_stage -ge 10 ]; then
  log "Stage 10: Sort LM training data"
  # Sort LM training data by sentence length in descending order
  # for ease of training.
  #
  # Sentence length equals the number of tokens
  # in a sentence.

  out_dir=data/lm_training_char
  mkdir -p $out_dir
  ln -snf ../../../librispeech/ASR/local/sort_lm_training_data.py local/

  ./local/sort_lm_training_data.py \
    --in-lm-data $out_dir/lm_data.pt \
    --out-lm-data $out_dir/sorted_lm_data.pt \
    --out-statistics $out_dir/statistics.txt

  ./local/sort_lm_training_data.py \
    --in-lm-data $out_dir/lm_data_valid.pt \
    --out-lm-data $out_dir/sorted_lm_data-valid.pt \
    --out-statistics $out_dir/statistics-valid.txt

  ./local/sort_lm_training_data.py \
    --in-lm-data $out_dir/lm_data_test.pt \
    --out-lm-data $out_dir/sorted_lm_data-test.pt \
    --out-statistics $out_dir/statistics-test.txt
fi

if [ $stage -le 11 ] && [ $stop_stage -ge 11 ]; then
  log "Stage 11: Train RNN LM model"
  python ../../../icefall/rnn_lm/train.py \
    --start-epoch 0 \
    --world-size 1 \
    --num-epochs 20 \
    --use-fp16 0 \
    --embedding-dim 512 \
    --hidden-dim 512 \
    --num-layers 2 \
    --batch-size 400 \
    --exp-dir rnnlm_char/exp \
    --lm-data data/lm_training_char/sorted_lm_data.pt \
    --lm-data-valid data/lm_training_char/sorted_lm_data-valid.pt \
    --vocab-size 4336 \
    --master-port 12345
fi
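
A side note on the two-file `awk` idiom used in stage 9 above: `NR==FNR` holds
only while the first file (the uid list) is being read, so the program first
collects the utterance ids and then prints only transcript lines whose first
field is a known id. A Python sketch of the same filter (a hypothetical helper,
not part of the recipe):

```python
def filter_transcript(uid_file: str, transcript_file: str) -> None:
    """Print transcript lines whose utterance id appears in uid_file.

    Equivalent to:
    awk 'NR==FNR{uid[$1]=$1} NR!=FNR{if($1 in uid) print $0}' uids transcript
    """
    with open(uid_file) as f:
        uids = {line.split()[0] for line in f if line.strip()}
    with open(transcript_file) as f:
        for line in f:
            fields = line.split(maxsplit=1)
            if fields and fields[0] in uids:
                print(line, end="")
```
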
@ -388,18 +388,14 @@ def save_results(
):
    test_set_wers = dict()
    for key, results in results_dict.items():
        recog_path = (
            params.res_dir / f"recogs-{test_set_name}-{key}-{params.suffix}.txt"
        )
        recog_path = params.res_dir / f"recogs-{test_set_name}-{params.suffix}.txt"
        results = sorted(results)
        store_transcripts(filename=recog_path, texts=results)
        logging.info(f"The transcripts are stored in {recog_path}")

        # The following prints out WERs, per-word error statistics and aligned
        # ref/hyp pairs.
        errs_filename = (
            params.res_dir / f"errs-{test_set_name}-{key}-{params.suffix}.txt"
        )
        errs_filename = params.res_dir / f"errs-{test_set_name}-{params.suffix}.txt"
        # we compute CER for aishell dataset.
        results_char = []
        for res in results:
@ -413,9 +409,7 @@ def save_results(
        logging.info("Wrote detailed error stats to {}".format(errs_filename))

    test_set_wers = sorted(test_set_wers.items(), key=lambda x: x[1])
    errs_info = (
        params.res_dir / f"wer-summary-{test_set_name}-{key}-{params.suffix}.txt"
    )
    errs_info = params.res_dir / f"wer-summary-{test_set_name}-{params.suffix}.txt"
    with open(errs_info, "w") as f:
        print("settings\tWER", file=f)
        for key, val in test_set_wers:
@ -406,18 +406,14 @@ def save_results(
):
    test_set_wers = dict()
    for key, results in results_dict.items():
        recog_path = (
            params.res_dir / f"recogs-{test_set_name}-{key}-{params.suffix}.txt"
        )
        recog_path = params.res_dir / f"recogs-{test_set_name}-{params.suffix}.txt"
        results = sorted(results)
        store_transcripts(filename=recog_path, texts=results)
        logging.info(f"The transcripts are stored in {recog_path}")

        # The following prints out WERs, per-word error statistics and aligned
        # ref/hyp pairs.
        errs_filename = (
            params.res_dir / f"errs-{test_set_name}-{key}-{params.suffix}.txt"
        )
        errs_filename = params.res_dir / f"errs-{test_set_name}-{params.suffix}.txt"
        # we compute CER for aishell dataset.
        results_char = []
        for res in results:
@ -431,9 +427,7 @@ def save_results(
        logging.info("Wrote detailed error stats to {}".format(errs_filename))

    test_set_wers = sorted(test_set_wers.items(), key=lambda x: x[1])
    errs_info = (
        params.res_dir / f"wer-summary-{test_set_name}-{key}-{params.suffix}.txt"
    )
    errs_info = params.res_dir / f"wer-summary-{test_set_name}-{params.suffix}.txt"
    with open(errs_info, "w") as f:
        print("settings\tCER", file=f)
        for key, val in test_set_wers:
@ -325,17 +325,13 @@ def save_results(
):
    test_set_wers = dict()
    for key, results in results_dict.items():
        recog_path = (
            params.res_dir / f"recogs-{test_set_name}-{key}-{params.suffix}.txt"
        )
        recog_path = params.res_dir / f"recogs-{test_set_name}-{params.suffix}.txt"
        results = sorted(results)
        store_transcripts(filename=recog_path, texts=results)

        # The following prints out WERs, per-word error statistics and aligned
        # ref/hyp pairs.
        errs_filename = (
            params.res_dir / f"errs-{test_set_name}-{key}-{params.suffix}.txt"
        )
        errs_filename = params.res_dir / f"errs-{test_set_name}-{params.suffix}.txt"
        # we compute CER for aishell dataset.
        results_char = []
        for res in results:
@ -349,9 +345,7 @@ def save_results(
        logging.info("Wrote detailed error stats to {}".format(errs_filename))

    test_set_wers = sorted(test_set_wers.items(), key=lambda x: x[1])
    errs_info = (
        params.res_dir / f"wer-summary-{test_set_name}-{key}-{params.suffix}.txt"
    )
    errs_info = params.res_dir / f"wer-summary-{test_set_name}-{params.suffix}.txt"
    with open(errs_info, "w") as f:
        print("settings\tCER", file=f)
        for key, val in test_set_wers:
@ -370,18 +370,14 @@ def save_results(
):
    test_set_wers = dict()
    for key, results in results_dict.items():
        recog_path = (
            params.res_dir / f"recogs-{test_set_name}-{key}-{params.suffix}.txt"
        )
        recog_path = params.res_dir / f"recogs-{test_set_name}-{params.suffix}.txt"
        results = sorted(results)
        store_transcripts(filename=recog_path, texts=results)
        logging.info(f"The transcripts are stored in {recog_path}")

        # The following prints out WERs, per-word error statistics and aligned
        # ref/hyp pairs.
        errs_filename = (
            params.res_dir / f"errs-{test_set_name}-{key}-{params.suffix}.txt"
        )
        errs_filename = params.res_dir / f"errs-{test_set_name}-{params.suffix}.txt"
        # we compute CER for aishell dataset.
        results_char = []
        for res in results:
@ -395,9 +391,7 @@ def save_results(
        logging.info("Wrote detailed error stats to {}".format(errs_filename))

    test_set_wers = sorted(test_set_wers.items(), key=lambda x: x[1])
    errs_info = (
        params.res_dir / f"wer-summary-{test_set_name}-{key}-{params.suffix}.txt"
    )
    errs_info = params.res_dir / f"wer-summary-{test_set_name}-{params.suffix}.txt"
    with open(errs_info, "w") as f:
        print("settings\tCER", file=f)
        for key, val in test_set_wers:
@ -374,18 +374,14 @@ def save_results(
):
    test_set_wers = dict()
    for key, results in results_dict.items():
        recog_path = (
            params.res_dir / f"recogs-{test_set_name}-{key}-{params.suffix}.txt"
        )
        recog_path = params.res_dir / f"recogs-{test_set_name}-{params.suffix}.txt"
        results = sorted(results)
        store_transcripts(filename=recog_path, texts=results)
        logging.info(f"The transcripts are stored in {recog_path}")

        # The following prints out WERs, per-word error statistics and aligned
        # ref/hyp pairs.
        errs_filename = (
            params.res_dir / f"errs-{test_set_name}-{key}-{params.suffix}.txt"
        )
        errs_filename = params.res_dir / f"errs-{test_set_name}-{params.suffix}.txt"
        # we compute CER for aishell dataset.
        results_char = []
        for res in results:
@ -399,9 +395,7 @@ def save_results(
        logging.info("Wrote detailed error stats to {}".format(errs_filename))

    test_set_wers = sorted(test_set_wers.items(), key=lambda x: x[1])
    errs_info = (
        params.res_dir / f"wer-summary-{test_set_name}-{key}-{params.suffix}.txt"
    )
    errs_info = params.res_dir / f"wer-summary-{test_set_name}-{params.suffix}.txt"
    with open(errs_info, "w") as f:
        print("settings\tCER", file=f)
        for key, val in test_set_wers:
@ -543,18 +543,14 @@ def save_results(
):
    test_set_wers = dict()
    for key, results in results_dict.items():
        recog_path = (
            params.res_dir / f"recogs-{test_set_name}-{key}-{params.suffix}.txt"
        )
        recog_path = params.res_dir / f"recogs-{test_set_name}-{params.suffix}.txt"
        results = sorted(results)
        store_transcripts(filename=recog_path, texts=results)
        logging.info(f"The transcripts are stored in {recog_path}")

        # The following prints out WERs, per-word error statistics and aligned
        # ref/hyp pairs.
        errs_filename = (
            params.res_dir / f"errs-{test_set_name}-{key}-{params.suffix}.txt"
        )
        errs_filename = params.res_dir / f"errs-{test_set_name}-{params.suffix}.txt"
        with open(errs_filename, "w") as f:
            wer = write_error_stats(
                f, f"{test_set_name}-{key}", results, enable_log=True
@ -564,9 +560,7 @@ def save_results(
        logging.info("Wrote detailed error stats to {}".format(errs_filename))

    test_set_wers = sorted(test_set_wers.items(), key=lambda x: x[1])
    errs_info = (
        params.res_dir / f"wer-summary-{test_set_name}-{key}-{params.suffix}.txt"
    )
    errs_info = params.res_dir / f"wer-summary-{test_set_name}-{params.suffix}.txt"
    with open(errs_info, "w") as f:
        print("settings\tWER", file=f)
        for key, val in test_set_wers:
@ -406,18 +406,14 @@ def save_results(
):
    test_set_wers = dict()
    for key, results in results_dict.items():
        recog_path = (
            params.res_dir / f"recogs-{test_set_name}-{key}-{params.suffix}.txt"
        )
        recog_path = params.res_dir / f"recogs-{test_set_name}-{params.suffix}.txt"
        results = sorted(results)
        store_transcripts(filename=recog_path, texts=results)
        logging.info(f"The transcripts are stored in {recog_path}")

        # The following prints out WERs, per-word error statistics and aligned
        # ref/hyp pairs.
        errs_filename = (
            params.res_dir / f"errs-{test_set_name}-{key}-{params.suffix}.txt"
        )
        errs_filename = params.res_dir / f"errs-{test_set_name}-{params.suffix}.txt"
        with open(errs_filename, "w") as f:
            wer = write_error_stats(
                f, f"{test_set_name}-{key}", results, enable_log=True
@ -427,9 +423,7 @@ def save_results(
        logging.info("Wrote detailed error stats to {}".format(errs_filename))

    test_set_wers = sorted(test_set_wers.items(), key=lambda x: x[1])
    errs_info = (
        params.res_dir / f"wer-summary-{test_set_name}-{key}-{params.suffix}.txt"
    )
    errs_info = params.res_dir / f"wer-summary-{test_set_name}-{params.suffix}.txt"
    with open(errs_info, "w") as f:
        print("settings\tWER", file=f)
        for key, val in test_set_wers:
@ -391,18 +391,14 @@ def save_results(
):
    test_set_wers = dict()
    for key, results in results_dict.items():
        recog_path = (
            params.res_dir / f"recogs-{test_set_name}-{key}-{params.suffix}.txt"
        )
        recog_path = params.res_dir / f"recogs-{test_set_name}-{params.suffix}.txt"
        results = sorted(results)
        store_transcripts(filename=recog_path, texts=results)
        logging.info(f"The transcripts are stored in {recog_path}")

        # The following prints out WERs, per-word error statistics and aligned
        # ref/hyp pairs.
        errs_filename = (
            params.res_dir / f"errs-{test_set_name}-{key}-{params.suffix}.txt"
        )
        errs_filename = params.res_dir / f"errs-{test_set_name}-{params.suffix}.txt"
        with open(errs_filename, "w") as f:
            wer = write_error_stats(
                f, f"{test_set_name}-{key}", results, enable_log=True
@ -412,9 +408,7 @@ def save_results(
        logging.info("Wrote detailed error stats to {}".format(errs_filename))

    test_set_wers = sorted(test_set_wers.items(), key=lambda x: x[1])
    errs_info = (
        params.res_dir / f"wer-summary-{test_set_name}-{key}-{params.suffix}.txt"
    )
    errs_info = params.res_dir / f"wer-summary-{test_set_name}-{params.suffix}.txt"
    with open(errs_info, "w") as f:
        print("settings\tWER", file=f)
        for key, val in test_set_wers:
@ -462,18 +462,14 @@ def save_results(
):
    test_set_wers = dict()
    for key, results in results_dict.items():
        recog_path = (
            params.res_dir / f"recogs-{test_set_name}-{key}-{params.suffix}.txt"
        )
        recog_path = params.res_dir / f"recogs-{test_set_name}-{params.suffix}.txt"
        results = sorted(results)
        store_transcripts(filename=recog_path, texts=results)
        logging.info(f"The transcripts are stored in {recog_path}")

        # The following prints out WERs, per-word error statistics and aligned
        # ref/hyp pairs.
        errs_filename = (
            params.res_dir / f"errs-{test_set_name}-{key}-{params.suffix}.txt"
        )
        errs_filename = params.res_dir / f"errs-{test_set_name}-{params.suffix}.txt"
        with open(errs_filename, "w") as f:
            wer = write_error_stats(
                f, f"{test_set_name}-{key}", results, enable_log=True
@ -483,9 +479,7 @@ def save_results(
        logging.info("Wrote detailed error stats to {}".format(errs_filename))

    test_set_wers = sorted(test_set_wers.items(), key=lambda x: x[1])
    errs_info = (
        params.res_dir / f"wer-summary-{test_set_name}-{key}-{params.suffix}.txt"
    )
    errs_info = params.res_dir / f"wer-summary-{test_set_name}-{params.suffix}.txt"
    with open(errs_info, "w") as f:
        print("settings\tWER", file=f)
        for key, val in test_set_wers:
@ -478,17 +478,13 @@ def save_results(
    test_set_wers = dict()
    test_set_cers = dict()
    for key, results in results_dict.items():
        recog_path = (
            params.res_dir / f"recogs-{test_set_name}-{key}-{params.suffix}.txt"
        )
        recog_path = params.res_dir / f"recogs-{test_set_name}-{params.suffix}.txt"
        store_transcripts(filename=recog_path, texts=results)
        logging.info(f"The transcripts are stored in {recog_path}")

        # The following prints out WERs, per-word error statistics and aligned
        # ref/hyp pairs.
        wers_filename = (
            params.res_dir / f"wers-{test_set_name}-{key}-{params.suffix}.txt"
        )
        wers_filename = params.res_dir / f"wers-{test_set_name}-{params.suffix}.txt"
        with open(wers_filename, "w") as f:
            wer = write_error_stats(
                f, f"{test_set_name}-{key}", results, enable_log=True
@ -499,9 +495,7 @@ def save_results(
        results_char = []
        for res in results:
            results_char.append((res[0], list("".join(res[1])), list("".join(res[2]))))
        cers_filename = (
            params.res_dir / f"cers-{test_set_name}-{key}-{params.suffix}.txt"
        )
        cers_filename = params.res_dir / f"cers-{test_set_name}-{params.suffix}.txt"
        with open(cers_filename, "w") as f:
            cer = write_error_stats(
                f, f"{test_set_name}-{key}", results_char, enable_log=True
@ -512,9 +506,7 @@ def save_results(

    test_set_wers = {k: v for k, v in sorted(test_set_wers.items(), key=lambda x: x[1])}
    test_set_cers = {k: v for k, v in sorted(test_set_cers.items(), key=lambda x: x[1])}
    errs_info = (
        params.res_dir / f"wer-summary-{test_set_name}-{key}-{params.suffix}.txt"
    )
    errs_info = params.res_dir / f"wer-summary-{test_set_name}-{params.suffix}.txt"
    with open(errs_info, "w") as f:
        print("settings\tWER\tCER", file=f)
        for key in test_set_wers:
@ -599,9 +599,7 @@ def save_results(
):
    test_set_wers = dict()
    for key, results in results_dict.items():
        recog_path = (
            params.res_dir / f"recogs-{test_set_name}-{key}-{params.suffix}.txt"
        )
        recog_path = params.res_dir / f"recogs-{test_set_name}-{params.suffix}.txt"
        results = sorted(results)
        store_transcripts(filename=recog_path, texts=results)

@ -609,9 +607,7 @@ def save_results(

        # The following prints out WERs, per-word error statistics and aligned
        # ref/hyp pairs.
        errs_filename = (
            params.res_dir / f"errs-{test_set_name}-{key}-{params.suffix}.txt"
        )
        errs_filename = params.res_dir / f"errs-{test_set_name}-{params.suffix}.txt"
        with open(errs_filename, "w") as f:
            wer = write_error_stats(
                f, f"{test_set_name}-{key}", results, enable_log=True
@ -621,9 +617,7 @@ def save_results(
        logging.info("Wrote detailed error stats to {}".format(errs_filename))

    test_set_wers = sorted(test_set_wers.items(), key=lambda x: x[1])
    errs_info = (
        params.res_dir / f"wer-summary-{test_set_name}-{key}-{params.suffix}.txt"
    )
    errs_info = params.res_dir / f"wer-summary-{test_set_name}-{params.suffix}.txt"
    with open(errs_info, "w") as f:
        print("settings\tWER", file=f)
        for key, val in test_set_wers:
@ -399,9 +399,7 @@ def save_results(
):
    test_set_wers = dict()
    for key, results in results_dict.items():
        recog_path = (
            params.res_dir / f"recogs-{test_set_name}-{key}-{params.suffix}.txt"
        )
        recog_path = params.res_dir / f"recogs-{test_set_name}-{params.suffix}.txt"
        results = post_processing(results)
        results = sorted(results)
        store_transcripts(filename=recog_path, texts=results)
@ -409,9 +407,7 @@ def save_results(

        # The following prints out WERs, per-word error statistics and aligned
        # ref/hyp pairs.
        errs_filename = (
            params.res_dir / f"errs-{test_set_name}-{key}-{params.suffix}.txt"
        )
        errs_filename = params.res_dir / f"errs-{test_set_name}-{params.suffix}.txt"
        with open(errs_filename, "w") as f:
            wer = write_error_stats(
                f, f"{test_set_name}-{key}", results, enable_log=True
@ -421,9 +417,7 @@ def save_results(
        logging.info("Wrote detailed error stats to {}".format(errs_filename))

    test_set_wers = sorted(test_set_wers.items(), key=lambda x: x[1])
    errs_info = (
        params.res_dir / f"wer-summary-{test_set_name}-{key}-{params.suffix}.txt"
    )
    errs_info = params.res_dir / f"wer-summary-{test_set_name}-{params.suffix}.txt"
    with open(errs_info, "w") as f:
        print("settings\tWER", file=f)
        for key, val in test_set_wers:
@ -540,6 +540,10 @@ for m in greedy_search fast_beam_search modified_beam_search ; do
done
```

Note that a small change is made to the `pruned_transducer_stateless7/decoder.py` in
this [PR](/ceph-data4/yangxiaoyu/softwares/icefall_development/icefall_random_padding/egs/librispeech/ASR/pruned_transducer_stateless7/exp_960h_no_paddingidx_ngpu4/tensorboard) to address the
problem of emitting the first symbol at the very beginning. If you need a
model without this issue, please download the model from here: <https://huggingface.co/marcoyang/icefall-asr-librispeech-pruned-transducer-stateless7-2023-03-10>

### LibriSpeech BPE training results (Pruned Stateless LSTM RNN-T + gradient filter)
@ -728,18 +728,14 @@ def save_results(
    test_set_wers = dict()
    test_set_delays = dict()
    for key, results in results_dict.items():
        recog_path = (
            params.res_dir / f"recogs-{test_set_name}-{key}-{params.suffix}.txt"
        )
        recog_path = params.res_dir / f"recogs-{test_set_name}-{params.suffix}.txt"
        results = sorted(results)
        store_transcripts_and_timestamps(filename=recog_path, texts=results)
        logging.info(f"The transcripts are stored in {recog_path}")

        # The following prints out WERs, per-word error statistics and aligned
        # ref/hyp pairs.
        errs_filename = (
            params.res_dir / f"errs-{test_set_name}-{key}-{params.suffix}.txt"
        )
        errs_filename = params.res_dir / f"errs-{test_set_name}-{params.suffix}.txt"
        with open(errs_filename, "w") as f:
            wer, mean_delay, var_delay = write_error_stats_with_timestamps(
                f,
@ -754,9 +750,7 @@ def save_results(
        logging.info("Wrote detailed error stats to {}".format(errs_filename))

    test_set_wers = sorted(test_set_wers.items(), key=lambda x: x[1])
    errs_info = (
        params.res_dir / f"wer-summary-{test_set_name}-{key}-{params.suffix}.txt"
    )
    errs_info = params.res_dir / f"wer-summary-{test_set_name}-{params.suffix}.txt"
    with open(errs_info, "w") as f:
        print("settings\tWER", file=f)
        for key, val in test_set_wers:
@ -765,8 +759,7 @@ def save_results(
    # sort according to the mean start symbol delay
    test_set_delays = sorted(test_set_delays.items(), key=lambda x: x[1][0][0])
    delays_info = (
        params.res_dir
        / f"symbol-delay-summary-{test_set_name}-{key}-{params.suffix}.txt"
        params.res_dir / f"symbol-delay-summary-{test_set_name}-{params.suffix}.txt"
    )
    with open(delays_info, "w") as f:
        print("settings\t(start, end) symbol-delay (s) (start, end)", file=f)
@ -432,18 +432,14 @@ def save_results(
):
    test_set_wers = dict()
    for key, results in results_dict.items():
        recog_path = (
            params.res_dir / f"recogs-{test_set_name}-{key}-{params.suffix}.txt"
        )
        recog_path = params.res_dir / f"recogs-{test_set_name}-{params.suffix}.txt"
        results = sorted(results)
        store_transcripts(filename=recog_path, texts=results)
        logging.info(f"The transcripts are stored in {recog_path}")

        # The following prints out WERs, per-word error statistics and aligned
        # ref/hyp pairs.
        errs_filename = (
            params.res_dir / f"errs-{test_set_name}-{key}-{params.suffix}.txt"
        )
        errs_filename = params.res_dir / f"errs-{test_set_name}-{params.suffix}.txt"
        with open(errs_filename, "w") as f:
            wer = write_error_stats(
                f, f"{test_set_name}-{key}", results, enable_log=True
@ -453,9 +449,7 @@ def save_results(
        logging.info("Wrote detailed error stats to {}".format(errs_filename))

    test_set_wers = sorted(test_set_wers.items(), key=lambda x: x[1])
    errs_info = (
        params.res_dir / f"wer-summary-{test_set_name}-{key}-{params.suffix}.txt"
    )
    errs_info = params.res_dir / f"wer-summary-{test_set_name}-{params.suffix}.txt"
    with open(errs_info, "w") as f:
        print("settings\tWER", file=f)
        for key, val in test_set_wers:
@ -750,17 +750,13 @@ def save_results(
):
    test_set_wers = dict()
    for key, results in results_dict.items():
        recog_path = (
            params.res_dir / f"recogs-{test_set_name}-{key}-{params.suffix}.txt"
        )
        recog_path = params.res_dir / f"recogs-{test_set_name}-{params.suffix}.txt"
        store_transcripts(filename=recog_path, texts=sorted(results))
        logging.info(f"The transcripts are stored in {recog_path}")

        # The following prints out WERs, per-word error statistics and aligned
        # ref/hyp pairs.
        errs_filename = (
            params.res_dir / f"errs-{test_set_name}-{key}-{params.suffix}.txt"
        )
        errs_filename = params.res_dir / f"errs-{test_set_name}-{params.suffix}.txt"
        with open(errs_filename, "w") as f:
            wer = write_error_stats(
                f, f"{test_set_name}-{key}", results, enable_log=True
@ -770,9 +766,7 @@ def save_results(
        logging.info("Wrote detailed error stats to {}".format(errs_filename))

    test_set_wers = sorted(test_set_wers.items(), key=lambda x: x[1])
    errs_info = (
        params.res_dir / f"wer-summary-{test_set_name}-{key}-{params.suffix}.txt"
    )
    errs_info = params.res_dir / f"wer-summary-{test_set_name}-{params.suffix}.txt"
    with open(errs_info, "w") as f:
        print("settings\tWER", file=f)
        for key, val in test_set_wers:
@ -432,18 +432,14 @@ def save_results(
):
    test_set_wers = dict()
    for key, results in results_dict.items():
        recog_path = (
            params.res_dir / f"recogs-{test_set_name}-{key}-{params.suffix}.txt"
        )
        recog_path = params.res_dir / f"recogs-{test_set_name}-{params.suffix}.txt"
        results = sorted(results)
        store_transcripts(filename=recog_path, texts=results)
        logging.info(f"The transcripts are stored in {recog_path}")

        # The following prints out WERs, per-word error statistics and aligned
        # ref/hyp pairs.
        errs_filename = (
            params.res_dir / f"errs-{test_set_name}-{key}-{params.suffix}.txt"
        )
        errs_filename = params.res_dir / f"errs-{test_set_name}-{params.suffix}.txt"
        with open(errs_filename, "w") as f:
            wer = write_error_stats(
                f, f"{test_set_name}-{key}", results, enable_log=True
@ -453,9 +449,7 @@ def save_results(
        logging.info("Wrote detailed error stats to {}".format(errs_filename))

    test_set_wers = sorted(test_set_wers.items(), key=lambda x: x[1])
    errs_info = (
        params.res_dir / f"wer-summary-{test_set_name}-{key}-{params.suffix}.txt"
    )
    errs_info = params.res_dir / f"wer-summary-{test_set_name}-{params.suffix}.txt"
    with open(errs_info, "w") as f:
        print("settings\tWER", file=f)
        for key, val in test_set_wers:
@ -750,17 +750,13 @@ def save_results(
):
    test_set_wers = dict()
    for key, results in results_dict.items():
        recog_path = (
            params.res_dir / f"recogs-{test_set_name}-{key}-{params.suffix}.txt"
        )
        recog_path = params.res_dir / f"recogs-{test_set_name}-{params.suffix}.txt"
        store_transcripts(filename=recog_path, texts=sorted(results))
        logging.info(f"The transcripts are stored in {recog_path}")

        # The following prints out WERs, per-word error statistics and aligned
        # ref/hyp pairs.
        errs_filename = (
            params.res_dir / f"errs-{test_set_name}-{key}-{params.suffix}.txt"
        )
        errs_filename = params.res_dir / f"errs-{test_set_name}-{params.suffix}.txt"
        with open(errs_filename, "w") as f:
            wer = write_error_stats(
                f, f"{test_set_name}-{key}", results, enable_log=True
@ -770,9 +766,7 @@ def save_results(
        logging.info("Wrote detailed error stats to {}".format(errs_filename))

    test_set_wers = sorted(test_set_wers.items(), key=lambda x: x[1])
    errs_info = (
        params.res_dir / f"wer-summary-{test_set_name}-{key}-{params.suffix}.txt"
    )
    errs_info = params.res_dir / f"wer-summary-{test_set_name}-{params.suffix}.txt"
    with open(errs_info, "w") as f:
        print("settings\tWER", file=f)
        for key, val in test_set_wers:
egs/librispeech/ASR/finetune.sh (new executable file, 85 lines)

@ -0,0 +1,85 @@
#!/usr/bin/env bash

# fix segmentation fault reported in https://github.com/k2-fsa/icefall/issues/674
export PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python

set -eou pipefail

stage=-1
stop_stage=100

# This is an example script for fine-tuning. Here, we fine-tune a model trained
# on LibriSpeech on GigaSpeech. The model used for fine-tuning is
# pruned_transducer_stateless7 (zipformer). If you want to fine-tune a model
# from another recipe, you can adapt ./pruned_transducer_stateless7/finetune.py
# for that recipe. If you have any problem, please open up an issue in https://github.com/k2-fsa/icefall/issues.

# We assume that you have already prepared the GigaSpeech manifest & features under ./data.
# If you haven't done that, please see https://github.com/k2-fsa/icefall/blob/master/egs/gigaspeech/ASR/prepare.sh.

dl_dir=$PWD/download

. shared/parse_options.sh || exit 1

log() {
  # This function is from espnet
  local fname=${BASH_SOURCE[1]##*/}
  echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*"
}

if [ $stage -le -1 ] && [ $stop_stage -ge -1 ]; then
  log "Stage -1: Download Pre-trained model"

  # clone from huggingface
  git lfs install
  git clone https://huggingface.co/csukuangfj/icefall-asr-librispeech-pruned-transducer-stateless7-2022-11-11

fi

if [ $stage -le 0 ] && [ $stop_stage -ge 0 ]; then
  log "Stage 0: Start fine-tuning"

  # The following configuration of the lr schedule should work well.
  # You may also tune the following parameters to adjust the learning rate schedule.
  base_lr=0.005
  lr_epochs=100
  lr_batches=100000

  # We recommend starting from an averaged model
  finetune_ckpt=icefall-asr-librispeech-pruned-transducer-stateless7-2022-11-11/exp/pretrained.pt
  export CUDA_VISIBLE_DEVICES="0,1"

  ./pruned_transducer_stateless7/finetune.py \
    --world-size 2 \
    --master-port 18180 \
    --num-epochs 20 \
    --start-epoch 1 \
    --exp-dir pruned_transducer_stateless7/exp_giga_finetune \
    --subset S \
    --use-fp16 1 \
    --base-lr $base_lr \
    --lr-epochs $lr_epochs \
    --lr-batches $lr_batches \
    --bpe-model icefall-asr-librispeech-pruned-transducer-stateless7-2022-11-11/data/lang_bpe_500/bpe.model \
    --do-finetune True \
    --finetune-ckpt $finetune_ckpt \
    --max-duration 500
fi

if [ $stage -le 1 ] && [ $stop_stage -ge 1 ]; then
  log "Stage 1: Decoding"

  epoch=15
  avg=10

  for m in greedy_search modified_beam_search; do
    python pruned_transducer_stateless7/decode_gigaspeech.py \
      --epoch $epoch \
      --avg $avg \
      --use-averaged-model True \
      --beam-size 4 \
      --exp-dir pruned_transducer_stateless7/exp_giga_finetune \
      --max-duration 400 \
      --decoding-method $m
  done
fi
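
Since the script sources `shared/parse_options.sh`, each stage can be run on
its own by overriding `stage`/`stop_stage` from the command line (a usage
sketch):

```bash
# Run only the fine-tuning stage, skipping the model download.
./finetune.sh --stage 0 --stop-stage 0

# Decode with the fine-tuned checkpoints.
./finetune.sh --stage 1 --stop-stage 1
```
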
@ -54,10 +54,20 @@ def get_args():
        help="""Path to the bpe.model. If not None, we will remove short and
        long utterances before extracting features""",
    )

    parser.add_argument(
        "--dataset",
        type=str,
        help="""Dataset parts to compute fbank. If None, we will use all""",
    )

    return parser.parse_args()


def compute_fbank_librispeech(bpe_model: Optional[str] = None):
def compute_fbank_librispeech(
    bpe_model: Optional[str] = None,
    dataset: Optional[str] = None,
):
    src_dir = Path("data/manifests")
    output_dir = Path("data/fbank")
    num_jobs = min(15, os.cpu_count())
@ -68,6 +78,7 @@ def compute_fbank_librispeech(bpe_model: Optional[str] = None):
        sp = spm.SentencePieceProcessor()
        sp.load(bpe_model)

    if dataset is None:
        dataset_parts = (
            "dev-clean",
            "dev-other",
@ -77,6 +88,9 @@ def compute_fbank_librispeech(bpe_model: Optional[str] = None):
            "train-clean-360",
            "train-other-500",
        )
    else:
        dataset_parts = dataset.split(" ", -1)

    prefix = "librispeech"
    suffix = "jsonl.gz"
    manifests = read_manifests_if_cached(
@ -131,4 +145,4 @@ if __name__ == "__main__":
    logging.basicConfig(format=formatter, level=logging.INFO)
    args = get_args()
    logging.info(vars(args))
    compute_fbank_librispeech(bpe_model=args.bpe_model)
    compute_fbank_librispeech(bpe_model=args.bpe_model, dataset=args.dataset)
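
With this change, a space-separated list of parts can be passed via
`--dataset`; for example (a sketch, using subsets from the list above):

```bash
# Compute fbank features only for the two dev subsets.
./local/compute_fbank_librispeech.py --dataset 'dev-clean dev-other'
```
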
@ -566,18 +566,14 @@ def save_results(
):
    test_set_wers = dict()
    for key, results in results_dict.items():
        recog_path = (
            params.res_dir / f"recogs-{test_set_name}-{key}-{params.suffix}.txt"
        )
        recog_path = params.res_dir / f"recogs-{test_set_name}-{params.suffix}.txt"
        results = sorted(results)
        store_transcripts(filename=recog_path, texts=results)
        logging.info(f"The transcripts are stored in {recog_path}")

        # The following prints out WERs, per-word error statistics and aligned
        # ref/hyp pairs.
        errs_filename = (
            params.res_dir / f"errs-{test_set_name}-{key}-{params.suffix}.txt"
        )
        errs_filename = params.res_dir / f"errs-{test_set_name}-{params.suffix}.txt"
        with open(errs_filename, "w") as f:
            wer = write_error_stats(
                f, f"{test_set_name}-{key}", results, enable_log=True
@ -587,9 +583,7 @@ def save_results(
        logging.info("Wrote detailed error stats to {}".format(errs_filename))

    test_set_wers = sorted(test_set_wers.items(), key=lambda x: x[1])
    errs_info = (
        params.res_dir / f"wer-summary-{test_set_name}-{key}-{params.suffix}.txt"
    )
    errs_info = params.res_dir / f"wer-summary-{test_set_name}-{params.suffix}.txt"
    with open(errs_info, "w") as f:
        print("settings\tWER", file=f)
        for key, val in test_set_wers:
@ -742,17 +742,13 @@ def save_results(
):
    test_set_wers = dict()
    for key, results in results_dict.items():
        recog_path = (
            params.res_dir / f"recogs-{test_set_name}-{key}-{params.suffix}.txt"
        )
        recog_path = params.res_dir / f"recogs-{test_set_name}-{params.suffix}.txt"
        store_transcripts(filename=recog_path, texts=sorted(results))
        logging.info(f"The transcripts are stored in {recog_path}")

        # The following prints out WERs, per-word error statistics and aligned
        # ref/hyp pairs.
        errs_filename = (
            params.res_dir / f"errs-{test_set_name}-{key}-{params.suffix}.txt"
        )
        errs_filename = params.res_dir / f"errs-{test_set_name}-{params.suffix}.txt"
        with open(errs_filename, "w") as f:
            wer = write_error_stats(
                f, f"{test_set_name}-{key}", results, enable_log=True
@ -762,9 +758,7 @@ def save_results(
        logging.info("Wrote detailed error stats to {}".format(errs_filename))

    test_set_wers = sorted(test_set_wers.items(), key=lambda x: x[1])
    errs_info = (
        params.res_dir / f"wer-summary-{test_set_name}-{key}-{params.suffix}.txt"
    )
    errs_info = params.res_dir / f"wer-summary-{test_set_name}-{params.suffix}.txt"
    with open(errs_info, "w") as f:
        print("settings\tWER", file=f)
        for key, val in test_set_wers:
@ -702,18 +702,14 @@ def save_results(
):
    test_set_wers = dict()
    for key, results in results_dict.items():
        recog_path = (
            params.res_dir / f"recogs-{test_set_name}-{key}-{params.suffix}.txt"
        )
        recog_path = params.res_dir / f"recogs-{test_set_name}-{params.suffix}.txt"
        results = sorted(results)
        store_transcripts(filename=recog_path, texts=results)
        logging.info(f"The transcripts are stored in {recog_path}")

        # The following prints out WERs, per-word error statistics and aligned
        # ref/hyp pairs.
        errs_filename = (
            params.res_dir / f"errs-{test_set_name}-{key}-{params.suffix}.txt"
        )
        errs_filename = params.res_dir / f"errs-{test_set_name}-{params.suffix}.txt"
        with open(errs_filename, "w") as f:
            wer = write_error_stats(
                f, f"{test_set_name}-{key}", results, enable_log=True
@ -723,9 +719,7 @@ def save_results(
        logging.info("Wrote detailed error stats to {}".format(errs_filename))

    test_set_wers = sorted(test_set_wers.items(), key=lambda x: x[1])
    errs_info = (
        params.res_dir / f"wer-summary-{test_set_name}-{key}-{params.suffix}.txt"
    )
    errs_info = params.res_dir / f"wer-summary-{test_set_name}-{params.suffix}.txt"
    with open(errs_info, "w") as f:
        print("settings\tWER", file=f)
        for key, val in test_set_wers:
@ -611,18 +611,14 @@ def save_results(
    test_set_wers = dict()
    test_set_delays = dict()
    for key, results in results_dict.items():
        recog_path = (
            params.res_dir / f"recogs-{test_set_name}-{key}-{params.suffix}.txt"
        )
        recog_path = params.res_dir / f"recogs-{test_set_name}-{params.suffix}.txt"
        results = sorted(results)
        store_transcripts_and_timestamps(filename=recog_path, texts=results)
        logging.info(f"The transcripts are stored in {recog_path}")

        # The following prints out WERs, per-word error statistics and aligned
        # ref/hyp pairs.
        errs_filename = (
            params.res_dir / f"errs-{test_set_name}-{key}-{params.suffix}.txt"
        )
        errs_filename = params.res_dir / f"errs-{test_set_name}-{params.suffix}.txt"
        with open(errs_filename, "w") as f:
            wer, mean_delay, var_delay = write_error_stats_with_timestamps(
                f, f"{test_set_name}-{key}", results, enable_log=True
@ -633,9 +629,7 @@ def save_results(
        logging.info("Wrote detailed error stats to {}".format(errs_filename))

    test_set_wers = sorted(test_set_wers.items(), key=lambda x: x[1])
    errs_info = (
        params.res_dir / f"wer-summary-{test_set_name}-{key}-{params.suffix}.txt"
    )
    errs_info = params.res_dir / f"wer-summary-{test_set_name}-{params.suffix}.txt"
    with open(errs_info, "w") as f:
        print("settings\tWER", file=f)
        for key, val in test_set_wers:
@ -643,8 +637,7 @@ def save_results(

    test_set_delays = sorted(test_set_delays.items(), key=lambda x: x[1][0])
    delays_info = (
        params.res_dir
        / f"symbol-delay-summary-{test_set_name}-{key}-{params.suffix}.txt"
        params.res_dir / f"symbol-delay-summary-{test_set_name}-{params.suffix}.txt"
    )
    with open(delays_info, "w") as f:
        print("settings\tsymbol-delay", file=f)
|
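The streaming variant above additionally tracks per-key symbol delays. A small sketch of the summary step it ends with, sorting by mean delay via key=lambda x: x[1][0]; the sample values and output formatting are assumptions.

# Assumed shape: key -> (mean_delay, var_delay), as produced by
# write_error_stats_with_timestamps in the hunk above.
from pathlib import Path

test_set_delays = {"greedy_search": (0.25, 0.01), "fast_beam_search": (0.31, 0.02)}
res_dir, test_set_name, suffix = Path("."), "test-clean", "epoch-30-avg-9"

delays = sorted(test_set_delays.items(), key=lambda x: x[1][0])
delays_info = res_dir / f"symbol-delay-summary-{test_set_name}-{suffix}.txt"
with open(delays_info, "w") as f:
    print("settings\tsymbol-delay", file=f)
    for key, (mean, var) in delays:
        print(f"{key}\t{mean}\t{var}", file=f)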
@ -742,17 +742,13 @@ def save_results(
):
    test_set_wers = dict()
    for key, results in results_dict.items():
        recog_path = (
            params.res_dir / f"recogs-{test_set_name}-{key}-{params.suffix}.txt"
        )
        recog_path = params.res_dir / f"recogs-{test_set_name}-{params.suffix}.txt"
        store_transcripts(filename=recog_path, texts=sorted(results))
        logging.info(f"The transcripts are stored in {recog_path}")

        # The following prints out WERs, per-word error statistics and aligned
        # ref/hyp pairs.
        errs_filename = (
            params.res_dir / f"errs-{test_set_name}-{key}-{params.suffix}.txt"
        )
        errs_filename = params.res_dir / f"errs-{test_set_name}-{params.suffix}.txt"
        with open(errs_filename, "w") as f:
            wer = write_error_stats(
                f, f"{test_set_name}-{key}", results, enable_log=True
@ -762,9 +758,7 @@ def save_results(
        logging.info("Wrote detailed error stats to {}".format(errs_filename))

    test_set_wers = sorted(test_set_wers.items(), key=lambda x: x[1])
    errs_info = (
        params.res_dir / f"wer-summary-{test_set_name}-{key}-{params.suffix}.txt"
    )
    errs_info = params.res_dir / f"wer-summary-{test_set_name}-{params.suffix}.txt"
    with open(errs_info, "w") as f:
        print("settings\tWER", file=f)
        for key, val in test_set_wers:
|
@ -386,17 +386,13 @@ def save_results(
):
    test_set_wers = dict()
    for key, results in results_dict.items():
        recog_path = (
            params.res_dir / f"recogs-{test_set_name}-{key}-{params.suffix}.txt"
        )
        recog_path = params.res_dir / f"recogs-{test_set_name}-{params.suffix}.txt"
        store_transcripts(filename=recog_path, texts=results)
        logging.info(f"The transcripts are stored in {recog_path}")

        # The following prints out WERs, per-word error statistics and aligned
        # ref/hyp pairs.
        errs_filename = (
            params.res_dir / f"errs-{test_set_name}-{key}-{params.suffix}.txt"
        )
        errs_filename = params.res_dir / f"errs-{test_set_name}-{params.suffix}.txt"
        with open(errs_filename, "w") as f:
            wer = write_error_stats(
                f, f"{test_set_name}-{key}", results, enable_log=True
@ -406,9 +402,7 @@ def save_results(
        logging.info("Wrote detailed error stats to {}".format(errs_filename))

    test_set_wers = sorted(test_set_wers.items(), key=lambda x: x[1])
    errs_info = (
        params.res_dir / f"wer-summary-{test_set_name}-{key}-{params.suffix}.txt"
    )
    errs_info = params.res_dir / f"wer-summary-{test_set_name}-{params.suffix}.txt"
    with open(errs_info, "w") as f:
        print("settings\tWER", file=f)
        for key, val in test_set_wers:
|
@ -420,18 +420,14 @@ def save_results(
):
    test_set_wers = dict()
    for key, results in results_dict.items():
        recog_path = (
            params.res_dir / f"recogs-{test_set_name}-{key}-{params.suffix}.txt"
        )
        recog_path = params.res_dir / f"recogs-{test_set_name}-{params.suffix}.txt"
        results = sorted(results)
        store_transcripts(filename=recog_path, texts=results)
        logging.info(f"The transcripts are stored in {recog_path}")

        # The following prints out WERs, per-word error statistics and aligned
        # ref/hyp pairs.
        errs_filename = (
            params.res_dir / f"errs-{test_set_name}-{key}-{params.suffix}.txt"
        )
        errs_filename = params.res_dir / f"errs-{test_set_name}-{params.suffix}.txt"
        with open(errs_filename, "w") as f:
            wer = write_error_stats(
                f, f"{test_set_name}-{key}", results, enable_log=True
@ -441,9 +437,7 @@ def save_results(
        logging.info("Wrote detailed error stats to {}".format(errs_filename))

    test_set_wers = sorted(test_set_wers.items(), key=lambda x: x[1])
    errs_info = (
        params.res_dir / f"wer-summary-{test_set_name}-{key}-{params.suffix}.txt"
    )
    errs_info = params.res_dir / f"wer-summary-{test_set_name}-{params.suffix}.txt"
    with open(errs_info, "w") as f:
        print("settings\tWER", file=f)
        for key, val in test_set_wers:
|
@ -585,18 +585,14 @@ def save_results(
):
    test_set_wers = dict()
    for key, results in results_dict.items():
        recog_path = (
            params.res_dir / f"recogs-{test_set_name}-{key}-{params.suffix}.txt"
        )
        recog_path = params.res_dir / f"recogs-{test_set_name}-{params.suffix}.txt"
        results = sorted(results)
        store_transcripts(filename=recog_path, texts=results)
        logging.info(f"The transcripts are stored in {recog_path}")

        # The following prints out WERs, per-word error statistics and aligned
        # ref/hyp pairs.
        errs_filename = (
            params.res_dir / f"errs-{test_set_name}-{key}-{params.suffix}.txt"
        )
        errs_filename = params.res_dir / f"errs-{test_set_name}-{params.suffix}.txt"
        with open(errs_filename, "w") as f:
            wer = write_error_stats(
                f, f"{test_set_name}-{key}", results, enable_log=True
@ -606,9 +602,7 @@ def save_results(
        logging.info("Wrote detailed error stats to {}".format(errs_filename))

    test_set_wers = sorted(test_set_wers.items(), key=lambda x: x[1])
    errs_info = (
        params.res_dir / f"wer-summary-{test_set_name}-{key}-{params.suffix}.txt"
    )
    errs_info = params.res_dir / f"wer-summary-{test_set_name}-{params.suffix}.txt"
    with open(errs_info, "w") as f:
        print("settings\tWER", file=f)
        for key, val in test_set_wers:
|
@ -58,7 +58,6 @@ class Decoder(nn.Module):
        self.embedding = nn.Embedding(
            num_embeddings=vocab_size,
            embedding_dim=embedding_dim,
            padding_idx=blank_id,
        )
        self.blank_id = blank_id
        self.unk_id = unk_id
|
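For reference, a standalone sketch of the embedding construction this hunk touches; the hunk header shows the block shrinks by one line, but this marker-less rendering does not reveal which line was removed, so the sketch simply mirrors the lines shown. The sizes are assumptions.

import torch.nn as nn

vocab_size, embedding_dim, blank_id = 500, 512, 0  # assumed sizes
embedding = nn.Embedding(
    num_embeddings=vocab_size,
    embedding_dim=embedding_dim,
    padding_idx=blank_id,  # the blank row is zero-initialized and receives no gradient
)
print(embedding.weight[blank_id].abs().sum())  # prints tensor(0.)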
@ -423,9 +423,7 @@ def save_results(
):
    test_set_wers = dict()
    for key, results in results_dict.items():
        recog_path = (
            params.res_dir / f"recogs-{test_set_name}-{key}-{params.suffix}.txt"
        )
        recog_path = params.res_dir / f"recogs-{test_set_name}-{params.suffix}.txt"
        # sort results so we can easily compare the difference between two
        # recognition results
        results = sorted(results)
@ -434,9 +432,7 @@ def save_results(

        # The following prints out WERs, per-word error statistics and aligned
        # ref/hyp pairs.
        errs_filename = (
            params.res_dir / f"errs-{test_set_name}-{key}-{params.suffix}.txt"
        )
        errs_filename = params.res_dir / f"errs-{test_set_name}-{params.suffix}.txt"
        with open(errs_filename, "w") as f:
            wer = write_error_stats(
                f, f"{test_set_name}-{key}", results, enable_log=True
@ -446,9 +442,7 @@ def save_results(
        logging.info("Wrote detailed error stats to {}".format(errs_filename))

    test_set_wers = sorted(test_set_wers.items(), key=lambda x: x[1])
    errs_info = (
        params.res_dir / f"wer-summary-{test_set_name}-{key}-{params.suffix}.txt"
    )
    errs_info = params.res_dir / f"wer-summary-{test_set_name}-{params.suffix}.txt"
    with open(errs_info, "w") as f:
        print("settings\tWER", file=f)
        for key, val in test_set_wers:
|
@ -609,18 +609,14 @@ def save_results(
):
    test_set_wers = dict()
    for key, results in results_dict.items():
        recog_path = (
            params.res_dir / f"recogs-{test_set_name}-{key}-{params.suffix}.txt"
        )
        recog_path = params.res_dir / f"recogs-{test_set_name}-{params.suffix}.txt"
        results = sorted(results)
        store_transcripts(filename=recog_path, texts=results)
        logging.info(f"The transcripts are stored in {recog_path}")

        # The following prints out WERs, per-word error statistics and aligned
        # ref/hyp pairs.
        errs_filename = (
            params.res_dir / f"errs-{test_set_name}-{key}-{params.suffix}.txt"
        )
        errs_filename = params.res_dir / f"errs-{test_set_name}-{params.suffix}.txt"
        with open(errs_filename, "w") as f:
            wer = write_error_stats(
                f, f"{test_set_name}-{key}", results, enable_log=True
@ -630,9 +626,7 @@ def save_results(
        logging.info("Wrote detailed error stats to {}".format(errs_filename))

    test_set_wers = sorted(test_set_wers.items(), key=lambda x: x[1])
    errs_info = (
        params.res_dir / f"wer-summary-{test_set_name}-{key}-{params.suffix}.txt"
    )
    errs_info = params.res_dir / f"wer-summary-{test_set_name}-{params.suffix}.txt"
    with open(errs_info, "w") as f:
        print("settings\tWER", file=f)
        for key, val in test_set_wers:
|
@ -59,7 +59,6 @@ class Decoder(nn.Module):
        self.embedding = ScaledEmbedding(
            num_embeddings=vocab_size,
            embedding_dim=decoder_dim,
            padding_idx=blank_id,
        )
        self.blank_id = blank_id
|
@ -425,9 +425,7 @@ def save_results(
):
    test_set_wers = dict()
    for key, results in results_dict.items():
        recog_path = (
            params.res_dir / f"recogs-{test_set_name}-{key}-{params.suffix}.txt"
        )
        recog_path = params.res_dir / f"recogs-{test_set_name}-{params.suffix}.txt"
        # sort results so we can easily compare the difference between two
        # recognition results
        results = sorted(results)
@ -436,9 +434,7 @@ def save_results(

        # The following prints out WERs, per-word error statistics and aligned
        # ref/hyp pairs.
        errs_filename = (
            params.res_dir / f"errs-{test_set_name}-{key}-{params.suffix}.txt"
        )
        errs_filename = params.res_dir / f"errs-{test_set_name}-{params.suffix}.txt"
        with open(errs_filename, "w") as f:
            wer = write_error_stats(
                f, f"{test_set_name}-{key}", results, enable_log=True
@ -448,9 +444,7 @@ def save_results(
        logging.info("Wrote detailed error stats to {}".format(errs_filename))

    test_set_wers = sorted(test_set_wers.items(), key=lambda x: x[1])
    errs_info = (
        params.res_dir / f"wer-summary-{test_set_name}-{key}-{params.suffix}.txt"
    )
    errs_info = params.res_dir / f"wer-summary-{test_set_name}-{params.suffix}.txt"
    with open(errs_info, "w") as f:
        print("settings\tWER", file=f)
        for key, val in test_set_wers:
|
@ -869,18 +869,14 @@ def save_results(
):
    test_set_wers = dict()
    for key, results in results_dict.items():
        recog_path = (
            params.res_dir / f"recogs-{test_set_name}-{key}-{params.suffix}.txt"
        )
        recog_path = params.res_dir / f"recogs-{test_set_name}-{params.suffix}.txt"
        results = sorted(results)
        store_transcripts(filename=recog_path, texts=results)
        logging.info(f"The transcripts are stored in {recog_path}")

        # The following prints out WERs, per-word error statistics and aligned
        # ref/hyp pairs.
        errs_filename = (
            params.res_dir / f"errs-{test_set_name}-{key}-{params.suffix}.txt"
        )
        errs_filename = params.res_dir / f"errs-{test_set_name}-{params.suffix}.txt"
        with open(errs_filename, "w") as f:
            wer = write_error_stats(
                f, f"{test_set_name}-{key}", results, enable_log=True
@ -890,9 +886,7 @@ def save_results(
        logging.info("Wrote detailed error stats to {}".format(errs_filename))

    test_set_wers = sorted(test_set_wers.items(), key=lambda x: x[1])
    errs_info = (
        params.res_dir / f"wer-summary-{test_set_name}-{key}-{params.suffix}.txt"
    )
    errs_info = params.res_dir / f"wer-summary-{test_set_name}-{params.suffix}.txt"
    with open(errs_info, "w") as f:
        print("settings\tWER", file=f)
        for key, val in test_set_wers:
|
@ -426,18 +426,14 @@ def save_results(
):
    test_set_wers = dict()
    for key, results in results_dict.items():
        recog_path = (
            params.res_dir / f"recogs-{test_set_name}-{key}-{params.suffix}.txt"
        )
        recog_path = params.res_dir / f"recogs-{test_set_name}-{params.suffix}.txt"
        results = sorted(results)
        store_transcripts(filename=recog_path, texts=results)
        logging.info(f"The transcripts are stored in {recog_path}")

        # The following prints out WERs, per-word error statistics and aligned
        # ref/hyp pairs.
        errs_filename = (
            params.res_dir / f"errs-{test_set_name}-{key}-{params.suffix}.txt"
        )
        errs_filename = params.res_dir / f"errs-{test_set_name}-{params.suffix}.txt"
        with open(errs_filename, "w") as f:
            wer = write_error_stats(
                f, f"{test_set_name}-{key}", results, enable_log=True
@ -447,9 +443,7 @@ def save_results(
        logging.info("Wrote detailed error stats to {}".format(errs_filename))

    test_set_wers = sorted(test_set_wers.items(), key=lambda x: x[1])
    errs_info = (
        params.res_dir / f"wer-summary-{test_set_name}-{key}-{params.suffix}.txt"
    )
    errs_info = params.res_dir / f"wer-summary-{test_set_name}-{params.suffix}.txt"
    with open(errs_info, "w") as f:
        print("settings\tWER", file=f)
        for key, val in test_set_wers:
|
Some files were not shown because too many files have changed in this diff.