mirror of
https://github.com/k2-fsa/icefall.git
synced 2025-08-09 18:12:19 +00:00
* Init commit for recipes trained on multiple zh datasets. * fbank extraction for thchs30 * added support for aishell1 * added support for aishell-2 * fixes * fixes * fixes * added support for stcmds and primewords * fixes * added support for magicdata script for fbank computation not done yet * added script for magicdata fbank computation * file permission fixed * updated for the wenetspeech recipe * updated * Update preprocess_kespeech.py * updated * updated * updated * updated * file permission fixed * updated paths * fixes * added support for kespeech dev/test set fbank computation * fixes for file permission * refined support for KeSpeech * added scripts for BPE model training * updated * init commit for the multi_zh-cn zipformer recipe * disable speed perturbation by default * updated * updated * added necessary files for the zipformer recipe * removed redundant wenetspeech M and S sets * updates for multi dataset decoding * refined * formatting issues fixed * updated * minor fixes * this commit finalize the recipe (hopefully) * fixed formatting issues * minor fixes * updated * using soft links to reduce redundancy * minor updates * using soft links to reduce redundancy * minor updates * minor updates * using soft links to reduce redundancy * minor updates * Update README.md * minor updates * Update egs/multi_zh-hans/ASR/local/compute_fbank_magicdata.py Co-authored-by: Fangjun Kuang <csukuangfj@gmail.com> * Update egs/multi_zh-hans/ASR/local/compute_fbank_magicdata.py Co-authored-by: Fangjun Kuang <csukuangfj@gmail.com> * Update egs/multi_zh-hans/ASR/local/compute_fbank_stcmds.py Co-authored-by: Fangjun Kuang <csukuangfj@gmail.com> * Update egs/multi_zh-hans/ASR/local/compute_fbank_stcmds.py Co-authored-by: Fangjun Kuang <csukuangfj@gmail.com> * Update egs/multi_zh-hans/ASR/local/compute_fbank_primewords.py Co-authored-by: Fangjun Kuang <csukuangfj@gmail.com> * Update egs/multi_zh-hans/ASR/local/compute_fbank_primewords.py Co-authored-by: Fangjun Kuang 
<csukuangfj@gmail.com> * minor updates * minor fixes * fixed a formatting issue * Update preprocess_kespeech.py * Update prepare.sh * Update egs/multi_zh-hans/ASR/local/compute_fbank_kespeech_splits.py Co-authored-by: Fangjun Kuang <csukuangfj@gmail.com> * Update egs/multi_zh-hans/ASR/local/preprocess_kespeech.py Co-authored-by: Fangjun Kuang <csukuangfj@gmail.com> * removed redundant files * symlinks added * minor updates * added CI tests for `multi_zh-hans` * minor fixes * Update run-multi-zh_hans-zipformer.sh * Update run-multi-zh_hans-zipformer.sh * Update run-multi-zh_hans-zipformer.sh * Update run-multi-zh_hans-zipformer.sh * Update run-multi-zh_hans-zipformer.sh * Update run-multi-zh_hans-zipformer.sh * Update run-multi-zh_hans-zipformer.sh --------- Co-authored-by: Fangjun Kuang <csukuangfj@gmail.com>
52 lines
1.2 KiB
Bash
Executable File
52 lines
1.2 KiB
Bash
Executable File
#!/usr/bin/env bash
#
# Downloads a pre-trained model repository and runs ./zipformer/pretrained.py
# on its bundled test wavs with several decoding methods.

# Abort as soon as any command exits with a non-zero status.
set -e

#######################################
# Print a timestamped log line to stdout.
# The line has the form:
#   YYYY-mm-dd HH:MM:SS (<caller file>:<caller line>:<caller function>) <message>
# Arguments:
#   $@ - message words; they are joined with spaces ("$*").
# Outputs:
#   Writes one line to stdout. Because `echo -e` is used, backslash escapes
#   contained in the message are interpreted.
#######################################
log() {
  # This function is from espnet
  # BASH_SOURCE[1] is the file of the caller; strip its directory part.
  local fname=${BASH_SOURCE[1]##*/}
  echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*"
}

# All relative paths below are resolved from the recipe directory.
cd egs/multi_zh-hans/ASR

repo_url=https://huggingface.co/zrjin/icefall-asr-multi-zh-hans-zipformer-2023-9-2/

log "Downloading pre-trained model from $repo_url"
git lfs install
git clone "$repo_url"
# basename ignores the trailing slash of the URL, so this yields the last
# path component, which is the directory name the later steps read from.
repo=$(basename "$repo_url")

log "Display test files"
tree "$repo"/
# The glob stays outside the quotes so the shell still expands it.
ls -lh "$repo"/test_wavs/*.wav

# Make epoch-20.pt available under the name epoch-99.pt, which is the
# checkpoint path passed to --checkpoint by the decoding commands.
pushd "$repo/exp"
ln -s epoch-20.pt epoch-99.pt
popd

ls -lh "$repo"/exp/*.pt

# Decode the three bundled test wavs with greedy search.
./zipformer/pretrained.py \
  --checkpoint "$repo/exp/epoch-99.pt" \
  --tokens "$repo/data/lang_bpe_2000/tokens.txt" \
  --method greedy_search \
  "$repo/test_wavs/DEV_T0000000000.wav" \
  "$repo/test_wavs/DEV_T0000000001.wav" \
  "$repo/test_wavs/DEV_T0000000002.wav"

# Repeat the decoding run on the same test wavs for each beam-search method;
# --beam-size 4 is passed for both.
for method in modified_beam_search fast_beam_search; do
  log "$method"

  ./zipformer/pretrained.py \
    --method "$method" \
    --beam-size 4 \
    --checkpoint "$repo/exp/epoch-99.pt" \
    --tokens "$repo/data/lang_bpe_2000/tokens.txt" \
    "$repo/test_wavs/DEV_T0000000000.wav" \
    "$repo/test_wavs/DEV_T0000000001.wav" \
    "$repo/test_wavs/DEV_T0000000002.wav"
done