mirror of
https://github.com/k2-fsa/icefall.git
synced 2025-08-08 09:32:20 +00:00
158 lines
4.1 KiB
Bash
Executable File
158 lines
4.1 KiB
Bash
Executable File
#!/usr/bin/env bash
|
|
|
|
set -ex
|
|
|
|
python3 -m pip install piper_phonemize -f https://k2-fsa.github.io/icefall/piper_phonemize.html
|
|
python3 -m pip install espnet_tts_frontend
|
|
python3 -m pip install numba
|
|
|
|
log() {
|
|
# This function is from espnet
|
|
local fname=${BASH_SOURCE[1]##*/}
|
|
echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*"
|
|
}
|
|
|
|
cd egs/ljspeech/TTS
|
|
|
|
sed -i.bak s/600/8/g ./prepare.sh
|
|
sed -i.bak s/"first 100"/"first 3"/g ./prepare.sh
|
|
sed -i.bak s/500/5/g ./prepare.sh
|
|
git diff
|
|
|
|
function prepare_data() {
|
|
# We have created a subset of the data for testing
|
|
#
|
|
mkdir -p download
|
|
pushd download
|
|
wget -q https://huggingface.co/csukuangfj/ljspeech-subset-for-ci-test/resolve/main/LJSpeech-1.1.tar.bz2
|
|
tar xvf LJSpeech-1.1.tar.bz2
|
|
popd
|
|
|
|
./prepare.sh
|
|
tree .
|
|
}
|
|
|
|
function train() {
|
|
pushd ./vits
|
|
sed -i.bak s/200/3/g ./train.py
|
|
git diff .
|
|
popd
|
|
|
|
for t in low medium high; do
|
|
./vits/train.py \
|
|
--exp-dir vits/exp-$t \
|
|
--model-type $t \
|
|
--num-epochs 1 \
|
|
--save-every-n 1 \
|
|
--num-buckets 2 \
|
|
--tokens data/tokens.txt \
|
|
--max-duration 20
|
|
|
|
ls -lh vits/exp-$t
|
|
done
|
|
}
|
|
|
|
function infer() {
|
|
for t in low medium high; do
|
|
./vits/infer.py \
|
|
--num-buckets 2 \
|
|
--model-type $t \
|
|
--epoch 1 \
|
|
--exp-dir ./vits/exp-$t \
|
|
--tokens data/tokens.txt \
|
|
--max-duration 20
|
|
done
|
|
}
|
|
|
|
function export_onnx() {
|
|
for t in low medium high; do
|
|
./vits/export-onnx.py \
|
|
--model-type $t \
|
|
--epoch 1 \
|
|
--exp-dir ./vits/exp-$t \
|
|
--tokens data/tokens.txt
|
|
|
|
ls -lh vits/exp-$t/
|
|
done
|
|
}
|
|
|
|
function test_medium() {
|
|
git clone https://huggingface.co/csukuangfj/icefall-tts-ljspeech-vits-medium-2024-03-12
|
|
|
|
./vits/export-onnx.py \
|
|
--model-type medium \
|
|
--epoch 820 \
|
|
--exp-dir ./icefall-tts-ljspeech-vits-medium-2024-03-12/exp \
|
|
--tokens ./icefall-tts-ljspeech-vits-medium-2024-03-12/data/tokens.txt
|
|
|
|
ls -lh ./icefall-tts-ljspeech-vits-medium-2024-03-12/exp
|
|
|
|
./vits/test_onnx.py \
|
|
--model-filename ./icefall-tts-ljspeech-vits-medium-2024-03-12/exp/vits-epoch-820.onnx \
|
|
--tokens ./icefall-tts-ljspeech-vits-medium-2024-03-12/data/tokens.txt \
|
|
--output-filename /icefall/test-medium.wav
|
|
|
|
ls -lh /icefall/test-medium.wav
|
|
|
|
d=/icefall/vits-icefall-en_US-ljspeech-medium
|
|
mkdir $d
|
|
cp -v ./icefall-tts-ljspeech-vits-medium-2024-03-12/data/tokens.txt $d/
|
|
cp -v ./icefall-tts-ljspeech-vits-medium-2024-03-12/exp/vits-epoch-820.onnx $d/model.onnx
|
|
|
|
rm -rf icefall-tts-ljspeech-vits-medium-2024-03-12
|
|
|
|
pushd $d
|
|
wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/espeak-ng-data.tar.bz2
|
|
tar xf espeak-ng-data.tar.bz2
|
|
rm espeak-ng-data.tar.bz2
|
|
cd ..
|
|
tar cjf vits-icefall-en_US-ljspeech-medium.tar.bz2 vits-icefall-en_US-ljspeech-medium
|
|
rm -rf vits-icefall-en_US-ljspeech-medium
|
|
ls -lh *.tar.bz2
|
|
popd
|
|
}
|
|
|
|
function test_low() {
|
|
git clone https://huggingface.co/csukuangfj/icefall-tts-ljspeech-vits-low-2024-03-12
|
|
|
|
./vits/export-onnx.py \
|
|
--model-type low \
|
|
--epoch 1600 \
|
|
--exp-dir ./icefall-tts-ljspeech-vits-low-2024-03-12/exp \
|
|
--tokens ./icefall-tts-ljspeech-vits-low-2024-03-12/data/tokens.txt
|
|
|
|
ls -lh ./icefall-tts-ljspeech-vits-low-2024-03-12/exp
|
|
|
|
./vits/test_onnx.py \
|
|
--model-filename ./icefall-tts-ljspeech-vits-low-2024-03-12/exp/vits-epoch-1600.onnx \
|
|
--tokens ./icefall-tts-ljspeech-vits-low-2024-03-12/data/tokens.txt \
|
|
--output-filename /icefall/test-low.wav
|
|
|
|
ls -lh /icefall/test-low.wav
|
|
|
|
d=/icefall/vits-icefall-en_US-ljspeech-low
|
|
mkdir $d
|
|
cp -v ./icefall-tts-ljspeech-vits-low-2024-03-12/data/tokens.txt $d/
|
|
cp -v ./icefall-tts-ljspeech-vits-low-2024-03-12/exp/vits-epoch-1600.onnx $d/model.onnx
|
|
|
|
rm -rf icefall-tts-ljspeech-vits-low-2024-03-12
|
|
|
|
pushd $d
|
|
wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/espeak-ng-data.tar.bz2
|
|
tar xf espeak-ng-data.tar.bz2
|
|
rm espeak-ng-data.tar.bz2
|
|
cd ..
|
|
tar cjf vits-icefall-en_US-ljspeech-low.tar.bz2 vits-icefall-en_US-ljspeech-low
|
|
rm -rf vits-icefall-en_US-ljspeech-low
|
|
ls -lh *.tar.bz2
|
|
popd
|
|
}
|
|
|
|
prepare_data
|
|
train
|
|
infer
|
|
export_onnx
|
|
rm -rf vits/exp-{low,medium,high}
|
|
test_medium
|
|
test_low
|