diff --git a/.github/scripts/ljspeech/TTS/run-matcha.sh b/.github/scripts/ljspeech/TTS/run-matcha.sh
index e69de29bb..b1da5ff13 100755
--- a/.github/scripts/ljspeech/TTS/run-matcha.sh
+++ b/.github/scripts/ljspeech/TTS/run-matcha.sh
@@ -0,0 +1,135 @@
+#!/usr/bin/env bash
+#
+# CI smoke test for the LJSpeech Matcha-TTS recipe. It installs dependencies,
+# prepares a small data subset, trains for one epoch, runs inference, and
+# exports/runs ONNX models. Expects the current directory to contain
+# egs/ljspeech/TTS.
+
+set -ex
+
+# -y: there is no terminal to answer apt-get's confirmation prompt.
+# Skip sudo when already root, where it may not be installed.
+if [[ "$(id -u)" -eq 0 ]]; then
+  apt-get install -y sox
+else
+  sudo apt-get install -y sox
+fi
+
+python3 -m pip install piper_phonemize -f https://k2-fsa.github.io/icefall/piper_phonemize.html
+python3 -m pip install espnet_tts_frontend
+python3 -m pip install numba
+
+python3 -m pip install conformer==0.3.2 diffusers
+
+log() {
+  # This function is from espnet
+  local fname=${BASH_SOURCE[1]##*/}
+  echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*"
+}
+
+cd egs/ljspeech/TTS
+
+# Substitute smaller constants into prepare.sh (presumably to keep the CI run
+# short) and print the resulting changes.
+sed -i.bak s/600/8/g ./prepare.sh
+sed -i.bak s/"first 100"/"first 3"/g ./prepare.sh
+sed -i.bak s/500/5/g ./prepare.sh
+git diff
+
+# Download the CI subset of LJSpeech into ./download and run ./prepare.sh.
+function prepare_data() {
+  # We have created a subset of the data for testing
+  #
+  mkdir -p download
+  pushd download
+  wget -q https://huggingface.co/csukuangfj/ljspeech-subset-for-ci-test/resolve/main/LJSpeech-1.1.tar.bz2
+  tar xvf LJSpeech-1.1.tar.bz2
+  popd
+
+  ./prepare.sh
+  tree .
+}
+
+# Patch matcha/train.py (1500 -> 3) and train one epoch into matcha/exp.
+function train() {
+  pushd ./matcha
+  sed -i.bak s/1500/3/g ./train.py
+  git diff .
+  popd
+
+  ./matcha/train.py \
+    --exp-dir matcha/exp \
+    --num-epochs 1 \
+    --save-every-n 1 \
+    --num-buckets 2 \
+    --tokens data/tokens.txt \
+    --max-duration 20
+
+  ls -lh matcha/exp
+}
+
+# Run matcha/inference.py with --epoch 1 on matcha/exp using the downloaded
+# generator_v1 vocoder, inspect the wav with soxi, then remove both files.
+function infer() {
+  curl -SL -O https://github.com/csukuangfj/models/raw/refs/heads/master/hifigan/generator_v1
+
+  ./matcha/inference.py \
+    --epoch 1 \
+    --exp-dir ./matcha/exp \
+    --tokens data/tokens.txt \
+    --vocoder ./generator_v1 \
+    --input-text "how are you doing?" \
+    --output-wav ./generated.wav
+
+  ls -lh *.wav
+  soxi ./generated.wav
+  rm -v ./generated.wav
+  rm -v generator_v1
+}
+
+# Download a pretrained checkpoint (epoch-4000.pt), the hifigan generators and
+# cmvn.json, run the ONNX export scripts, then synthesize a wav with them.
+function export_onnx() {
+  pushd matcha/exp
+
+  curl -SL -O https://huggingface.co/csukuangfj/icefall-tts-ljspeech-matcha-en-2024-10-28/resolve/main/exp/epoch-4000.pt
+  curl -SL -O https://github.com/csukuangfj/models/raw/refs/heads/master/hifigan/generator_v1
+  curl -SL -O https://github.com/csukuangfj/models/raw/refs/heads/master/hifigan/generator_v2
+  curl -SL -O https://github.com/csukuangfj/models/raw/refs/heads/master/hifigan/generator_v3
+
+  popd
+
+  pushd data/fbank
+  # -f: do not abort under `set -e` when no *.json file exists yet.
+  rm -fv *.json
+  curl -SL -O https://huggingface.co/csukuangfj/icefall-tts-ljspeech-matcha-en-2024-10-28/resolve/main/data/cmvn.json
+  popd
+
+  # Presumably export_onnx.py looks up epoch-4000.pt under --exp-dir, so point
+  # it at the directory the checkpoint was downloaded into above.
+  ./matcha/export_onnx.py \
+    --exp-dir ./matcha/exp \
+    --epoch 4000 \
+    --tokens ./data/tokens.txt \
+    --cmvn ./data/fbank/cmvn.json
+
+  ls -lh *.onnx
+
+  python3 ./matcha/export_onnx_hifigan.py
+
+  ls -lh *.onnx
+
+  python3 ./matcha/onnx_pretrained.py \
+    --acoustic-model ./model-steps-6.onnx \
+    --vocoder ./hifigan_v1.onnx \
+    --tokens ./data/tokens.txt \
+    --input-text "how are you doing?" \
+    --output-wav /icefall/generated-matcha-tts-6.wav
+
+  ls -lh /icefall/*.wav
+  soxi /icefall/generated-matcha-tts-6.wav
+}
+
+prepare_data
+train
+infer
+export_onnx
diff --git a/.github/workflows/ljspeech.yml b/.github/workflows/ljspeech.yml
index e202d21b5..aaca73032 100644
--- a/.github/workflows/ljspeech.yml
+++ b/.github/workflows/ljspeech.yml
@@ -70,6 +70,7 @@ jobs:
             cd /icefall
             git config --global --add safe.directory /icefall
 
+            .github/scripts/ljspeech/TTS/run-matcha.sh
             .github/scripts/ljspeech/TTS/run.sh
 
       - name: display files