diff --git a/.github/scripts/baker_zh/TTS/run-matcha.sh b/.github/scripts/baker_zh/TTS/run-matcha.sh
index f5aca7c9c..a16a4a67e 100755
--- a/.github/scripts/baker_zh/TTS/run-matcha.sh
+++ b/.github/scripts/baker_zh/TTS/run-matcha.sh
@@ -37,4 +37,120 @@ function prepare_data() {
   tree .
 }
 
+# One-epoch training smoke test; checkpoints land in matcha/exp.
+function train() {
+  pushd ./matcha
+  # Rewrite every literal 1500 in train.py to 3 (sed keeps train.py.bak).
+  # NOTE(review): presumably shortens a step interval for CI - confirm in train.py.
+  sed -i.bak s/1500/3/g ./train.py
+  git diff .
+  popd
+
+  ./matcha/train.py \
+    --exp-dir matcha/exp \
+    --num-epochs 1 \
+    --save-every-n 1 \
+    --num-buckets 2 \
+    --tokens data/tokens.txt \
+    --max-duration 20
+
+  ls -lh matcha/exp
+}
+
+# Synthesizes one sentence with the epoch-1 checkpoint and the generator_v2
+# vocoder, inspects the wav with soxi, then deletes the wav and the vocoder.
+function infer() {
+  curl -SL -O https://github.com/csukuangfj/models/raw/refs/heads/master/hifigan/generator_v2
+
+  ./matcha/infer.py \
+    --num-buckets 2 \
+    --epoch 1 \
+    --exp-dir ./matcha/exp \
+    --tokens data/tokens.txt \
+    --cmvn ./data/fbank/cmvn.json \
+    --vocoder ./generator_v2 \
+    --input-text "当夜幕降临,星光点点,伴随着微风拂面,我在静谧中感受着时光的流转,思念如涟漪荡漾,梦境如画卷展开,我与自然融为一体,沉静在这片宁静的美丽之中,感受着生命的奇迹与温柔。" \
+    --output-wav ./generated.wav
+
+  ls -lh *.wav
+  soxi ./generated.wav
+  rm -v ./generated.wav
+  # Remove the vocoder fetched above; this function never downloads generator_v1.
+  rm -v generator_v2
+}
+
+# Downloads the epoch-2000 checkpoint and its cmvn.json, runs export_onnx.py,
+# tries the result with three hifigan ONNX vocoders, and packs a release tarball.
+function export_onnx() {
+  pushd matcha/exp
+  curl -SL -O https://huggingface.co/csukuangfj/icefall-tts-baker-matcha-zh-2024-12-27/resolve/main/epoch-2000.pt
+  popd
+
+  pushd data/fbank
+  rm -v *.json
+  curl -SL -O https://huggingface.co/csukuangfj/icefall-tts-baker-matcha-zh-2024-12-27/resolve/main/cmvn.json
+  popd
+
+  ./matcha/export_onnx.py \
+    --exp-dir ./matcha/exp \
+    --epoch 2000 \
+    --tokens ./data/tokens.txt \
+    --cmvn ./data/fbank/cmvn.json
+
+  ls -lh *.onnx
+
+  if false; then
+    # The CI machine does not have enough memory to run it
+    #
+    curl -SL -O https://github.com/csukuangfj/models/raw/refs/heads/master/hifigan/generator_v1
+    curl -SL -O https://github.com/csukuangfj/models/raw/refs/heads/master/hifigan/generator_v2
+    curl -SL -O https://github.com/csukuangfj/models/raw/refs/heads/master/hifigan/generator_v3
+    python3 ./matcha/export_onnx_hifigan.py
+  else
+    curl -SL -O https://huggingface.co/csukuangfj/icefall-tts-ljspeech-matcha-en-2024-10-28/resolve/main/exp/hifigan_v1.onnx
+    curl -SL -O https://huggingface.co/csukuangfj/icefall-tts-ljspeech-matcha-en-2024-10-28/resolve/main/exp/hifigan_v2.onnx
+    curl -SL -O https://huggingface.co/csukuangfj/icefall-tts-ljspeech-matcha-en-2024-10-28/resolve/main/exp/hifigan_v3.onnx
+  fi
+
+  ls -lh *.onnx
+
+  for v in v1 v2 v3; do
+    python3 ./matcha/onnx_pretrained.py \
+      --acoustic-model ./model-steps-6.onnx \
+      --vocoder "./hifigan_$v.onnx" \
+      --tokens ./data/tokens.txt \
+      --input-text "当夜幕降临,星光点点,伴随着微风拂面,我在静谧中感受着时光的流转,思念如涟漪荡漾,梦境如画卷展开,我与自然融为一体,沉静在这片宁静的美丽之中,感受着生命的奇迹与温柔。" \
+      --output-wav "/icefall/generated-matcha-tts-steps-6-$v.wav"
+  done
+
+  ls -lh /icefall/*.wav
+  soxi /icefall/generated-matcha-tts-steps-6-*.wav
+  cp ./model-steps-*.onnx /icefall
+
+  d=matcha-icefall-zh-baker
+  mkdir "$d"
+  cp -v data/tokens.txt "$d"
+  cp model-steps-3.onnx "$d"
+  pushd "$d"
+  curl -SL -O https://github.com/csukuangfj/cppjieba/releases/download/sherpa-onnx-2024-04-19/dict.tar.bz2
+  tar xvf dict.tar.bz2
+  rm dict.tar.bz2
+
+  curl -SL -O https://huggingface.co/csukuangfj/icefall-tts-aishell3-vits-low-2024-04-06/resolve/main/data/date.fst
+  curl -SL -O https://huggingface.co/csukuangfj/icefall-tts-aishell3-vits-low-2024-04-06/resolve/main/data/number.fst
+  curl -SL -O https://huggingface.co/csukuangfj/icefall-tts-aishell3-vits-low-2024-04-06/resolve/main/data/phone.fst
+
+  ls -lh
+  popd
+  tar cvjf "$d.tar.bz2" "$d"
+  rm -rf "$d"
+  mv "$d.tar.bz2" /icefall
+}
+
 prepare_data
+train
+infer
+export_onnx
+
+rm -rfv generator_v* matcha/exp
+git checkout .
# Summary of the workflow patch below (free text ahead of a "diff --git" header,
# which patch tools skip while searching for the next header):
#   * the wav upload-artifact step and the release step drop their "if: false"
#     guard and use the matrix condition that was previously commented out;
#   * five upload-artifact steps are added, named step-2 .. step-6, one per
#     file model-steps-2.onnx .. model-steps-6.onnx;
#   * the released file glob changes from vits-icefall-*.tar.bz2 to
#     matcha-icefall-*.tar.bz2.
# NOTE(review): leading whitespace of the YAML lines was reconstructed - confirm
# it against the target file before applying.
diff --git a/.github/workflows/baker_zh.yml b/.github/workflows/baker_zh.yml
index 4aa957629..1d69446a0 100644
--- a/.github/workflows/baker_zh.yml
+++ b/.github/workflows/baker_zh.yml
@@ -79,20 +79,48 @@ jobs:
           ls -lh
 
       - uses: actions/upload-artifact@v4
-        # if: matrix.python-version == '3.9' && matrix.torch-version == '2.3.0'
-        if: false
+        if: matrix.python-version == '3.9' && matrix.torch-version == '2.3.0'
         with:
           name: generated-test-files-${{ matrix.python-version }}-${{ matrix.torch-version }}
           path: ./*.wav
 
+      - uses: actions/upload-artifact@v4
+        if: matrix.python-version == '3.9' && matrix.torch-version == '2.3.0'
+        with:
+          name: step-2
+          path: ./model-steps-2.onnx
+
+      - uses: actions/upload-artifact@v4
+        if: matrix.python-version == '3.9' && matrix.torch-version == '2.3.0'
+        with:
+          name: step-3
+          path: ./model-steps-3.onnx
+
+      - uses: actions/upload-artifact@v4
+        if: matrix.python-version == '3.9' && matrix.torch-version == '2.3.0'
+        with:
+          name: step-4
+          path: ./model-steps-4.onnx
+
+      - uses: actions/upload-artifact@v4
+        if: matrix.python-version == '3.9' && matrix.torch-version == '2.3.0'
+        with:
+          name: step-5
+          path: ./model-steps-5.onnx
+
+      - uses: actions/upload-artifact@v4
+        if: matrix.python-version == '3.9' && matrix.torch-version == '2.3.0'
+        with:
+          name: step-6
+          path: ./model-steps-6.onnx
+
       - name: Release exported onnx models
-        # if: matrix.python-version == '3.9' && matrix.torch-version == '2.3.0' && github.event_name == 'push'
-        if: false
+        if: matrix.python-version == '3.9' && matrix.torch-version == '2.3.0' && github.event_name == 'push'
         uses: svenstaro/upload-release-action@v2
         with:
           file_glob: true
           overwrite: true
-          file: vits-icefall-*.tar.bz2
+          file: matcha-icefall-*.tar.bz2
           repo_name: k2-fsa/sherpa-onnx
           repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
           tag: tts-models
diff --git a/egs/baker_zh/TTS/README.md b/egs/baker_zh/TTS/README.md
index 67241ca19..5a6413b3c 100644
--- a/egs/baker_zh/TTS/README.md
+++ b/egs/baker_zh/TTS/README.md
@@ -97,6 +97,9 @@ The above command generates 3 files:
   - hifigan_v2.onnx
   - hifigan_v3.onnx
 
+**HINT**: You can download pre-exported hifigan ONNX models from
+https://github.com/k2-fsa/sherpa-onnx/releases/tag/vocoder-models
+
 To use the generated onnx files to generate speech from text, please run:
 
 ```bash