Upload matcha tts models for ljspeech

2025-08-09 10:02:22 +00:00 · 2025-01-02 09:52:05 +08:00 · 2025-01-02 09:52:05 +08:00 · 1761b21f06
commit 1761b21f06
parent bfffda5afb
3 changed files with 103 additions and 0 deletions
--- a/.github/scripts/ljspeech/TTS/run-matcha.sh
+++ b/.github/scripts/ljspeech/TTS/run-matcha.sh
@@ -115,6 +115,37 @@ function export_onnx() {
  ls -lh /icefall/*.wav
  soxi /icefall/generated-matcha-tts-steps-6-*.wav
  cp ./model-steps-*.onnx /icefall
  d=matcha-icefall-en_US-ljspeech
  mkdir $d
  cp -v data/tokens.txt $d
  cp model-steps-3.onnx $d
  pushd $d
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/espeak-ng-data.tar.bz2
  tar xf espeak-ng-data.tar.bz2
  rm espeak-ng-data.tar.bz2
 cat >README.md <<EOF
 # Introduction
 This model is trained using the dataset from
 https://keithito.com/LJ-Speech-Dataset/
 The dataset contains only 1 female speaker.
 You can find the training code at
 https://github.com/k2-fsa/icefall/tree/master/egs/ljspeech/TTS#matcha
 EOF
  ls -lh
  popd
  tar cvjf $d.tar.bz2 $d
  mv $d.tar.bz2 /icefall
  mv $d /icefall
 }
 prepare_data
--- a/.github/workflows/ljspeech.yml
+++ b/.github/workflows/ljspeech.yml
@@ -94,3 +94,71 @@ jobs:
          repo_name: k2-fsa/sherpa-onnx
          repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
          tag: tts-models
      # Upload the files matching ./*.wav as a build artifact.
      # Guarded so that only the Python 3.9 / torch 2.3.0 matrix entry uploads.
      - uses: actions/upload-artifact@v4
        if: matrix.python-version == '3.9' && matrix.torch-version == '2.3.0'
        with:
          # The artifact name embeds both matrix values.
          name: generated-test-files-${{ matrix.python-version }}-${{ matrix.torch-version }}
          path: ./*.wav
      # The five steps below upload model-steps-2.onnx ... model-steps-6.onnx,
      # each as its own artifact named step-N. All of them share the same guard:
      # only the Python 3.9 / torch 2.3.0 matrix entry uploads.
      # NOTE(review): N is presumably the number of ODE solver steps the model
      # was exported with -- confirm against the export script.
      - uses: actions/upload-artifact@v4
        if: matrix.python-version == '3.9' && matrix.torch-version == '2.3.0'
        with:
          name: step-2
          path: ./model-steps-2.onnx
      - uses: actions/upload-artifact@v4
        if: matrix.python-version == '3.9' && matrix.torch-version == '2.3.0'
        with:
          name: step-3
          path: ./model-steps-3.onnx
      - uses: actions/upload-artifact@v4
        if: matrix.python-version == '3.9' && matrix.torch-version == '2.3.0'
        with:
          name: step-4
          path: ./model-steps-4.onnx
      - uses: actions/upload-artifact@v4
        if: matrix.python-version == '3.9' && matrix.torch-version == '2.3.0'
        with:
          name: step-5
          path: ./model-steps-5.onnx
      - uses: actions/upload-artifact@v4
        if: matrix.python-version == '3.9' && matrix.torch-version == '2.3.0'
        with:
          name: step-6
          path: ./model-steps-6.onnx
      # Copy the contents of the matcha-icefall-en_US-ljspeech directory into a
      # clone of the Hugging Face repo csukuangfj/matcha-icefall-en_US-ljspeech,
      # then commit and push to its `main` branch.
      # Runs only for the Python 3.9 / torch 2.3.0 matrix entry and only on
      # `push` events.
      - name: Upload models to huggingface
        if: matrix.python-version == '3.9' && matrix.torch-version == '2.3.0' && github.event_name == 'push'
        shell: bash
        env:
          # Taken from repository secrets; embedded in the push URL below.
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        # Notes on the script:
        # - GIT_LFS_SKIP_SMUDGE=1 clones without downloading LFS file contents.
        # - lfs.allowincompletepush=true lets the push go through even though
        #   the LFS objects skipped at clone time are not present locally.
        # - The trailing `|| true` makes the step succeed when `git commit`
        #   fails (e.g. nothing to commit) AND when `git push` fails.
        #   NOTE(review): this also hides genuine push failures (bad token,
        #   network error) -- confirm that best-effort upload is intended.
        run: |
          d=matcha-icefall-en_US-ljspeech
          GIT_LFS_SKIP_SMUDGE=1  git clone https://huggingface.co/csukuangfj/$d hf
          cp -av $d/* hf/
          pushd hf
          git add .
          git config --global user.name "csukuangfj"
          git config --global user.email "csukuangfj@gmail.com"
          git config --global lfs.allowincompletepush true
          git commit -m "upload model" && git push https://csukuangfj:${HF_TOKEN}@huggingface.co/csukuangfj/$d main || true
          popd
      # Attach every file matching matcha-icefall-*.tar.bz2 to the `tts-models`
      # release of k2-fsa/sherpa-onnx.
      # Runs only for the Python 3.9 / torch 2.3.0 matrix entry and only on
      # `push` events.
      - name: Release exported onnx models
        if: matrix.python-version == '3.9' && matrix.torch-version == '2.3.0' && github.event_name == 'push'
        uses: svenstaro/upload-release-action@v2
        with:
          # Treat `file` as a glob pattern rather than a literal path.
          file_glob: true
          # Replace an existing release asset that has the same name.
          overwrite: true
          file: matcha-icefall-*.tar.bz2
          # The release lives in a different repository, so a dedicated token
          # is supplied from repository secrets.
          repo_name: k2-fsa/sherpa-onnx
          repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
          tag: tts-models
--- a/egs/ljspeech/TTS/matcha/export_onnx.py
+++ b/egs/ljspeech/TTS/matcha/export_onnx.py
@@ -176,12 +176,16 @@ def main():
            "language": "English",
            "voice": "en-us",
            "has_espeak": 1,
            "jieba": 0,
            "n_speakers": 1,
            "sample_rate": 22050,
            "version": 1,
            "pad_id": params.pad_id,
            "model_author": "icefall",
            "maintainer": "k2-fsa",
            "use_eos_bos": 0,
            "dataset": "LJ Speech",
            "dataset_url": "https://keithito.com/LJ-Speech-Dataset/",
            "num_ode_steps": num_steps,
        }
        add_meta_data(filename=filename, meta_data=meta_data)