Publish MatchaTTS onnx models trained with LJSpeech to huggingface (#1854)

2025-01-02 15:54:34 +08:00 · 2025-01-02 15:54:34 +08:00 · 3b263539cd
commit 3b263539cd
parent bfffda5afb
5 changed files with 118 additions and 4 deletions
--- a/.github/scripts/docker/Dockerfile
+++ b/.github/scripts/docker/Dockerfile
@@ -49,7 +49,7 @@ RUN pip install --no-cache-dir \
      kaldifst \
      kaldilm \
      librosa \
-      matplotlib \
+      "matplotlib<=3.9.4" \
      multi_quantization \
      numba \
      "numpy<2.0" \
--- a/.github/scripts/ljspeech/TTS/run-matcha.sh
+++ b/.github/scripts/ljspeech/TTS/run-matcha.sh
@@ -77,7 +77,7 @@ function export_onnx() {
  popd

  pushd data/fbank
-  rm -v *.json
+  rm -fv *.json
  curl -SL -O https://huggingface.co/csukuangfj/icefall-tts-ljspeech-matcha-en-2024-10-28/resolve/main/data/cmvn.json
  popd

@@ -115,6 +115,37 @@ function export_onnx() {

  ls -lh /icefall/*.wav
  soxi /icefall/generated-matcha-tts-steps-6-*.wav
+
+  cp ./model-steps-*.onnx /icefall
+
+  d=matcha-icefall-en_US-ljspeech
+  mkdir $d
+  cp -v data/tokens.txt $d
+  cp model-steps-3.onnx $d
+  pushd $d
+  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/espeak-ng-data.tar.bz2
+  tar xf espeak-ng-data.tar.bz2
+  rm espeak-ng-data.tar.bz2
+
+cat >README.md <<EOF
+# Introduction
+
+This model is trained using the dataset from
+https://keithito.com/LJ-Speech-Dataset/
+
+The dataset contains only 1 female speaker.
+
+You can find the training code at
+https://github.com/k2-fsa/icefall/tree/master/egs/ljspeech/TTS#matcha
+EOF
+
+  ls -lh
+
+  popd
+
+  tar cvjf $d.tar.bz2 $d
+  mv $d.tar.bz2 /icefall
+  mv $d /icefall
 }

 prepare_data
--- a/.github/workflows/ljspeech.yml
+++ b/.github/workflows/ljspeech.yml
@@ -30,8 +30,8 @@ jobs:
        id: set-matrix
        run: |
          # outputting for debugging purposes
-          python ./.github/scripts/docker/generate_build_matrix.py
-          MATRIX=$(python ./.github/scripts/docker/generate_build_matrix.py)
+          python ./.github/scripts/docker/generate_build_matrix.py --min-torch-version "2.3"
+          MATRIX=$(python ./.github/scripts/docker/generate_build_matrix.py --min-torch-version "2.3")
          echo "::set-output name=matrix::${MATRIX}"

  ljspeech:
@@ -70,6 +70,10 @@ jobs:
              cd /icefall
              git config --global --add safe.directory /icefall

+              pip install "matplotlib<=3.9.4"
+
+              pip list
+
              .github/scripts/ljspeech/TTS/run-matcha.sh
              .github/scripts/ljspeech/TTS/run.sh

@@ -94,3 +98,69 @@ jobs:
          repo_name: k2-fsa/sherpa-onnx
          repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
          tag: tts-models
+
+      - uses: actions/upload-artifact@v4
+        if: matrix.python-version == '3.9' && matrix.torch-version == '2.3.0'
+        with:
+          name: step-2
+          path: ./model-steps-2.onnx
+
+      - uses: actions/upload-artifact@v4
+        if: matrix.python-version == '3.9' && matrix.torch-version == '2.3.0'
+        with:
+          name: step-3
+          path: ./model-steps-3.onnx
+
+      - uses: actions/upload-artifact@v4
+        if: matrix.python-version == '3.9' && matrix.torch-version == '2.3.0'
+        with:
+          name: step-4
+          path: ./model-steps-4.onnx
+
+      - uses: actions/upload-artifact@v4
+        if: matrix.python-version == '3.9' && matrix.torch-version == '2.3.0'
+        with:
+          name: step-5
+          path: ./model-steps-5.onnx
+
+      - uses: actions/upload-artifact@v4
+        if: matrix.python-version == '3.9' && matrix.torch-version == '2.3.0'
+        with:
+          name: step-6
+          path: ./model-steps-6.onnx
+
+      - name: Upload models to huggingface
+        if: matrix.python-version == '3.9' && matrix.torch-version == '2.3.0'
+        shell: bash
+        env:
+          HF_TOKEN: ${{ secrets.HF_TOKEN }}
+        run: |
+          d=matcha-icefall-en_US-ljspeech
+
+          GIT_LFS_SKIP_SMUDGE=1  git clone https://huggingface.co/csukuangfj/$d hf
+          cp -av $d/* hf/
+
+          pushd hf
+
+          git lfs track "cmn_dict"
+          git lfs track "ru_dict"
+
+          git add .
+
+          git config --global user.name "csukuangfj"
+          git config --global user.email "csukuangfj@gmail.com"
+          git config --global lfs.allowincompletepush true
+
+          git commit -m "upload model" && git push https://csukuangfj:${HF_TOKEN}@huggingface.co/csukuangfj/$d main || true
+          popd
+
+      - name: Release exported onnx models
+        if: matrix.python-version == '3.9' && matrix.torch-version == '2.3.0'
+        uses: svenstaro/upload-release-action@v2
+        with:
+          file_glob: true
+          overwrite: true
+          file: matcha-icefall-*.tar.bz2
+          repo_name: k2-fsa/sherpa-onnx
+          repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
+          tag: tts-models
--- a/egs/ljspeech/TTS/README.md
+++ b/egs/ljspeech/TTS/README.md
@@ -176,6 +176,15 @@ The above command generates the following files:

 where the 2 in `model-steps-2.onnx` means it uses 2 steps for the ODE solver.

+**HINT**: If you get the following error while running `export_onnx.py`:
+
+```
+torch.onnx.errors.UnsupportedOperatorError: Exporting the operator
+'aten::scaled_dot_product_attention' to ONNX opset version 14 is not supported.
+```
+
+please use `torch>=2.2.0`.
+

 To export the Hifigan vocoder to onnx, please use:

--- a/egs/ljspeech/TTS/matcha/export_onnx.py
+++ b/egs/ljspeech/TTS/matcha/export_onnx.py
@@ -176,12 +176,16 @@ def main():
            "language": "English",
            "voice": "en-us",
            "has_espeak": 1,
+            "jieba": 0,
            "n_speakers": 1,
            "sample_rate": 22050,
            "version": 1,
+            "pad_id": tokenizer.pad_id,
            "model_author": "icefall",
            "maintainer": "k2-fsa",
+            "use_eos_bos": 1,
            "dataset": "LJ Speech",
+            "dataset_url": "https://keithito.com/LJ-Speech-Dataset/",
            "num_ode_steps": num_steps,
        }
        add_meta_data(filename=filename, meta_data=meta_data)