From a3dd431ed0b0eb2b65fe9700d0ffaff134b7dd07 Mon Sep 17 00:00:00 2001
From: Fangjun Kuang <csukuangfj@gmail.com>
Date: Mon, 30 Dec 2024 16:39:06 +0800
Subject: [PATCH] Add CI

---
 .github/scripts/baker_zh/TTS/run-matcha.sh | 39 +++++++++
 .github/workflows/baker_zh.yml             | 98 ++++++++++++++++++++++
 egs/ljspeech/TTS/matcha/export_onnx.py     | 11 ++-
 egs/ljspeech/TTS/matcha/onnx_pretrained.py |  4 +-
 4 files changed, 144 insertions(+), 8 deletions(-)
 create mode 100755 .github/scripts/baker_zh/TTS/run-matcha.sh
 create mode 100644 .github/workflows/baker_zh.yml

diff --git a/.github/scripts/baker_zh/TTS/run-matcha.sh b/.github/scripts/baker_zh/TTS/run-matcha.sh
new file mode 100755
index 000000000..afa46fd98
--- /dev/null
+++ b/.github/scripts/baker_zh/TTS/run-matcha.sh
@@ -0,0 +1,39 @@
+#!/usr/bin/env bash
+
+set -ex
+
+apt-get update
+apt-get install -y sox
+
+python3 -m pip install numba conformer==0.3.2 diffusers librosa
+
+
+log() {
+  # Borrowed from espnet: echo a timestamped message tagged with the caller's file name, line number and function name.
+  local fname=${BASH_SOURCE[1]##*/}  # caller's source file with the directory part stripped
+  echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*"
+}
+
+cd egs/baker_zh/TTS
+
+sed -i.bak s/600/8/g ./prepare.sh  # replace every "600" in prepare.sh with "8" (presumably to shrink the CI run)
+sed -i.bak s/"first 100"/"first 3"/g ./prepare.sh  # replace every "first 100" with "first 3"
+sed -i.bak s/500/5/g ./prepare.sh  # replace every "500" with "5"; NOTE: each -i.bak call overwrites prepare.sh.bak
+git diff  # print the resulting working-tree changes into the CI log
+
+function prepare_data() {
+  # Fetch a small pre-built subset of BZNSYP (for testing) into ./download,
+  # unpack it as download/BZNSYP, delete the archive, then run ./prepare.sh.
+  mkdir -p download
+  pushd download
+  wget -q https://huggingface.co/csukuangfj/tmp-files/resolve/main/BZNSYP-samples.tar.bz2
+  tar xvf BZNSYP-samples.tar.bz2
+  mv BZNSYP-samples BZNSYP
+  rm BZNSYP-samples.tar.bz2
+  popd
+
+  ./prepare.sh
+  tree .
+}
+
+prepare_data
diff --git a/.github/workflows/baker_zh.yml b/.github/workflows/baker_zh.yml
new file mode 100644
index 000000000..4aa957629
--- /dev/null
+++ b/.github/workflows/baker_zh.yml
@@ -0,0 +1,98 @@
+name: baker_zh
+
+on:
+  push:
+    branches:
+      - master
+      - baker-matcha-2
+
+  pull_request:
+    branches:
+      - master
+
+  workflow_dispatch:
+
+concurrency:
+  group: baker_zh-${{ github.ref }}  # groups are repo-wide, so key on this workflow's own name
+  cancel-in-progress: true
+
+jobs:
+  generate_build_matrix:
+    if: github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa'
+    # see https://github.com/pytorch/pytorch/pull/50633
+    runs-on: ubuntu-latest
+    outputs:
+      matrix: ${{ steps.set-matrix.outputs.matrix }}
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+      - name: Generating build matrix
+        id: set-matrix  # referenced by the job-level `outputs.matrix` above
+        run: |
+          # outputting for debugging purposes
+          python ./.github/scripts/docker/generate_build_matrix.py
+          MATRIX=$(python ./.github/scripts/docker/generate_build_matrix.py)
+          echo "matrix=${MATRIX}" >> "$GITHUB_OUTPUT"
+
+  baker_zh:
+    needs: generate_build_matrix  # supplies the matrix JSON consumed by `strategy.matrix` below
+    name: py${{ matrix.python-version }} torch${{ matrix.torch-version }} v${{ matrix.version }}
+    runs-on: ubuntu-latest
+    strategy:
+      fail-fast: false
+      matrix:
+        ${{ fromJson(needs.generate_build_matrix.outputs.matrix) }}
+
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+
+      - name: Free space  # deletes /opt/hostedtoolcache and prints disk usage before and after
+        shell: bash
+        run: |
+          ls -lh
+          df -h
+          rm -rf /opt/hostedtoolcache
+          df -h
+          echo "pwd: $PWD"
+          echo "github.workspace ${{ github.workspace }}"
+
+      - name: Run tests  # runs run-matcha.sh inside the icefall CPU image with the checkout mounted at /icefall
+        uses: addnab/docker-run-action@v3
+        with:
+            image: ghcr.io/${{ github.repository_owner }}/icefall:cpu-py${{ matrix.python-version }}-torch${{ matrix.torch-version }}-v${{ matrix.version }}
+            options: |
+              --volume ${{ github.workspace }}/:/icefall
+            shell: bash
+            run: |
+              export PYTHONPATH=/icefall:$PYTHONPATH
+              cd /icefall
+              git config --global --add safe.directory /icefall
+
+              .github/scripts/baker_zh/TTS/run-matcha.sh
+
+      - name: display files
+        shell: bash
+        run: |
+          ls -lh
+
+      - uses: actions/upload-artifact@v4
+        # if: matrix.python-version == '3.9' && matrix.torch-version == '2.3.0'
+        if: false  # step currently disabled; the commented-out condition is kept above
+        with:
+          name: generated-test-files-${{ matrix.python-version }}-${{ matrix.torch-version }}
+          path: ./*.wav
+
+      - name: Release exported onnx models
+        # if: matrix.python-version == '3.9' && matrix.torch-version == '2.3.0' && github.event_name == 'push'
+        if: false  # disabled; NOTE(review): the `file` glob below still names vits-icefall-* archives - confirm it matches this recipe before enabling
+        uses: svenstaro/upload-release-action@v2
+        with:
+          file_glob: true
+          overwrite: true
+          file: vits-icefall-*.tar.bz2
+          repo_name: k2-fsa/sherpa-onnx
+          repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
+          tag: tts-models
diff --git a/egs/ljspeech/TTS/matcha/export_onnx.py b/egs/ljspeech/TTS/matcha/export_onnx.py
index 623517431..39709cc36 100755
--- a/egs/ljspeech/TTS/matcha/export_onnx.py
+++ b/egs/ljspeech/TTS/matcha/export_onnx.py
@@ -93,14 +93,14 @@ class ModelWrapper(torch.nn.Module):
         self,
         x: torch.Tensor,
         x_lengths: torch.Tensor,
-        temperature: torch.Tensor,
+        noise_scale: torch.Tensor,
         length_scale: torch.Tensor,
     ) -> torch.Tensor:
         """
         Args: :
           x: (batch_size, num_tokens), torch.int64
           x_lengths: (batch_size,), torch.int64
-          temperature: (1,), torch.float32
+          noise_scale: (1,), torch.float32
           length_scale (1,), torch.float32
         Returns:
           audio: (batch_size, num_samples)
@@ -110,7 +110,7 @@ class ModelWrapper(torch.nn.Module):
             x=x,
             x_lengths=x_lengths,
             n_timesteps=self.num_steps,
-            temperature=temperature,
+            temperature=noise_scale,
             length_scale=length_scale,
         )["mel"]
         # mel: (batch_size, feat_dim, num_frames)
@@ -127,7 +127,6 @@ def main():
     params.update(vars(args))
 
     tokenizer = Tokenizer(params.tokens)
-    params.blank_id = tokenizer.pad_id
     params.vocab_size = tokenizer.vocab_size
     params.model_args.n_vocab = params.vocab_size
 
@@ -153,14 +152,14 @@ def main():
         # encoder has a large initial length
         x = torch.ones(1, 1000, dtype=torch.int64)
         x_lengths = torch.tensor([x.shape[1]], dtype=torch.int64)
-        temperature = torch.tensor([1.0])
+        noise_scale = torch.tensor([1.0])
         length_scale = torch.tensor([1.0])
 
         opset_version = 14
         filename = f"model-steps-{num_steps}.onnx"
         torch.onnx.export(
             wrapper,
-            (x, x_lengths, temperature, length_scale),
+            (x, x_lengths, noise_scale, length_scale),
             filename,
             opset_version=opset_version,
             input_names=["x", "x_length", "noise_scale", "length_scale"],
diff --git a/egs/ljspeech/TTS/matcha/onnx_pretrained.py b/egs/ljspeech/TTS/matcha/onnx_pretrained.py
index 6d92b16eb..19e9b49cb 100755
--- a/egs/ljspeech/TTS/matcha/onnx_pretrained.py
+++ b/egs/ljspeech/TTS/matcha/onnx_pretrained.py
@@ -132,7 +132,7 @@ class OnnxModel:
         print("x_lengths", x_lengths)
         print("x", x.shape)
 
-        temperature = torch.tensor([1.0], dtype=torch.float32)
+        noise_scale = torch.tensor([1.0], dtype=torch.float32)
         length_scale = torch.tensor([1.0], dtype=torch.float32)
 
         mel = self.model.run(
@@ -140,7 +140,7 @@ class OnnxModel:
             {
                 self.model.get_inputs()[0].name: x.numpy(),
                 self.model.get_inputs()[1].name: x_lengths.numpy(),
-                self.model.get_inputs()[2].name: temperature.numpy(),
+                self.model.get_inputs()[2].name: noise_scale.numpy(),
                 self.model.get_inputs()[3].name: length_scale.numpy(),
             },
         )[0]