From fa9f4d58fb70b82b9b6848fda3678616bb64da70 Mon Sep 17 00:00:00 2001
From: Fangjun Kuang <csukuangfj@gmail.com>
Date: Mon, 28 Oct 2024 23:25:11 +0800
Subject: [PATCH] fix typos

---
 .github/scripts/ljspeech/TTS/run-matcha.sh    | 46 ++++++++++---------
 .github/scripts/ljspeech/TTS/run.sh           |  2 +-
 egs/ljspeech/TTS/matcha/export_onnx.py        | 43 ++++++++++++++++-
 .../TTS/matcha/export_onnx_hifigan.py         |  4 ++
 4 files changed, 72 insertions(+), 23 deletions(-)

diff --git a/.github/scripts/ljspeech/TTS/run-matcha.sh b/.github/scripts/ljspeech/TTS/run-matcha.sh
index 26ce17b23..5da9fac57 100755
--- a/.github/scripts/ljspeech/TTS/run-matcha.sh
+++ b/.github/scripts/ljspeech/TTS/run-matcha.sh
@@ -2,13 +2,12 @@
 
 set -ex
 
-apt-get install sox
+apt-get update
+apt-get install -y sox
 
 python3 -m pip install piper_phonemize -f https://k2-fsa.github.io/icefall/piper_phonemize.html
 python3 -m pip install espnet_tts_frontend
-python3 -m pip install numba
-
-python3 -m pip install conformer==0.3.2 diffusers librosa
+python3 -m pip install numba conformer==0.3.2 diffusers librosa
 
 log() {
   # This function is from espnet
@@ -26,7 +25,7 @@ git diff
 function prepare_data() {
   # We have created a subset of the data for testing
   #
-  mkdir download
+  mkdir -p download
   pushd download
   wget -q https://huggingface.co/csukuangfj/ljspeech-subset-for-ci-test/resolve/main/LJSpeech-1.1.tar.bz2
   tar xvf LJSpeech-1.1.tar.bz2
@@ -50,8 +49,7 @@ function train() {
     --tokens data/tokens.txt \
     --max-duration 20
 
-    ls -lh match/exp
-  done
+    ls -lh matcha/exp
 }
 
 function infer() {
@@ -63,7 +61,7 @@ function infer() {
     --exp-dir ./matcha/exp \
     --tokens data/tokens.txt \
     --vocoder ./generator_v1 \
-    --input-text "how are you doing?"
+    --input-text "how are you doing?" \
     --output-wav ./generated.wav
 
   ls -lh *.wav
@@ -74,12 +72,7 @@ function infer() {
 
 function export_onnx() {
   pushd matcha/exp
-
   curl -SL -O https://huggingface.co/csukuangfj/icefall-tts-ljspeech-matcha-en-2024-10-28/resolve/main/exp/epoch-4000.pt
-  curl -SL -O https://github.com/csukuangfj/models/raw/refs/heads/master/hifigan/generator_v1
-  curl -SL -O https://github.com/csukuangfj/models/raw/refs/heads/master/hifigan/generator_v2
-  curl -SL -O https://github.com/csukuangfj/models/raw/refs/heads/master/hifigan/generator_v3
-
   popd
 
   pushd data/fbank
@@ -87,24 +80,33 @@ function export_onnx() {
   curl -SL -O https://huggingface.co/csukuangfj/icefall-tts-ljspeech-matcha-en-2024-10-28/resolve/main/data/cmvn.json
   popd
 
+  curl -SL -O https://github.com/csukuangfj/models/raw/refs/heads/master/hifigan/generator_v2
+  curl -SL -O https://github.com/csukuangfj/models/raw/refs/heads/master/hifigan/generator_v3
+
   ./matcha/export_onnx.py \
-    --exp-dir ./matcha/exp-new-3 \
+    --exp-dir ./matcha/exp \
     --epoch 4000 \
     --tokens ./data/tokens.txt \
     --cmvn ./data/fbank/cmvn.json
 
   ls -lh *.onnx
 
-  python3 ./matcha/export_onnx_hifigan.py
+  if false; then
+    # The CI machine does not have enough memory to run it
+    python3 ./matcha/export_onnx_hifigan.py
+  else
+    curl -SL -O https://huggingface.co/csukuangfj/icefall-tts-ljspeech-matcha-en-2024-10-28/resolve/main/exp/hifigan_v2.onnx
+  fi
 
   ls -lh *.onnx
 
-  python3 ./matcha/onnx_pretrained.py \
-   --acoustic-model ./model-steps-6.onnx \
-   --vocoder ./hifigan_v1.onnx \
-   --tokens ./data/tokens.txt \
-   --input-text "how are you doing?" \
-   --output-wav /icefall/generated-matcha-tts-6.wav
+
+    python3 ./matcha/onnx_pretrained.py \
+     --acoustic-model ./model-steps-6.onnx \
+     --vocoder ./hifigan_v2.onnx \
+     --tokens ./data/tokens.txt \
+     --input-text "how are you doing?" \
+     --output-wav /icefall/generated-matcha-tts-6.wav
 
   ls -lh /icefall/*.wav
   soxi /icefall/generated-matcha-tts-6.wav
@@ -114,3 +116,5 @@ prepare_data
 train
 infer
 export_onnx
+
+rm -rfv generator_v* matcha/exp
diff --git a/.github/scripts/ljspeech/TTS/run.sh b/.github/scripts/ljspeech/TTS/run.sh
index 707361782..733a12c47 100755
--- a/.github/scripts/ljspeech/TTS/run.sh
+++ b/.github/scripts/ljspeech/TTS/run.sh
@@ -22,7 +22,7 @@ git diff
 function prepare_data() {
   # We have created a subset of the data for testing
   #
-  mkdir download
+  mkdir -p download
   pushd download
   wget -q https://huggingface.co/csukuangfj/ljspeech-subset-for-ci-test/resolve/main/LJSpeech-1.1.tar.bz2
   tar xvf LJSpeech-1.1.tar.bz2
diff --git a/egs/ljspeech/TTS/matcha/export_onnx.py b/egs/ljspeech/TTS/matcha/export_onnx.py
index c0eebcde0..f7dc38c1b 100755
--- a/egs/ljspeech/TTS/matcha/export_onnx.py
+++ b/egs/ljspeech/TTS/matcha/export_onnx.py
@@ -6,19 +6,60 @@ Note that the model outputs fbank. You need to use a vocoder to convert
 it to audio. See also ./export_onnx_hifigan.py
 """
 
+import argparse
 import json
 import logging
+from pathlib import Path
 from typing import Any, Dict
 
 import onnx
 import torch
-from inference import get_parser
 from tokenizer import Tokenizer
 from train import get_model, get_params
 
 from icefall.checkpoint import load_checkpoint
 
 
+def get_parser():
+    """Build the command-line argument parser used by this export script."""
+    parser = argparse.ArgumentParser(
+        formatter_class=argparse.ArgumentDefaultsHelpFormatter
+    )
+    parser.add_argument(
+        "--epoch",
+        type=int,
+        default=4000,
+        help="""It specifies the checkpoint to use for decoding.
+        Note: Epoch counts from 1.
+        """,
+    )
+
+    parser.add_argument(
+        "--exp-dir",
+        type=Path,
+        default="matcha/exp-new-3",
+        help="""The experiment dir.
+        It specifies the directory where all training related
+        files, e.g., checkpoints, log, etc, are saved
+        """,
+    )
+
+    parser.add_argument(
+        "--tokens",
+        type=Path,
+        default="data/tokens.txt",
+        help="""Path to the vocabulary file tokens.txt.""",
+    )
+
+    parser.add_argument(
+        "--cmvn",
+        type=str,
+        default="data/fbank/cmvn.json",
+        help="""Path to the cmvn.json file.""",
+    )
+    return parser
+
+
 def add_meta_data(filename: str, meta_data: Dict[str, Any]):
     """Add meta data to an ONNX model. It is changed in-place.
 
diff --git a/egs/ljspeech/TTS/matcha/export_onnx_hifigan.py b/egs/ljspeech/TTS/matcha/export_onnx_hifigan.py
index af54f4e89..ea4435479 100755
--- a/egs/ljspeech/TTS/matcha/export_onnx_hifigan.py
+++ b/egs/ljspeech/TTS/matcha/export_onnx_hifigan.py
@@ -1,6 +1,7 @@
 #!/usr/bin/env python3
 
 import logging
+from pathlib import Path
 from typing import Any, Dict
 
 import onnx
@@ -58,6 +59,9 @@ def main():
 
     for f in model_filenames:
         logging.info(f)
+        if not Path(f).is_file():
+            logging.info(f"Skipping {f} since {f} does not exist")
+            continue
         model = load_vocoder(f)
         wrapper = ModelWrapper(model)
         wrapper.eval()