From fa9f4d58fb70b82b9b6848fda3678616bb64da70 Mon Sep 17 00:00:00 2001 From: Fangjun Kuang Date: Mon, 28 Oct 2024 23:25:11 +0800 Subject: [PATCH] fix typos --- .github/scripts/ljspeech/TTS/run-matcha.sh | 46 ++++++++++--------- .github/scripts/ljspeech/TTS/run.sh | 2 +- egs/ljspeech/TTS/matcha/export_onnx.py | 43 ++++++++++++++++- .../TTS/matcha/export_onnx_hifigan.py | 4 ++ 4 files changed, 72 insertions(+), 23 deletions(-) diff --git a/.github/scripts/ljspeech/TTS/run-matcha.sh b/.github/scripts/ljspeech/TTS/run-matcha.sh index 26ce17b23..5da9fac57 100755 --- a/.github/scripts/ljspeech/TTS/run-matcha.sh +++ b/.github/scripts/ljspeech/TTS/run-matcha.sh @@ -2,13 +2,12 @@ set -ex -apt-get install sox +apt-get update +apt-get install -y sox python3 -m pip install piper_phonemize -f https://k2-fsa.github.io/icefall/piper_phonemize.html python3 -m pip install espnet_tts_frontend -python3 -m pip install numba - -python3 -m pip install conformer==0.3.2 diffusers librosa +python3 -m pip install numba conformer==0.3.2 diffusers librosa log() { # This function is from espnet @@ -26,7 +25,7 @@ git diff function prepare_data() { # We have created a subset of the data for testing # - mkdir download + mkdir -p download pushd download wget -q https://huggingface.co/csukuangfj/ljspeech-subset-for-ci-test/resolve/main/LJSpeech-1.1.tar.bz2 tar xvf LJSpeech-1.1.tar.bz2 @@ -50,8 +49,7 @@ function train() { --tokens data/tokens.txt \ --max-duration 20 - ls -lh match/exp - done + ls -lh matcha/exp } function infer() { @@ -63,7 +61,7 @@ function infer() { --exp-dir ./matcha/exp \ --tokens data/tokens.txt \ --vocoder ./generator_v1 \ - --input-text "how are you doing?" + --input-text "how are you doing?" 
\ --output-wav ./generated.wav ls -lh *.wav @@ -74,12 +72,7 @@ function infer() { function export_onnx() { pushd matcha/exp - curl -SL -O https://huggingface.co/csukuangfj/icefall-tts-ljspeech-matcha-en-2024-10-28/resolve/main/exp/epoch-4000.pt - curl -SL -O https://github.com/csukuangfj/models/raw/refs/heads/master/hifigan/generator_v1 - curl -SL -O https://github.com/csukuangfj/models/raw/refs/heads/master/hifigan/generator_v2 - curl -SL -O https://github.com/csukuangfj/models/raw/refs/heads/master/hifigan/generator_v3 - popd pushd data/fbank @@ -87,24 +80,33 @@ function export_onnx() { curl -SL -O https://huggingface.co/csukuangfj/icefall-tts-ljspeech-matcha-en-2024-10-28/resolve/main/data/cmvn.json popd + curl -SL -O https://github.com/csukuangfj/models/raw/refs/heads/master/hifigan/generator_v2 + curl -SL -O https://github.com/csukuangfj/models/raw/refs/heads/master/hifigan/generator_v3 + ./matcha/export_onnx.py \ - --exp-dir ./matcha/exp-new-3 \ + --exp-dir ./matcha/exp \ --epoch 4000 \ --tokens ./data/tokens.txt \ --cmvn ./data/fbank/cmvn.json ls -lh *.onnx - python3 ./matcha/export_onnx_hifigan.py + if false; then + # The CI machine does not have enough memory to run it + python3 ./matcha/export_onnx_hifigan.py + else + curl -SL -O https://huggingface.co/csukuangfj/icefall-tts-ljspeech-matcha-en-2024-10-28/resolve/main/exp/hifigan_v2.onnx + fi ls -lh *.onnx - python3 ./matcha/onnx_pretrained.py \ - --acoustic-model ./model-steps-6.onnx \ - --vocoder ./hifigan_v1.onnx \ - --tokens ./data/tokens.txt \ - --input-text "how are you doing?" \ - --output-wav /icefall/generated-matcha-tts-6.wav + + python3 ./matcha/onnx_pretrained.py \ + --acoustic-model ./model-steps-6.onnx \ + --vocoder ./hifigan_v2.onnx \ + --tokens ./data/tokens.txt \ + --input-text "how are you doing?" 
\ + --output-wav /icefall/generated-matcha-tts-6.wav ls -lh /icefall/*.wav soxi /icefall/generated-matcha-tts-6.wav @@ -114,3 +116,5 @@ prepare_data train infer export_onnx + +rm -rfv generator_v* matcha/exp diff --git a/.github/scripts/ljspeech/TTS/run.sh b/.github/scripts/ljspeech/TTS/run.sh index 707361782..733a12c47 100755 --- a/.github/scripts/ljspeech/TTS/run.sh +++ b/.github/scripts/ljspeech/TTS/run.sh @@ -22,7 +22,7 @@ git diff function prepare_data() { # We have created a subset of the data for testing # - mkdir download + mkdir -p download pushd download wget -q https://huggingface.co/csukuangfj/ljspeech-subset-for-ci-test/resolve/main/LJSpeech-1.1.tar.bz2 tar xvf LJSpeech-1.1.tar.bz2 diff --git a/egs/ljspeech/TTS/matcha/export_onnx.py b/egs/ljspeech/TTS/matcha/export_onnx.py index c0eebcde0..f7dc38c1b 100755 --- a/egs/ljspeech/TTS/matcha/export_onnx.py +++ b/egs/ljspeech/TTS/matcha/export_onnx.py @@ -6,19 +6,60 @@ Note that the model outputs fbank. You need to use a vocoder to convert it to audio. See also ./export_onnx_hifigan.py """ +import argparse import json import logging +from pathlib import Path from typing import Any, Dict import onnx import torch -from inference import get_parser from tokenizer import Tokenizer from train import get_model, get_params from icefall.checkpoint import load_checkpoint +def get_parser(): + parser = argparse.ArgumentParser( + formatter_class=argparse.ArgumentDefaultsHelpFormatter + ) + + parser.add_argument( + "--epoch", + type=int, + default=4000, + help="""It specifies the checkpoint to use for decoding. + Note: Epoch counts from 1. + """, + ) + + parser.add_argument( + "--exp-dir", + type=Path, + default="matcha/exp-new-3", + help="""The experiment dir. 
+ It specifies the directory where all training related + files, e.g., checkpoints, log, etc, are saved + """, + ) + + parser.add_argument( + "--tokens", + type=Path, + default="data/tokens.txt", + ) + + parser.add_argument( + "--cmvn", + type=str, + default="data/fbank/cmvn.json", + help="""Path to vocabulary.""", + ) + + return parser + + def add_meta_data(filename: str, meta_data: Dict[str, Any]): """Add meta data to an ONNX model. It is changed in-place. diff --git a/egs/ljspeech/TTS/matcha/export_onnx_hifigan.py b/egs/ljspeech/TTS/matcha/export_onnx_hifigan.py index af54f4e89..ea4435479 100755 --- a/egs/ljspeech/TTS/matcha/export_onnx_hifigan.py +++ b/egs/ljspeech/TTS/matcha/export_onnx_hifigan.py @@ -1,6 +1,7 @@ #!/usr/bin/env python3 import logging +from pathlib import Path from typing import Any, Dict import onnx @@ -58,6 +59,9 @@ def main(): for f in model_filenames: logging.info(f) + if not Path(f).is_file(): + logging.info(f"Skipping {f} since {f} does not exist") + continue model = load_vocoder(f) wrapper = ModelWrapper(model) wrapper.eval()