fix typos

This commit is contained in:
Fangjun Kuang 2024-10-28 23:25:11 +08:00
parent a6d018acec
commit fa9f4d58fb
4 changed files with 72 additions and 23 deletions

View File

@ -2,13 +2,12 @@
set -ex set -ex
apt-get install sox apt-get update
apt-get install -y sox
python3 -m pip install piper_phonemize -f https://k2-fsa.github.io/icefall/piper_phonemize.html python3 -m pip install piper_phonemize -f https://k2-fsa.github.io/icefall/piper_phonemize.html
python3 -m pip install espnet_tts_frontend python3 -m pip install espnet_tts_frontend
python3 -m pip install numba python3 -m pip install numba conformer==0.3.2 diffusers librosa
python3 -m pip install conformer==0.3.2 diffusers librosa
log() { log() {
# This function is from espnet # This function is from espnet
@ -26,7 +25,7 @@ git diff
function prepare_data() { function prepare_data() {
# We have created a subset of the data for testing # We have created a subset of the data for testing
# #
mkdir download mkdir -p download
pushd download pushd download
wget -q https://huggingface.co/csukuangfj/ljspeech-subset-for-ci-test/resolve/main/LJSpeech-1.1.tar.bz2 wget -q https://huggingface.co/csukuangfj/ljspeech-subset-for-ci-test/resolve/main/LJSpeech-1.1.tar.bz2
tar xvf LJSpeech-1.1.tar.bz2 tar xvf LJSpeech-1.1.tar.bz2
@ -50,8 +49,7 @@ function train() {
--tokens data/tokens.txt \ --tokens data/tokens.txt \
--max-duration 20 --max-duration 20
ls -lh match/exp ls -lh matcha/exp
done
} }
function infer() { function infer() {
@ -63,7 +61,7 @@ function infer() {
--exp-dir ./matcha/exp \ --exp-dir ./matcha/exp \
--tokens data/tokens.txt \ --tokens data/tokens.txt \
--vocoder ./generator_v1 \ --vocoder ./generator_v1 \
--input-text "how are you doing?" --input-text "how are you doing?" \
--output-wav ./generated.wav --output-wav ./generated.wav
ls -lh *.wav ls -lh *.wav
@ -74,12 +72,7 @@ function infer() {
function export_onnx() { function export_onnx() {
pushd matcha/exp pushd matcha/exp
curl -SL -O https://huggingface.co/csukuangfj/icefall-tts-ljspeech-matcha-en-2024-10-28/resolve/main/exp/epoch-4000.pt curl -SL -O https://huggingface.co/csukuangfj/icefall-tts-ljspeech-matcha-en-2024-10-28/resolve/main/exp/epoch-4000.pt
curl -SL -O https://github.com/csukuangfj/models/raw/refs/heads/master/hifigan/generator_v1
curl -SL -O https://github.com/csukuangfj/models/raw/refs/heads/master/hifigan/generator_v2
curl -SL -O https://github.com/csukuangfj/models/raw/refs/heads/master/hifigan/generator_v3
popd popd
pushd data/fbank pushd data/fbank
@ -87,24 +80,33 @@ function export_onnx() {
curl -SL -O https://huggingface.co/csukuangfj/icefall-tts-ljspeech-matcha-en-2024-10-28/resolve/main/data/cmvn.json curl -SL -O https://huggingface.co/csukuangfj/icefall-tts-ljspeech-matcha-en-2024-10-28/resolve/main/data/cmvn.json
popd popd
curl -SL -O https://github.com/csukuangfj/models/raw/refs/heads/master/hifigan/generator_v2
curl -SL -O https://github.com/csukuangfj/models/raw/refs/heads/master/hifigan/generator_v3
./matcha/export_onnx.py \ ./matcha/export_onnx.py \
--exp-dir ./matcha/exp-new-3 \ --exp-dir ./matcha/exp \
--epoch 4000 \ --epoch 4000 \
--tokens ./data/tokens.txt \ --tokens ./data/tokens.txt \
--cmvn ./data/fbank/cmvn.json --cmvn ./data/fbank/cmvn.json
ls -lh *.onnx ls -lh *.onnx
python3 ./matcha/export_onnx_hifigan.py if false; then
# The CI machine does not have enough memory to run it
python3 ./matcha/export_onnx_hifigan.py
else
curl -SL -O https://huggingface.co/csukuangfj/icefall-tts-ljspeech-matcha-en-2024-10-28/resolve/main/exp/hifigan_v2.onnx
fi
ls -lh *.onnx ls -lh *.onnx
python3 ./matcha/onnx_pretrained.py \
--acoustic-model ./model-steps-6.onnx \ python3 ./matcha/onnx_pretrained.py \
--vocoder ./hifigan_v1.onnx \ --acoustic-model ./model-steps-6.onnx \
--tokens ./data/tokens.txt \ --vocoder ./hifigan_v2.onnx \
--input-text "how are you doing?" \ --tokens ./data/tokens.txt \
--output-wav /icefall/generated-matcha-tts-6.wav --input-text "how are you doing?" \
--output-wav /icefall/generated-matcha-tts-6.wav
ls -lh /icefall/*.wav ls -lh /icefall/*.wav
soxi /icefall/generated-matcha-tts-6.wav soxi /icefall/generated-matcha-tts-6.wav
@ -114,3 +116,5 @@ prepare_data
train train
infer infer
export_onnx export_onnx
rm -rfv generator_v* matcha/exp

View File

@ -22,7 +22,7 @@ git diff
function prepare_data() { function prepare_data() {
# We have created a subset of the data for testing # We have created a subset of the data for testing
# #
mkdir download mkdir -p download
pushd download pushd download
wget -q https://huggingface.co/csukuangfj/ljspeech-subset-for-ci-test/resolve/main/LJSpeech-1.1.tar.bz2 wget -q https://huggingface.co/csukuangfj/ljspeech-subset-for-ci-test/resolve/main/LJSpeech-1.1.tar.bz2
tar xvf LJSpeech-1.1.tar.bz2 tar xvf LJSpeech-1.1.tar.bz2

View File

@ -6,19 +6,60 @@ Note that the model outputs fbank. You need to use a vocoder to convert
it to audio. See also ./export_onnx_hifigan.py it to audio. See also ./export_onnx_hifigan.py
""" """
import argparse
import json import json
import logging import logging
from pathlib import Path
from typing import Any, Dict from typing import Any, Dict
import onnx import onnx
import torch import torch
from inference import get_parser
from tokenizer import Tokenizer from tokenizer import Tokenizer
from train import get_model, get_params from train import get_model, get_params
from icefall.checkpoint import load_checkpoint from icefall.checkpoint import load_checkpoint
def get_parser() -> argparse.ArgumentParser:
    """Build the command-line parser for this export script.

    The parser exposes four options, all of which have defaults:

    - ``--epoch``: which checkpoint to load.
    - ``--exp-dir``: the experiment directory holding the checkpoints.
    - ``--tokens``: path to the token table.
    - ``--cmvn``: path to the CMVN statistics JSON file.

    Returns:
      An ``argparse.ArgumentParser`` whose help output also shows the
      default value of every option.
    """
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter
    )

    parser.add_argument(
        "--epoch",
        type=int,
        default=4000,
        help="""It specifies the checkpoint to use for decoding.
        Note: Epoch counts from 1.
        """,
    )

    parser.add_argument(
        "--exp-dir",
        type=Path,
        default="matcha/exp-new-3",
        help="""The experiment dir.
        It specifies the directory where all training related
        files, e.g., checkpoints, log, etc, are saved
        """,
    )

    parser.add_argument(
        "--tokens",
        type=Path,
        default="data/tokens.txt",
        help="""Path to tokens.txt.""",
    )

    # The help text used to read "Path to vocabulary.", which described
    # the wrong file for this option.
    parser.add_argument(
        "--cmvn",
        type=str,
        default="data/fbank/cmvn.json",
        help="""Path to the CMVN (mean and variance normalization)
        JSON file.
        """,
    )

    return parser
def add_meta_data(filename: str, meta_data: Dict[str, Any]): def add_meta_data(filename: str, meta_data: Dict[str, Any]):
"""Add meta data to an ONNX model. It is changed in-place. """Add meta data to an ONNX model. It is changed in-place.

View File

@ -1,6 +1,7 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
import logging import logging
from pathlib import Path
from typing import Any, Dict from typing import Any, Dict
import onnx import onnx
@ -58,6 +59,9 @@ def main():
for f in model_filenames: for f in model_filenames:
logging.info(f) logging.info(f)
if not Path(f).is_file():
logging.info(f"Skipping {f} since {f} does not exist")
continue
model = load_vocoder(f) model = load_vocoder(f)
wrapper = ModelWrapper(model) wrapper = ModelWrapper(model)
wrapper.eval() wrapper.eval()