diff --git a/.github/scripts/ljspeech/TTS/run-matcha.sh b/.github/scripts/ljspeech/TTS/run-matcha.sh index 5da9fac57..b6eb81020 100755 --- a/.github/scripts/ljspeech/TTS/run-matcha.sh +++ b/.github/scripts/ljspeech/TTS/run-matcha.sh @@ -80,9 +80,6 @@ function export_onnx() { curl -SL -O https://huggingface.co/csukuangfj/icefall-tts-ljspeech-matcha-en-2024-10-28/resolve/main/data/cmvn.json popd - curl -SL -O https://github.com/csukuangfj/models/raw/refs/heads/master/hifigan/generator_v2 - curl -SL -O https://github.com/csukuangfj/models/raw/refs/heads/master/hifigan/generator_v3 - ./matcha/export_onnx.py \ --exp-dir ./matcha/exp \ --epoch 4000 \ @@ -93,9 +90,13 @@ function export_onnx() { if false; then # THe CI machine does not have enough memory to run it + # + curl -SL -O https://github.com/csukuangfj/models/raw/refs/heads/master/hifigan/generator_v1 + curl -SL -O https://github.com/csukuangfj/models/raw/refs/heads/master/hifigan/generator_v2 + curl -SL -O https://github.com/csukuangfj/models/raw/refs/heads/master/hifigan/generator_v3 python3 ./matcha/export_onnx_hifigan.py else - curl -SL -O https://huggingface.co/csukuangfj/icefall-tts-ljspeech-matcha-en-2024-10-28/resolve/main/exp/hifigan_v2.onnx + curl -SL -O https://huggingface.co/csukuangfj/icefall-tts-ljspeech-matcha-en-2024-10-28/resolve/main/exp/hifigan_v1.onnx fi ls -lh *.onnx @@ -103,13 +104,13 @@ function export_onnx() { python3 ./matcha/onnx_pretrained.py \ --acoustic-model ./model-steps-6.onnx \ - --vocoder ./hifigan_v2.onnx \ + --vocoder ./hifigan_v1.onnx \ --tokens ./data/tokens.txt \ --input-text "how are you doing?" \ - --output-wav /icefall/generated-matcha-tts-6.wav + --output-wav /icefall/generated-matcha-tts-steps-6-v1.wav ls -lh /icefall/*.wav - soxi /icefall/generated-matcha-tts-6.wav + soxi /icefall/generated-matcha-tts-steps-6-v1.wav } prepare_data diff --git a/egs/ljspeech/TTS/matcha/LICENSE b/egs/ljspeech/TTS/matcha/LICENSE new file mode 100644 index 000000000..858018e75 --- /dev/null +++ b/egs/ljspeech/TTS/matcha/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2023 Shivam Mehta + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/egs/ljspeech/TTS/matcha/audio.py b/egs/ljspeech/TTS/matcha/audio.py index 0a9b8db2a..534331e59 100644 --- a/egs/ljspeech/TTS/matcha/audio.py +++ b/egs/ljspeech/TTS/matcha/audio.py @@ -1,3 +1,5 @@ +# This file is copied from +# https://github.com/shivammehta25/Matcha-TTS/blob/main/matcha/utils/audio.py import numpy as np import torch import torch.utils.data diff --git a/egs/ljspeech/TTS/matcha/export_onnx.py b/egs/ljspeech/TTS/matcha/export_onnx.py index f7dc38c1b..487ea2995 100755 --- a/egs/ljspeech/TTS/matcha/export_onnx.py +++ b/egs/ljspeech/TTS/matcha/export_onnx.py @@ -1,4 +1,5 @@ #!/usr/bin/env python3 +# Copyright 2024 Xiaomi Corp. (authors: Fangjun Kuang) """ This script exports a Matcha-TTS model to ONNX. diff --git a/egs/ljspeech/TTS/matcha/export_onnx_hifigan.py b/egs/ljspeech/TTS/matcha/export_onnx_hifigan.py index ea4435479..63d1fac20 100755 --- a/egs/ljspeech/TTS/matcha/export_onnx_hifigan.py +++ b/egs/ljspeech/TTS/matcha/export_onnx_hifigan.py @@ -1,4 +1,5 @@ #!/usr/bin/env python3 +# Copyright 2024 Xiaomi Corp. (authors: Fangjun Kuang) import logging from pathlib import Path diff --git a/egs/ljspeech/TTS/matcha/inference.py b/egs/ljspeech/TTS/matcha/inference.py index 1189160f6..64abd8e50 100755 --- a/egs/ljspeech/TTS/matcha/inference.py +++ b/egs/ljspeech/TTS/matcha/inference.py @@ -1,4 +1,5 @@ #!/usr/bin/env python3 +# Copyright 2024 Xiaomi Corp. (authors: Fangjun Kuang) import argparse import datetime as dt diff --git a/egs/ljspeech/TTS/matcha/model.py b/egs/ljspeech/TTS/matcha/model.py index a488ab4e8..6539ffc24 100644 --- a/egs/ljspeech/TTS/matcha/model.py +++ b/egs/ljspeech/TTS/matcha/model.py @@ -1,3 +1,5 @@ +# This file is copied from +# https://github.com/shivammehta25/Matcha-TTS/blob/main/matcha/utils/model.py """ from https://github.com/jaywalnut310/glow-tts """ import numpy as np diff --git a/egs/ljspeech/TTS/matcha/models/README.md b/egs/ljspeech/TTS/matcha/models/README.md new file mode 100644 index 000000000..1099ef3c8 --- /dev/null +++ b/egs/ljspeech/TTS/matcha/models/README.md @@ -0,0 +1,3 @@ +# Introduction +Files in this folder are copied from +https://github.com/shivammehta25/Matcha-TTS/tree/main/matcha/models diff --git a/egs/ljspeech/TTS/matcha/monotonic_align/__init__.py b/egs/ljspeech/TTS/matcha/monotonic_align/__init__.py index 58286bdd4..85e275fd0 100644 --- a/egs/ljspeech/TTS/matcha/monotonic_align/__init__.py +++ b/egs/ljspeech/TTS/matcha/monotonic_align/__init__.py @@ -1,3 +1,5 @@ +# Copied from +# https://github.com/shivammehta25/Matcha-TTS/blob/main/matcha/utils/monotonic_align/__init__.py import numpy as np import torch diff --git a/egs/ljspeech/TTS/matcha/monotonic_align/core.pyx b/egs/ljspeech/TTS/matcha/monotonic_align/core.pyx index 091fcc3a5..eabc7f273 100644 --- a/egs/ljspeech/TTS/matcha/monotonic_align/core.pyx +++ b/egs/ljspeech/TTS/matcha/monotonic_align/core.pyx @@ -1,3 +1,5 @@ +# Copied from +# https://github.com/shivammehta25/Matcha-TTS/blob/main/matcha/utils/monotonic_align/core.pyx import numpy as np cimport cython diff --git a/egs/ljspeech/TTS/matcha/monotonic_align/setup.py b/egs/ljspeech/TTS/matcha/monotonic_align/setup.py index 6092e20d2..e406d6786 100644 --- a/egs/ljspeech/TTS/matcha/monotonic_align/setup.py +++ b/egs/ljspeech/TTS/matcha/monotonic_align/setup.py @@ -1,3 +1,5 @@ +# Copied from +# https://github.com/shivammehta25/Matcha-TTS/blob/main/matcha/utils/monotonic_align/setup.py from distutils.core import setup from Cython.Build import cythonize import numpy diff --git a/egs/ljspeech/TTS/matcha/onnx_pretrained.py b/egs/ljspeech/TTS/matcha/onnx_pretrained.py index 6a37f3c17..be34343d3 100755 --- a/egs/ljspeech/TTS/matcha/onnx_pretrained.py +++ b/egs/ljspeech/TTS/matcha/onnx_pretrained.py @@ -1,4 +1,6 @@ #!/usr/bin/env python3 +# Copyright 2024 Xiaomi Corp. (authors: Fangjun Kuang) + import argparse import datetime as dt import logging diff --git a/egs/ljspeech/TTS/matcha/train.py b/egs/ljspeech/TTS/matcha/train.py index ce13e7e42..5e713fdfd 100755 --- a/egs/ljspeech/TTS/matcha/train.py +++ b/egs/ljspeech/TTS/matcha/train.py @@ -1,5 +1,5 @@ #!/usr/bin/env python3 -# Copyright 2023 Xiaomi Corp. (authors: Fangjun Kuang) +# Copyright 2024 Xiaomi Corp. (authors: Fangjun Kuang) import argparse