From fa73dc54a55dc2c2d24fdcc59c282ed01fe08f3d Mon Sep 17 00:00:00 2001 From: jinzr Date: Mon, 18 Mar 2024 10:39:01 +0800 Subject: [PATCH] misc. update --- egs/vctk/TTS/README.md | 3 +-- egs/vctk/TTS/vits/export-onnx.py | 7 +++++-- egs/vctk/TTS/vits/train.py | 3 ++- egs/vctk/TTS/vits/tts_datamodule.py | 5 +++-- 4 files changed, 11 insertions(+), 7 deletions(-) diff --git a/egs/vctk/TTS/README.md b/egs/vctk/TTS/README.md index c07516b77..c2703dbe2 100644 --- a/egs/vctk/TTS/README.md +++ b/egs/vctk/TTS/README.md @@ -10,7 +10,7 @@ The above information is from the [CSTR VCTK website](https://datashare.ed.ac.uk This recipe provides a VITS model trained on the VCTK dataset. -Pretrained model can be found [here](https://huggingface.co/zrjin/icefall-tts-vctk-vits-2023-12-05), note that this model was pretrained on the Edinburgh DataShare VCTK dataset. +Pretrained model can be found [here](https://huggingface.co/zrjin/icefall-tts-vctk-vits-2024-03-18), note that this model was pretrained on the Edinburgh DataShare VCTK dataset. For tutorial and more details, please refer to the [VITS documentation](https://k2-fsa.github.io/icefall/recipes/TTS/vctk/vits.html). 
@@ -21,7 +21,6 @@ export CUDA_VISIBLE_DEVICES="0,1,2,3" --world-size 4 \ --num-epochs 1000 \ --start-epoch 1 \ - --use-fp16 1 \ --exp-dir vits/exp \ --tokens data/tokens.txt --max-duration 350 diff --git a/egs/vctk/TTS/vits/export-onnx.py b/egs/vctk/TTS/vits/export-onnx.py index 31be01a2d..d00450f08 100755 --- a/egs/vctk/TTS/vits/export-onnx.py +++ b/egs/vctk/TTS/vits/export-onnx.py @@ -1,6 +1,7 @@ #!/usr/bin/env python3 # -# Copyright 2023 Xiaomi Corporation (Author: Zengwei Yao) +# Copyright 2023-2024 Xiaomi Corporation (Author: Zengwei Yao, +# Zengrui Jin) # # See ../../../../LICENSE for clarification regarding multiple authors # @@ -160,6 +161,7 @@ def export_model_onnx( model: nn.Module, model_filename: str, vocab_size: int, + n_speakers: int, opset_version: int = 11, ) -> None: """Export the given generator model to ONNX format. @@ -219,7 +221,7 @@ def export_model_onnx( "language": "English", "voice": "en-us", # Choose your language appropriately "has_espeak": 1, - "n_speakers": 108, + "n_speakers": n_speakers, "sample_rate": 22050, # Must match the real sample rate } logging.info(f"meta_data: {meta_data}") @@ -269,6 +271,7 @@ def main(): model, model_filename, params.vocab_size, + params.num_spks, opset_version=opset_version, ) logging.info(f"Exported generator to {model_filename}") diff --git a/egs/vctk/TTS/vits/train.py b/egs/vctk/TTS/vits/train.py index 8dca57a6a..81e318360 100755 --- a/egs/vctk/TTS/vits/train.py +++ b/egs/vctk/TTS/vits/train.py @@ -1,5 +1,6 @@ #!/usr/bin/env python3 -# Copyright 2023 Xiaomi Corp. 
(authors: Zengwei Yao) +# Copyright 2023-2024 Xiaomi Corporation (Author: Zengwei Yao, +# Zengrui Jin) # # See ../../../../LICENSE for clarification regarding multiple authors # diff --git a/egs/vctk/TTS/vits/tts_datamodule.py b/egs/vctk/TTS/vits/tts_datamodule.py index 52fc5179f..6c785d8c3 100644 --- a/egs/vctk/TTS/vits/tts_datamodule.py +++ b/egs/vctk/TTS/vits/tts_datamodule.py @@ -1,6 +1,7 @@ # Copyright 2021 Piotr Żelasko -# Copyright 2022-2023 Xiaomi Corporation (Authors: Mingshuang Luo, -# Zengwei Yao) +# Copyright 2022-2024 Xiaomi Corporation (Authors: Mingshuang Luo, +# Zengwei Yao, +# Zengrui Jin) # # See ../../../../LICENSE for clarification regarding multiple authors #