misc. update

This commit is contained in:
jinzr 2024-03-18 10:39:01 +08:00
parent 7ea100a26a
commit fa73dc54a5
4 changed files with 11 additions and 7 deletions

View File

@ -10,7 +10,7 @@ The above information is from the [CSTR VCTK website](https://datashare.ed.ac.uk
This recipe provides a VITS model trained on the VCTK dataset. This recipe provides a VITS model trained on the VCTK dataset.
Pretrained model can be found [here](https://huggingface.co/zrjin/icefall-tts-vctk-vits-2023-12-05), note that this model was pretrained on the Edinburgh DataShare VCTK dataset. Pretrained model can be found [here](https://huggingface.co/zrjin/icefall-tts-vctk-vits-2024-03-18), note that this model was pretrained on the Edinburgh DataShare VCTK dataset.
For tutorial and more details, please refer to the [VITS documentation](https://k2-fsa.github.io/icefall/recipes/TTS/vctk/vits.html). For tutorial and more details, please refer to the [VITS documentation](https://k2-fsa.github.io/icefall/recipes/TTS/vctk/vits.html).
@ -21,7 +21,6 @@ export CUDA_VISIBLE_DEVICES="0,1,2,3"
--world-size 4 \ --world-size 4 \
--num-epochs 1000 \ --num-epochs 1000 \
--start-epoch 1 \ --start-epoch 1 \
--use-fp16 1 \
--exp-dir vits/exp \ --exp-dir vits/exp \
--tokens data/tokens.txt \ --tokens data/tokens.txt \
--max-duration 350 --max-duration 350

View File

@ -1,6 +1,7 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
# #
# Copyright 2023 Xiaomi Corporation (Author: Zengwei Yao) # Copyright 2023-2024 Xiaomi Corporation (Authors: Zengwei Yao,
# Zengrui Jin)
# #
# See ../../../../LICENSE for clarification regarding multiple authors # See ../../../../LICENSE for clarification regarding multiple authors
# #
@ -160,6 +161,7 @@ def export_model_onnx(
model: nn.Module, model: nn.Module,
model_filename: str, model_filename: str,
vocab_size: int, vocab_size: int,
n_speakers: int,
opset_version: int = 11, opset_version: int = 11,
) -> None: ) -> None:
"""Export the given generator model to ONNX format. """Export the given generator model to ONNX format.
@ -219,7 +221,7 @@ def export_model_onnx(
"language": "English", "language": "English",
"voice": "en-us", # Choose your language appropriately "voice": "en-us", # Choose your language appropriately
"has_espeak": 1, "has_espeak": 1,
"n_speakers": 108, "n_speakers": n_speakers,
"sample_rate": 22050, # Must match the real sample rate "sample_rate": 22050, # Must match the real sample rate
} }
logging.info(f"meta_data: {meta_data}") logging.info(f"meta_data: {meta_data}")
@ -269,6 +271,7 @@ def main():
model, model,
model_filename, model_filename,
params.vocab_size, params.vocab_size,
params.num_spks,
opset_version=opset_version, opset_version=opset_version,
) )
logging.info(f"Exported generator to {model_filename}") logging.info(f"Exported generator to {model_filename}")

View File

@ -1,5 +1,6 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
# Copyright 2023 Xiaomi Corp. (authors: Zengwei Yao) # Copyright 2023-2024 Xiaomi Corporation (Authors: Zengwei Yao,
# Zengrui Jin)
# #
# See ../../../../LICENSE for clarification regarding multiple authors # See ../../../../LICENSE for clarification regarding multiple authors
# #

View File

@ -1,6 +1,7 @@
# Copyright 2021 Piotr Żelasko # Copyright 2021 Piotr Żelasko
# Copyright 2022-2023 Xiaomi Corporation (Authors: Mingshuang Luo, # Copyright 2022-2024 Xiaomi Corporation (Authors: Mingshuang Luo,
# Zengwei Yao) # Zengwei Yao,
# Zengrui Jin)
# #
# See ../../../../LICENSE for clarification regarding multiple authors # See ../../../../LICENSE for clarification regarding multiple authors
# #