From fa73dc54a55dc2c2d24fdcc59c282ed01fe08f3d Mon Sep 17 00:00:00 2001
From: jinzr <zengrui.jin0@gmail.com>
Date: Mon, 18 Mar 2024 10:39:01 +0800
Subject: [PATCH] misc. update

---
 egs/vctk/TTS/README.md              | 3 +--
 egs/vctk/TTS/vits/export-onnx.py    | 7 +++++--
 egs/vctk/TTS/vits/train.py          | 3 ++-
 egs/vctk/TTS/vits/tts_datamodule.py | 5 +++--
 4 files changed, 11 insertions(+), 7 deletions(-)

diff --git a/egs/vctk/TTS/README.md b/egs/vctk/TTS/README.md
index c07516b77..c2703dbe2 100644
--- a/egs/vctk/TTS/README.md
+++ b/egs/vctk/TTS/README.md
@@ -10,7 +10,7 @@ The above information is from the [CSTR VCTK website](https://datashare.ed.ac.uk
 
 This recipe provides a VITS model trained on the VCTK dataset.
 
-Pretrained model can be found [here](https://huggingface.co/zrjin/icefall-tts-vctk-vits-2023-12-05), note that this model was pretrained on the Edinburgh DataShare VCTK dataset.
+A pretrained model can be found [here](https://huggingface.co/zrjin/icefall-tts-vctk-vits-2024-03-18). Note that this model was pretrained on the Edinburgh DataShare VCTK dataset.
 
 For tutorial and more details, please refer to the [VITS documentation](https://k2-fsa.github.io/icefall/recipes/TTS/vctk/vits.html).
 
@@ -21,7 +21,6 @@ export CUDA_VISIBLE_DEVICES="0,1,2,3"
   --world-size 4 \
   --num-epochs 1000 \
   --start-epoch 1 \
-  --use-fp16 1 \
   --exp-dir vits/exp \
   --tokens data/tokens.txt
   --max-duration 350
diff --git a/egs/vctk/TTS/vits/export-onnx.py b/egs/vctk/TTS/vits/export-onnx.py
index 31be01a2d..d00450f08 100755
--- a/egs/vctk/TTS/vits/export-onnx.py
+++ b/egs/vctk/TTS/vits/export-onnx.py
@@ -1,6 +1,7 @@
 #!/usr/bin/env python3
 #
-# Copyright      2023 Xiaomi Corporation     (Author: Zengwei Yao)
+# Copyright   2023-2024  Xiaomi Corporation     (Authors: Zengwei Yao,
+#                                                         Zengrui Jin)
 #
 # See ../../../../LICENSE for clarification regarding multiple authors
 #
@@ -160,6 +161,7 @@ def export_model_onnx(
     model: nn.Module,
     model_filename: str,
     vocab_size: int,
+    n_speakers: int,
     opset_version: int = 11,
 ) -> None:
     """Export the given generator model to ONNX format.
@@ -219,7 +221,7 @@ def export_model_onnx(
         "language": "English",
         "voice": "en-us",  # Choose your language appropriately
         "has_espeak": 1,
-        "n_speakers": 108,
+        "n_speakers": n_speakers,
         "sample_rate": 22050,  # Must match the real sample rate
     }
     logging.info(f"meta_data: {meta_data}")
@@ -269,6 +271,7 @@ def main():
         model,
         model_filename,
         params.vocab_size,
+        params.num_spks,
         opset_version=opset_version,
     )
     logging.info(f"Exported generator to {model_filename}")
diff --git a/egs/vctk/TTS/vits/train.py b/egs/vctk/TTS/vits/train.py
index 8dca57a6a..81e318360 100755
--- a/egs/vctk/TTS/vits/train.py
+++ b/egs/vctk/TTS/vits/train.py
@@ -1,5 +1,6 @@
 #!/usr/bin/env python3
-# Copyright         2023  Xiaomi Corp.        (authors: Zengwei Yao)
+# Copyright   2023-2024  Xiaomi Corporation     (Authors: Zengwei Yao,
+#                                                         Zengrui Jin)
 #
 # See ../../../../LICENSE for clarification regarding multiple authors
 #
diff --git a/egs/vctk/TTS/vits/tts_datamodule.py b/egs/vctk/TTS/vits/tts_datamodule.py
index 52fc5179f..6c785d8c3 100644
--- a/egs/vctk/TTS/vits/tts_datamodule.py
+++ b/egs/vctk/TTS/vits/tts_datamodule.py
@@ -1,6 +1,7 @@
 # Copyright      2021  Piotr Żelasko
-# Copyright      2022-2023  Xiaomi Corporation     (Authors: Mingshuang Luo,
-#                                                            Zengwei Yao)
+# Copyright      2022-2024  Xiaomi Corporation     (Authors: Mingshuang Luo,
+#                                                            Zengwei Yao,
+#                                                            Zengrui Jin)
 #
 # See ../../../../LICENSE for clarification regarding multiple authors
 #