From d7a2aa9d07ba31c73f67e0b64c51ab9dae9ee18e Mon Sep 17 00:00:00 2001
From: yaozengwei
Date: Wed, 29 Nov 2023 13:29:12 +0000
Subject: [PATCH] deploy: 0622dea30deacf2680dcca0549f7a05c0b965066

---
 _sources/recipes/TTS/index.rst.txt            |   7 +
 _sources/recipes/TTS/ljspeech/vits.rst.txt    | 113 +++++++++
 _sources/recipes/index.rst.txt                |   3 +-
 contributing/code-style.html                  |  10 +-
 contributing/doc.html                         |  10 +-
 contributing/how-to-create-a-recipe.html      |  10 +-
 contributing/index.html                       |  14 +-
 decoding-with-langugage-models/LODR.html      |  16 +-
 decoding-with-langugage-models/index.html     |  14 +-
 decoding-with-langugage-models/rescoring.html |  30 +--
 .../shallow-fusion.html                       |  18 +-
 docker/index.html                             |  10 +-
 docker/intro.html                             |  10 +-
 faqs.html                                     |  10 +-
 for-dummies/data-preparation.html             |  10 +-
 for-dummies/decoding.html                     |  10 +-
 for-dummies/environment-setup.html            |  10 +-
 for-dummies/index.html                        |  10 +-
 for-dummies/model-export.html                 |  10 +-
 for-dummies/training.html                     |  10 +-
 genindex.html                                 |   8 +-
 huggingface/index.html                        |  10 +-
 huggingface/pretrained-models.html            |  10 +-
 huggingface/spaces.html                       |  10 +-
 index.html                                    |  14 +-
 installation/index.html                       |  10 +-
 model-export/export-model-state-dict.html     |  10 +-
 model-export/export-ncnn-conv-emformer.html   |  10 +-
 model-export/export-ncnn-lstm.html            |  10 +-
 model-export/export-ncnn-zipformer.html       |  10 +-
 model-export/export-ncnn.html                 |  10 +-
 model-export/export-onnx.html                 |  10 +-
 .../export-with-torch-jit-script.html         |  10 +-
 model-export/export-with-torch-jit-trace.html |  10 +-
 model-export/index.html                       |  10 +-
 objects.inv                                   | Bin 1927 -> 1955 bytes
 .../aishell/conformer_ctc.html                |  11 +-
 recipes/Non-streaming-ASR/aishell/index.html  |  11 +-
 .../aishell/stateless_transducer.html         |  11 +-
 .../aishell/tdnn_lstm_ctc.html                |  11 +-
 recipes/Non-streaming-ASR/index.html          |  11 +-
 .../librispeech/conformer_ctc.html            |  11 +-
 .../librispeech/distillation.html             |  11 +-
 .../Non-streaming-ASR/librispeech/index.html  |  11 +-
 .../pruned_transducer_stateless.html          |  11 +-
 .../librispeech/tdnn_lstm_ctc.html            |  11 +-
 .../librispeech/zipformer_ctc_blankskip.html  |  11 +-
 .../librispeech/zipformer_mmi.html            |  11 +-
 recipes/Non-streaming-ASR/timit/index.html    |  11 +-
 .../timit/tdnn_ligru_ctc.html                 |  11 +-
 .../timit/tdnn_lstm_ctc.html                  |  11 +-
 recipes/Non-streaming-ASR/yesno/index.html    |  11 +-
 recipes/Non-streaming-ASR/yesno/tdnn.html     |  11 +-
 recipes/RNN-LM/index.html                     |  11 +-
 recipes/RNN-LM/librispeech/lm-training.html   |  15 +-
 recipes/Streaming-ASR/index.html              |  11 +-
 recipes/Streaming-ASR/introduction.html       |  11 +-
 recipes/Streaming-ASR/librispeech/index.html  |  11 +-
 .../lstm_pruned_stateless_transducer.html     |  11 +-
 .../pruned_transducer_stateless.html          |  11 +-
 .../librispeech/zipformer_transducer.html     |  11 +-
 recipes/TTS/index.html                        | 148 +++++++++++
 recipes/TTS/ljspeech/vits.html                | 238 ++++++++++++++++++
 recipes/index.html                            |  17 +-
 search.html                                   |   8 +-
 searchindex.js                                |   2 +-
 66 files changed, 919 insertions(+), 260 deletions(-)
 create mode 100644 _sources/recipes/TTS/index.rst.txt
 create mode 100644 _sources/recipes/TTS/ljspeech/vits.rst.txt
 create mode 100644 recipes/TTS/index.html
 create mode 100644 recipes/TTS/ljspeech/vits.html

diff --git a/_sources/recipes/TTS/index.rst.txt b/_sources/recipes/TTS/index.rst.txt
new file mode 100644
index 000000000..aa891c072
--- /dev/null
+++ b/_sources/recipes/TTS/index.rst.txt
@@ -0,0 +1,7 @@
+TTS
+======
+
+.. toctree::
+   :maxdepth: 2
+
+   ljspeech/vits
diff --git a/_sources/recipes/TTS/ljspeech/vits.rst.txt b/_sources/recipes/TTS/ljspeech/vits.rst.txt
new file mode 100644
index 000000000..385fd3c70
--- /dev/null
+++ b/_sources/recipes/TTS/ljspeech/vits.rst.txt
@@ -0,0 +1,113 @@
+VITS
+===============
+
+This tutorial shows you how to train a VITS model
+with the `LJSpeech `_ dataset.
+
+.. note::
+
+   The VITS paper: `Conditional Variational Autoencoder with Adversarial Learning for End-to-End Text-to-Speech `_
+
+
+Data preparation
+----------------
+
+.. code-block:: bash
+
+  $ cd egs/ljspeech/TTS
+  $ ./prepare.sh
+
+To run stage 1 to stage 5, use:
+
+.. code-block:: bash
+
+  $ ./prepare.sh --stage 1 --stop_stage 5
+
+
+Build Monotonic Alignment Search
+--------------------------------
+
+.. code-block:: bash
+
+  $ cd vits/monotonic_align
+  $ python setup.py build_ext --inplace
+  $ cd ../../
+
+
+Training
+--------
+
+.. code-block:: bash
+
+  $ export CUDA_VISIBLE_DEVICES="0,1,2,3"
+  $ ./vits/train.py \
+    --world-size 4 \
+    --num-epochs 1000 \
+    --start-epoch 1 \
+    --use-fp16 1 \
+    --exp-dir vits/exp \
+    --tokens data/tokens.txt \
+    --max-duration 500
+
+.. note::
+
+   You can adjust the hyper-parameters to control the size of the VITS model and
+   the training configuration. For more details, please run ``./vits/train.py --help``.
+
+.. note::
+
+   The training can take a long time (usually a couple of days).
+
+Training logs, checkpoints, and TensorBoard logs are saved in ``vits/exp``.
+
+
+Inference
+---------
+
+The inference part uses checkpoints saved during training, so you have to run the
+training part first. Inference saves the ground-truth and generated wavs to the directory
+``vits/exp/infer/epoch-*/wav``, e.g., ``vits/exp/infer/epoch-1000/wav``.
+
+.. code-block:: bash
+
+  $ export CUDA_VISIBLE_DEVICES="0"
+  $ ./vits/infer.py \
+    --epoch 1000 \
+    --exp-dir vits/exp \
+    --tokens data/tokens.txt \
+    --max-duration 500
+
+.. note::
+
+   For more details, please run ``./vits/infer.py --help``.
+
+
+Export models
+-------------
+
+Currently, we only support exporting to ONNX. The export generates two files in the given ``exp-dir``:
+``vits-epoch-*.onnx`` and ``vits-epoch-*.int8.onnx``.
+
+.. code-block:: bash
+
+  $ ./vits/export-onnx.py \
+    --epoch 1000 \
+    --exp-dir vits/exp \
+    --tokens data/tokens.txt
+
+You can test the exported ONNX model with:
+
+.. code-block:: bash
+
+  $ ./vits/test_onnx.py \
+    --model-filename vits/exp/vits-epoch-1000.onnx \
+    --tokens data/tokens.txt
+
+
+Download pretrained models
+--------------------------
+
+If you don't want to train from scratch, you can download the pretrained models
+by visiting the following link:
+
+  - ``_
diff --git a/_sources/recipes/index.rst.txt b/_sources/recipes/index.rst.txt
index 7265e1cf6..8df61f0d0 100644
--- a/_sources/recipes/index.rst.txt
+++ b/_sources/recipes/index.rst.txt
@@ -2,7 +2,7 @@ Recipes
 =======
 
 This page contains various recipes in ``icefall``.
-Currently, only speech recognition recipes are provided.
+Currently, we provide recipes for speech recognition, language modeling, and speech synthesis.
 We may add recipes for other tasks as well in the future.
 
@@ -16,3 +16,4 @@ We may add recipes for other tasks as well in the future.
Non-streaming-ASR/index Streaming-ASR/index RNN-LM/index + TTS/index diff --git a/contributing/code-style.html b/contributing/code-style.html index debb240d4..74d418a75 100644 --- a/contributing/code-style.html +++ b/contributing/code-style.html @@ -1,12 +1,14 @@ - + - + Follow the code style — icefall 0.1 documentation - - + + + + diff --git a/contributing/doc.html b/contributing/doc.html index 85ebdf023..dac487b7f 100644 --- a/contributing/doc.html +++ b/contributing/doc.html @@ -1,12 +1,14 @@ - + - + Contributing to Documentation — icefall 0.1 documentation - - + + + + diff --git a/contributing/how-to-create-a-recipe.html b/contributing/how-to-create-a-recipe.html index 6443991bf..49c374276 100644 --- a/contributing/how-to-create-a-recipe.html +++ b/contributing/how-to-create-a-recipe.html @@ -1,12 +1,14 @@ - + - + How to create a recipe — icefall 0.1 documentation - - + + + + diff --git a/contributing/index.html b/contributing/index.html index 1345751f7..8645f6487 100644 --- a/contributing/index.html +++ b/contributing/index.html @@ -1,12 +1,14 @@ - + - + Contributing — icefall 0.1 documentation - - + + + + @@ -20,7 +22,7 @@ - + @@ -133,7 +135,7 @@ and code to icefall
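
A note on the exported model, as a supplement to the ``test_onnx.py`` step in the tutorial above: you can also sanity-check the exported file directly with ``onnxruntime``. The sketch below is not part of the recipe; it assumes ``onnxruntime`` is installed (e.g., via ``pip install onnxruntime``) and that the checkpoint path matches the tutorial's defaults. It deliberately reads the input and output names from the graph instead of hard-coding them, since those names are defined by ``./vits/export-onnx.py``.

.. code-block:: python

    # Minimal sketch: inspect the exported VITS ONNX graph with onnxruntime.
    # Assumptions: the file path below is the one produced by the tutorial
    # (adjust it if you used a different --epoch or --exp-dir), and we make
    # no claim about the model's I/O names -- we print them from the graph.
    import onnxruntime as ort

    session = ort.InferenceSession(
        "vits/exp/vits-epoch-1000.onnx",
        providers=["CPUExecutionProvider"],
    )

    # Print the model's actual I/O signature instead of guessing it.
    for inp in session.get_inputs():
        print("input: ", inp.name, inp.shape, inp.type)
    for out in session.get_outputs():
        print("output:", out.name, out.shape, out.type)

Once the input names are known, ``session.run`` can be fed token IDs derived from ``data/tokens.txt``; ``./vits/test_onnx.py`` remains the authoritative reference for the exact calling convention.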