diff --git a/.flake8 b/.flake8 index 410cb5482..cf276d0ba 100644 --- a/.flake8 +++ b/.flake8 @@ -15,7 +15,7 @@ per-file-ignores = egs/librispeech/ASR/zipformer_mmi/*.py: E501, E203 egs/librispeech/ASR/zipformer/*.py: E501, E203 egs/librispeech/ASR/RESULTS.md: E999, - + egs/ljspeech/TTS/vits/*.py: E501, E203 # invalid escape sequence (cause by tex formular), W605 icefall/utils.py: E501, W605 diff --git a/docs/source/recipes/TTS/ljspeech/vits.rst b/docs/source/recipes/TTS/ljspeech/vits.rst index 0f0d97a9e..535d8999f 100644 --- a/docs/source/recipes/TTS/ljspeech/vits.rst +++ b/docs/source/recipes/TTS/ljspeech/vits.rst @@ -103,4 +103,10 @@ You can test the exported ONNX model with: --model-filename vits/exp/vits-epoch-1000.onnx \ --tokens data/tokens.txt +Download pretrained models +-------------------------- +If you don't want to train from scratch, you can download the pretrained models +by visiting the following link: + + - `<https://huggingface.co/Zengwei/icefall-tts-ljspeech-vits-2023-11-29>`_ diff --git a/egs/ljspeech/TTS/local/compute_spectrogram_ljspeech.py b/egs/ljspeech/TTS/local/compute_spectrogram_ljspeech.py index eacf0df57..97c9008fc 100755 --- a/egs/ljspeech/TTS/local/compute_spectrogram_ljspeech.py +++ b/egs/ljspeech/TTS/local/compute_spectrogram_ljspeech.py @@ -29,7 +29,13 @@ import os from pathlib import Path import torch -from lhotse import CutSet, Spectrogram, SpectrogramConfig, LilcomChunkyWriter, load_manifest +from lhotse import ( + CutSet, + LilcomChunkyWriter, + Spectrogram, + SpectrogramConfig, + load_manifest, +) from lhotse.audio import RecordingSet from lhotse.supervision import SupervisionSet diff --git a/egs/ljspeech/TTS/local/prepare_token_file.py b/egs/ljspeech/TTS/local/prepare_token_file.py index 007bb299b..df976804a 100755 --- a/egs/ljspeech/TTS/local/prepare_token_file.py +++ b/egs/ljspeech/TTS/local/prepare_token_file.py @@ -73,7 +73,7 @@ def get_token2id(manifest_file: Path) -> Dict[str, int]: extra_tokens = [ "", # 0 for blank "", # 1 for sos and eos symbols. 
- "" # 2 for OOV + "", # 2 for OOV ] all_tokens = set() diff --git a/egs/ljspeech/TTS/local/prepare_tokens_ljspeech.py b/egs/ljspeech/TTS/local/prepare_tokens_ljspeech.py index f7fa7e2d2..fcd0137a0 100755 --- a/egs/ljspeech/TTS/local/prepare_tokens_ljspeech.py +++ b/egs/ljspeech/TTS/local/prepare_tokens_ljspeech.py @@ -34,9 +34,7 @@ def prepare_tokens_ljspeech(): suffix = "jsonl.gz" partition = "all" - cut_set = load_manifest( - output_dir / f"{prefix}_cuts_{partition}.{suffix}" - ) + cut_set = load_manifest(output_dir / f"{prefix}_cuts_{partition}.{suffix}") g2p = g2p_en.G2p() new_cuts = [] @@ -51,9 +49,7 @@ def prepare_tokens_ljspeech(): new_cuts.append(cut) new_cut_set = CutSet.from_cuts(new_cuts) - new_cut_set.to_file( - output_dir / f"{prefix}_cuts_with_tokens_{partition}.{suffix}" - ) + new_cut_set.to_file(output_dir / f"{prefix}_cuts_with_tokens_{partition}.{suffix}") if __name__ == "__main__": diff --git a/egs/ljspeech/TTS/vits/README.md b/egs/ljspeech/TTS/vits/README.md index 45b544516..1141326b9 100644 --- a/egs/ljspeech/TTS/vits/README.md +++ b/egs/ljspeech/TTS/vits/README.md @@ -1 +1,3 @@ See https://k2-fsa.github.io/icefall/recipes/TTS/ljspeech/vits.html for detailed tutorials. + +Training logs, Tensorboard logs, and checkpoints are uploaded to https://huggingface.co/Zengwei/icefall-tts-ljspeech-vits-2023-11-29.