diff --git a/egs/ljspeech/TTS/vits/export-onnx.py b/egs/ljspeech/TTS/vits/export-onnx.py index 8d66d5b35..0740757c0 100755 --- a/egs/ljspeech/TTS/vits/export-onnx.py +++ b/egs/ljspeech/TTS/vits/export-onnx.py @@ -76,7 +76,8 @@ def get_parser(): parser.add_argument( "--model-type", type=str, - default="", + default="high", + choices=["low", "medium", "high"], help="""If not empty, valid values are: low, medium, high. It controls the model size. low -> runs faster. """, diff --git a/egs/ljspeech/TTS/vits/infer.py b/egs/ljspeech/TTS/vits/infer.py index 40988adc4..7be76e315 100755 --- a/egs/ljspeech/TTS/vits/infer.py +++ b/egs/ljspeech/TTS/vits/infer.py @@ -75,7 +75,8 @@ def get_parser(): parser.add_argument( "--model-type", type=str, - default="", + default="high", + choices=["low", "medium", "high"], help="""If not empty, valid values are: low, medium, high. It controls the model size. low -> runs faster. """, diff --git a/egs/ljspeech/TTS/vits/train.py b/egs/ljspeech/TTS/vits/train.py index 767689b6c..34b943765 100755 --- a/egs/ljspeech/TTS/vits/train.py +++ b/egs/ljspeech/TTS/vits/train.py @@ -156,7 +156,8 @@ def get_parser(): parser.add_argument( "--model-type", type=str, - default="", + default="high", + choices=["low", "medium", "high"], help="""If not empty, valid values are: low, medium, high. It controls the model size. low -> runs faster. """, diff --git a/egs/ljspeech/TTS/vits/vits.py b/egs/ljspeech/TTS/vits/vits.py index 43d8ce6a3..0b9575cbd 100644 --- a/egs/ljspeech/TTS/vits/vits.py +++ b/egs/ljspeech/TTS/vits/vits.py @@ -66,7 +66,7 @@ HIGH_CONFIG = { "decoder_upsample_kernel_sizes": (16, 16, 4, 4), "decoder_resblock_kernel_sizes": (3, 7, 11), "decoder_resblock_dilations": ((1, 3, 5), (1, 3, 5), (1, 3, 5)), - "text_encoder_cnn_module_kernel": 3, + "text_encoder_cnn_module_kernel": 5, }