From 32cdbdfebbf965e4040e9fb2d0ccd7c71633f5c9 Mon Sep 17 00:00:00 2001 From: zr_jin Date: Tue, 22 Oct 2024 12:34:07 +0800 Subject: [PATCH] Update vits.py --- egs/ljspeech/TTS/vits/vits.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/egs/ljspeech/TTS/vits/vits.py b/egs/ljspeech/TTS/vits/vits.py index 952e2fde7..a1fabf9ad 100644 --- a/egs/ljspeech/TTS/vits/vits.py +++ b/egs/ljspeech/TTS/vits/vits.py @@ -623,6 +623,7 @@ class VITS(nn.Module): text_lengths: torch.Tensor, sids: Optional[torch.Tensor] = None, spembs: Optional[torch.Tensor] = None, + lids: Optional[torch.Tensor] = None, durations: Optional[torch.Tensor] = None, noise_scale: float = 0.667, noise_scale_dur: float = 0.8, @@ -637,6 +638,7 @@ class VITS(nn.Module): text_lengths (Tensor): Input text index tensor (B,). sids (Tensor): Speaker index tensor (B,). spembs (Optional[Tensor]): Speaker embedding tensor (B, spk_embed_dim). + lids (Optional[Tensor]): Language index tensor (B,). noise_scale (float): Noise scale value for flow. noise_scale_dur (float): Noise scale value for duration predictor. alpha (float): Alpha parameter to control the speed of generated speech. @@ -653,6 +655,7 @@ class VITS(nn.Module): text_lengths=text_lengths, sids=sids, spembs=spembs, + lids=lids, noise_scale=noise_scale, noise_scale_dur=noise_scale_dur, alpha=alpha,