Fix the normalized_text in LibriTTS recipe (#1825)

This commit is contained in:
Han Zhu 2024-12-05 15:12:06 +08:00 committed by GitHub
parent a1ade8ecb7
commit bdd0f85704
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -31,15 +31,6 @@ from piper_phonemize import phonemize_espeak
from tqdm.auto import tqdm
def remove_punc_to_upper(text: str) -> str:
    """Return *text* upper-cased with punctuation replaced by single spaces.

    Curly single quotes (U+2018, U+2019) are first mapped to the ASCII
    apostrophe.  Every character that is not an ASCII letter, an ASCII
    digit or an apostrophe is then treated as a word separator, the
    remaining characters are upper-cased, and runs of separators collapse
    into one space with no leading or trailing whitespace.

    Args:
        text: The raw transcript text.

    Returns:
        The normalized string; empty if *text* holds no kept characters.
    """
    # The quotes are spelled as escapes so they survive encoding-lossy
    # tooling: with an empty search pattern, str.replace would insert an
    # apostrophe between every character instead of normalizing quotes.
    text = text.replace("\u2018", "'").replace("\u2019", "'")
    tokens = set("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789'")
    s_list = [x.upper() if x in tokens else " " for x in text]
    # split() with no argument already discards leading/trailing whitespace,
    # so the joined result needs no further stripping.
    return " ".join("".join(s_list).split())
def prepare_tokens_libritts():
output_dir = Path("data/spectrogram")
prefix = "libritts"
@@ -72,7 +63,7 @@ def prepare_tokens_libritts():
for t in tokens_list:
tokens.extend(t)
cut.tokens = tokens
cut.supervisions[0].normalized_text = remove_punc_to_upper(text)
cut.supervisions[0].normalized_text = text
new_cuts.append(cut)