diff --git a/egs/ljspeech/TTS/matcha/text/__init__.py b/egs/ljspeech/TTS/matcha/text/__init__.py deleted file mode 100644 index 78c8b1f18..000000000 --- a/egs/ljspeech/TTS/matcha/text/__init__.py +++ /dev/null @@ -1,64 +0,0 @@ -""" from https://github.com/keithito/tacotron """ -from matcha.text import cleaners -from matcha.text.symbols import symbols - -# Mappings from symbol to numeric ID and vice versa: -_symbol_to_id = {s: i for i, s in enumerate(symbols)} -_id_to_symbol = { - i: s for i, s in enumerate(symbols) -} # pylint: disable=unnecessary-comprehension - - -def text_to_sequence(text, cleaner_names): - """Converts a string of text to a sequence of IDs corresponding to the symbols in the text. - Args: - text: string to convert to a sequence - cleaner_names: names of the cleaner functions to run the text through - Returns: - List of integers corresponding to the symbols in the text - """ - sequence = [] - - clean_text = _clean_text(text, cleaner_names) - for symbol in clean_text: - try: - if symbol in "_()[]# ̃": - continue - symbol_id = _symbol_to_id[symbol] - except Exception as ex: - print(text) - print(clean_text) - raise RuntimeError( - f"text: {text}, clean_text: {clean_text}, ex: {ex}, symbol: {symbol}" - ) - sequence += [symbol_id] - return sequence, clean_text - - -def cleaned_text_to_sequence(cleaned_text): - """Converts a string of text to a sequence of IDs corresponding to the symbols in the text. - Args: - text: string to convert to a sequence - Returns: - List of integers corresponding to the symbols in the text - """ - sequence = [_symbol_to_id[symbol] for symbol in cleaned_text] - return sequence - - -def sequence_to_text(sequence): - """Converts a sequence of IDs back to a string""" - result = "" - for symbol_id in sequence: - s = _id_to_symbol[symbol_id] - result += s - return result - - -def _clean_text(text, cleaner_names): - for name in cleaner_names: - cleaner = getattr(cleaners, name) - if not cleaner: - raise Exception("Unknown cleaner: %s" % name) - text = cleaner(text) - return text diff --git a/egs/ljspeech/TTS/matcha/text/cleaners.py b/egs/ljspeech/TTS/matcha/text/cleaners.py deleted file mode 100644 index 0a1979afe..000000000 --- a/egs/ljspeech/TTS/matcha/text/cleaners.py +++ /dev/null @@ -1,130 +0,0 @@ -""" from https://github.com/keithito/tacotron - -Cleaners are transformations that run over the input text at both training and eval time. - -Cleaners can be selected by passing a comma-delimited list of cleaner names as the "cleaners" -hyperparameter. Some cleaners are English-specific. You'll typically want to use: - 1. "english_cleaners" for English text - 2. "transliteration_cleaners" for non-English text that can be transliterated to ASCII using - the Unidecode library (https://pypi.python.org/pypi/Unidecode) - 3. "basic_cleaners" if you do not want to transliterate (in this case, you should also update - the symbols in symbols.py to match your data). -""" - -import logging -import re - -import phonemizer -from unidecode import unidecode - -# To avoid excessive logging we set the log level of the phonemizer package to Critical -critical_logger = logging.getLogger("phonemizer") -critical_logger.setLevel(logging.CRITICAL) - -# Intializing the phonemizer globally significantly reduces the speed -# now the phonemizer is not initialising at every call -# Might be less flexible, but it is much-much faster -global_phonemizer = phonemizer.backend.EspeakBackend( - language="en-us", - preserve_punctuation=True, - with_stress=True, - language_switch="remove-flags", - logger=critical_logger, -) - - -# Regular expression matching whitespace: -_whitespace_re = re.compile(r"\s+") - -# List of (regular expression, replacement) pairs for abbreviations: -_abbreviations = [ - (re.compile("\\b%s\\." % x[0], re.IGNORECASE), x[1]) - for x in [ - ("mrs", "misess"), - ("mr", "mister"), - ("dr", "doctor"), - ("st", "saint"), - ("co", "company"), - ("jr", "junior"), - ("maj", "major"), - ("gen", "general"), - ("drs", "doctors"), - ("rev", "reverend"), - ("lt", "lieutenant"), - ("hon", "honorable"), - ("sgt", "sergeant"), - ("capt", "captain"), - ("esq", "esquire"), - ("ltd", "limited"), - ("col", "colonel"), - ("ft", "fort"), - ] -] - - -def expand_abbreviations(text): - for regex, replacement in _abbreviations: - text = re.sub(regex, replacement, text) - return text - - -def lowercase(text): - return text.lower() - - -def collapse_whitespace(text): - return re.sub(_whitespace_re, " ", text) - - -def remove_parentheses(text): - text = text.replace("(", "") - text = text.replace(")", "") - text = text.replace("[", "") - text = text.replace("]", "") - return text - - -def convert_to_ascii(text): - return unidecode(text) - - -def basic_cleaners(text): - """Basic pipeline that lowercases and collapses whitespace without transliteration.""" - text = lowercase(text) - text = collapse_whitespace(text) - return text - - -def transliteration_cleaners(text): - """Pipeline for non-English text that transliterates to ASCII.""" - text = convert_to_ascii(text) - text = lowercase(text) - text = collapse_whitespace(text) - return text - - -def english_cleaners2(text): - """Pipeline for English text, including abbreviation expansion. + punctuation + stress""" - text = convert_to_ascii(text) - text = lowercase(text) - text = expand_abbreviations(text) - text = remove_parentheses(text) - phonemes = global_phonemizer.phonemize([text], strip=True, njobs=1)[0] - phonemes = collapse_whitespace(phonemes) - return phonemes - - -# I am removing this due to incompatibility with several version of python -# However, if you want to use it, you can uncomment it -# and install piper-phonemize with the following command: -# pip install piper-phonemize - -# import piper_phonemize -# def english_cleaners_piper(text): -# """Pipeline for English text, including abbreviation expansion. + punctuation + stress""" -# text = convert_to_ascii(text) -# text = lowercase(text) -# text = expand_abbreviations(text) -# phonemes = "".join(piper_phonemize.phonemize_espeak(text=text, voice="en-US")[0]) -# phonemes = collapse_whitespace(phonemes) -# return phonemes diff --git a/egs/ljspeech/TTS/matcha/text/numbers.py b/egs/ljspeech/TTS/matcha/text/numbers.py deleted file mode 100644 index 49c21d4e9..000000000 --- a/egs/ljspeech/TTS/matcha/text/numbers.py +++ /dev/null @@ -1,73 +0,0 @@ -""" from https://github.com/keithito/tacotron """ - -import re - -import inflect - -_inflect = inflect.engine() -_comma_number_re = re.compile(r"([0-9][0-9\,]+[0-9])") -_decimal_number_re = re.compile(r"([0-9]+\.[0-9]+)") -_pounds_re = re.compile(r"£([0-9\,]*[0-9]+)") -_dollars_re = re.compile(r"\$([0-9\.\,]*[0-9]+)") -_ordinal_re = re.compile(r"[0-9]+(st|nd|rd|th)") -_number_re = re.compile(r"[0-9]+") - - -def _remove_commas(m): - return m.group(1).replace(",", "") - - -def _expand_decimal_point(m): - return m.group(1).replace(".", " point ") - - -def _expand_dollars(m): - match = m.group(1) - parts = match.split(".") - if len(parts) > 2: - return match + " dollars" - dollars = int(parts[0]) if parts[0] else 0 - cents = int(parts[1]) if len(parts) > 1 and parts[1] else 0 - if dollars and cents: - dollar_unit = "dollar" if dollars == 1 else "dollars" - cent_unit = "cent" if cents == 1 else "cents" - return f"{dollars} {dollar_unit}, {cents} {cent_unit}" - elif dollars: - dollar_unit = "dollar" if dollars == 1 else "dollars" - return f"{dollars} {dollar_unit}" - elif cents: - cent_unit = "cent" if cents == 1 else "cents" - return f"{cents} {cent_unit}" - else: - return "zero dollars" - - -def _expand_ordinal(m): - return _inflect.number_to_words(m.group(0)) - - -def _expand_number(m): - num = int(m.group(0)) - if num > 1000 and num < 3000: - if num == 2000: - return "two thousand" - elif num > 2000 and num < 2010: - return "two thousand " + _inflect.number_to_words(num % 100) - elif num % 100 == 0: - return _inflect.number_to_words(num // 100) + " hundred" - else: - return _inflect.number_to_words( - num, andword="", zero="oh", group=2 - ).replace(", ", " ") - else: - return _inflect.number_to_words(num, andword="") - - -def normalize_numbers(text): - text = re.sub(_comma_number_re, _remove_commas, text) - text = re.sub(_pounds_re, r"\1 pounds", text) - text = re.sub(_dollars_re, _expand_dollars, text) - text = re.sub(_decimal_number_re, _expand_decimal_point, text) - text = re.sub(_ordinal_re, _expand_ordinal, text) - text = re.sub(_number_re, _expand_number, text) - return text diff --git a/egs/ljspeech/TTS/matcha/text/symbols.py b/egs/ljspeech/TTS/matcha/text/symbols.py deleted file mode 100644 index b32c12430..000000000 --- a/egs/ljspeech/TTS/matcha/text/symbols.py +++ /dev/null @@ -1,15 +0,0 @@ -""" from https://github.com/keithito/tacotron - -Defines the set of symbols used in text input to the model. -""" -_pad = "_" -_punctuation = ';:,.!?¡¿—…"«»“” ' -_letters = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz" -_letters_ipa = "ɑɐɒæɓʙβɔɕçɗɖðʤəɘɚɛɜɝɞɟʄɡɠɢʛɦɧħɥʜɨɪʝɭɬɫɮʟɱɯɰŋɳɲɴøɵɸθœɶʘɹɺɾɻʀʁɽʂʃʈʧʉʊʋⱱʌɣɤʍχʎʏʑʐʒʔʡʕʢǀǁǂǃˈˌːˑʼʴʰʱʲʷˠˤ˞↓↑→↗↘'̩'ᵻ" - - -# Export all symbols: -symbols = [_pad] + list(_punctuation) + list(_letters) + list(_letters_ipa) - -# Special symbol ids -SPACE_ID = symbols.index(" ")