minor updates

yaozengwei 2024-02-20 22:14:46 +08:00
parent cb04833f8e
commit 1851443801
3 changed files with 22 additions and 10 deletions


@@ -43,10 +43,14 @@ def get_args():
 def get_token2id(filename: Path) -> Dict[str, int]:
     """Get a dict that maps token to IDs, and save it to the given filename."""
-    all_tokens = get_espeak_map()
+    all_tokens = get_espeak_map()  # token: [token_id]
+    all_tokens = {token: token_id[0] for token, token_id in all_tokens.items()}
+    # sort by token_id
+    all_tokens = sorted(all_tokens.items(), key=lambda x: x[1])
 
     with open(filename, "w", encoding="utf-8") as f:
-        for token, token_id in all_tokens.items():
-            f.write(f"{token} {token_id[0]}\n")
+        for token, token_id in all_tokens:
+            f.write(f"{token} {token_id}\n")
 
 
 if __name__ == "__main__":
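For reference, get_espeak_map() in piper_phonemize maps each token to a single-element list of IDs; the fixed get_token2id() unwraps that list once and sorts by ID before writing. A minimal sketch of the same logic (assuming piper_phonemize is installed):

from piper_phonemize import get_espeak_map

# get_espeak_map() returns {token: [token_id]}; unwrap the one-element lists.
all_tokens = {token: token_id[0] for token, token_id in get_espeak_map().items()}
# Write one "token token_id" pair per line, sorted by ID.
for token, token_id in sorted(all_tokens.items(), key=lambda x: x[1]):
    print(token, token_id)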


@@ -39,7 +39,7 @@ def prepare_tokens_ljspeech():
     new_cuts = []
     for cut in cut_set:
         # Each cut only contains one supervision
-        assert len(cut.supervisions) == 1, len(cut.supervisions)
+        assert len(cut.supervisions) == 1, (len(cut.supervisions), cut)
         text = cut.supervisions[0].normalized_text
         # Text normalization
         text = tacotron_cleaner.cleaners.custom_english_cleaners(text)
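Putting the failing cut into the assertion message makes it easy to spot bad manifest entries. A hedged sketch of the surrounding loop, assuming a lhotse CutSet manifest (the path here is hypothetical):

import tacotron_cleaner.cleaners
from lhotse import load_manifest_lazy

cut_set = load_manifest_lazy("data/spectrogram/ljspeech_cuts_all.jsonl.gz")  # hypothetical path
for cut in cut_set:
    # On failure, both the supervision count and the offending cut are printed.
    assert len(cut.supervisions) == 1, (len(cut.supervisions), cut)
    text = tacotron_cleaner.cleaners.custom_english_cleaners(
        cut.supervisions[0].normalized_text
    )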


@@ -1,4 +1,4 @@
-# Copyright 2023 Xiaomi Corp. (authors: Zengwei Yao)
+# Copyright 2023-2024 Xiaomi Corp. (authors: Zengwei Yao)
 #
 # See ../../LICENSE for clarification regarding multiple authors
 #
@@ -14,6 +14,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+import logging
 from typing import Dict, List
 
 import tacotron_cleaner.cleaners
@@ -55,7 +56,8 @@ class Tokenizer(object):
         intersperse_blank: bool = True,
         add_sos: bool = False,
         add_eos: bool = False,
-    ):
+        lang: str = "en-us",
+    ) -> List[List[int]]:
         """
         Args:
           texts:
@@ -66,6 +68,8 @@
             Whether to add sos token at the start.
           add_eos:
             Whether to add eos token at the end.
+          lang:
+            Language argument passed to phonemize_espeak().
 
         Returns:
           Return a list of token id list [utterance][token_id]
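The new lang argument is forwarded to phonemize_espeak() from piper_phonemize, which accepts an espeak-ng voice name and returns one token list per sentence. An illustrative call (the exact phoneme output may differ by espeak-ng version):

from piper_phonemize import phonemize_espeak

tokens_list = phonemize_espeak("Hello world.", "en-us")
# Roughly: [['h', 'ə', 'l', 'ˈoʊ', ' ', 'w', 'ˈɜː', 'l', 'd', '.']]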
@@ -76,14 +80,16 @@
             # Text normalization
             text = tacotron_cleaner.cleaners.custom_english_cleaners(text)
             # Convert to phonemes
-            tokens_list = phonemize_espeak(text, "en-us")
+            tokens_list = phonemize_espeak(text, lang)
             tokens = []
             for t in tokens_list:
                 tokens.extend(t)
 
             token_ids = []
             for t in tokens:
-                assert t in self.token2id, t
+                if t not in self.token2id:
+                    logging.warning(f"Skip OOV {t}")
+                    continue
                 token_ids.append(self.token2id[t])
 
             if intersperse_blank:
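A hedged usage sketch of the updated method; the constructor argument (a tokens.txt path) and the values below are assumptions for illustration, not part of this commit:

tokenizer = Tokenizer("data/tokens.txt")  # assumed tokens-file path
token_ids = tokenizer.texts_to_token_ids(
    ["Hello world."],
    intersperse_blank=True,
    lang="en-us",  # any espeak-ng voice name should work here
)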
@@ -103,7 +109,7 @@
         intersperse_blank: bool = True,
         add_sos: bool = False,
         add_eos: bool = False,
-    ):
+    ) -> List[List[int]]:
         """
         Args:
           tokens_list:
@@ -123,7 +129,9 @@
         for tokens in tokens_list:
             token_ids = []
             for t in tokens:
-                assert t in self.token2id, t
+                if t not in self.token2id:
+                    logging.warning(f"Skip OOV {t}")
+                    continue
                 token_ids.append(self.token2id[t])
 
             if intersperse_blank:
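Replacing the assert with a warning means a single out-of-vocabulary symbol no longer aborts tokenization. The skip pattern in isolation, with a toy mapping:

import logging

token2id = {"a": 0, "b": 1}  # toy mapping for illustration
token_ids = []
for t in ["a", "b", "c"]:
    if t not in token2id:
        logging.warning(f"Skip OOV {t}")  # "c" is skipped instead of crashing
        continue
    token_ids.append(token2id[t])
assert token_ids == [0, 1]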