fix style

This commit is contained in:
PingFeng Luo 2021-12-30 18:56:04 +08:00
parent 8b8cf6c68e
commit a82d826987
3 changed files with 7 additions and 74 deletions

View File

@@ -1,68 +0,0 @@
#!/usr/bin/env python3
# Copyright 2021 (Author: Pingfeng Luo)
"""
make syllables lexicon and handle heteronym
"""
import argparse
import itertools
from pathlib import Path

from pypinyin import pinyin, lazy_pinyin, Style
def get_args():
    """Parse the command-line options of this script.

    Returns:
      The namespace produced by ``argparse``; its ``lexicon`` attribute
      holds the value passed to ``--lexicon`` (``None`` when omitted).
    """
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument(
        "--lexicon",
        type=str,
        help="The input lexicon file.",
    )
    parsed = arg_parser.parse_args()
    return parsed
def process_line(
    line: str
) -> None:
    """Print every syllable-level pronunciation of the word on a lexicon line.

    Only the first whitespace-separated field of ``line`` (the word) is
    used; the phones that follow it are ignored.  The readings of each
    character are looked up with ``pypinyin`` (``Style.TONE3``,
    ``heteronym=True``) and one ``"<word> <syllables>"`` line is printed for
    each combination of per-character readings, so a word containing
    heteronyms produces several lines.

    Args:
      line:
        A line of transcript consisting of space(s) separated word and phones
        input :
          你好 n i3 h ao3
          晴天 q ing2 t ian1
        output :
          你好 ni3 hao3
          晴天 qing2 tian1
        A blank line produces no output.

    Returns:
      Return None.
    """
    fields = line.split()
    if not fields:
        # Blank line: there is no word to look up.
        return
    chars = fields[0]

    # One list of candidate readings per character,
    # e.g. [["ni3"], ["hao3"]] for a word without heteronyms.
    pinyins = pinyin(chars, style=Style.TONE3, heteronym=True)

    # Drop characters that came back without any reading; an empty list
    # would otherwise make the product below empty.
    candidates = [readings for readings in pinyins if readings]
    if not candidates:
        return

    # The Cartesian product enumerates each combination exactly once, with
    # the last character's reading varying fastest.
    for combination in itertools.product(*candidates):
        print("{} {}".format(chars, " ".join(combination)))
def main():
    """Feed every line of the ``--lexicon`` file to ``process_line``.

    Raises:
      FileNotFoundError: If ``--lexicon`` was not given or does not point
        to an existing regular file.
    """
    args = get_args()
    # Validate explicitly: an ``assert`` is stripped under ``python -O`` and
    # ``Path(None)`` would fail with an unrelated TypeError.
    if args.lexicon is None or not Path(args.lexicon).is_file():
        raise FileNotFoundError(
            "Lexicon file not found: {}".format(args.lexicon)
        )
    # The lexicon contains Chinese text; it is assumed to be UTF-8 so that
    # decoding does not depend on the locale's default encoding.
    with open(args.lexicon, encoding="utf-8") as f:
        for line in f:
            process_line(line=line)


# Run the conversion only when executed as a script, not when imported.
if __name__ == "__main__":
    main()

View File

@@ -317,7 +317,8 @@ def lexicon_to_fst(
def get_args():
    """Parse the command-line options.

    Returns:
      The namespace produced by ``argparse``; its ``lang_dir`` attribute
      holds the value passed to ``--lang-dir`` (``None`` when omitted).
    """
    parser = argparse.ArgumentParser()
    # Register --lang-dir exactly once: adding the same option string a
    # second time makes argparse raise ArgumentError ("conflicting option
    # string").
    parser.add_argument(
        "--lang-dir",
        type=str,
        help="The lang dir, data/lang_phone",
    )
    return parser.parse_args()

View File

@@ -340,17 +340,17 @@ class AishellAsrDataModule:
def train_cuts(self) -> CutSet:
    """Load the training cuts manifest.

    Returns:
      The result of ``load_manifest`` for ``cuts_train.json.gz`` located
      under ``self.args.manifest_dir``.
    """
    logging.info("About to get train cuts")
    # Exactly one filename literal: two adjacent string literals are joined
    # by Python into "cuts_train.json.gzcuts_train.json.gz".
    cuts_train = load_manifest(
        self.args.manifest_dir / "cuts_train.json.gz"
    )
    return cuts_train
@lru_cache()
def valid_cuts(self) -> CutSet:
    """Load the dev cuts manifest.

    The result is memoized by ``lru_cache``, so repeated calls with the
    same ``self`` reuse the first loaded value.

    Returns:
      The result of ``load_manifest`` for ``cuts_dev.json.gz`` located
      under ``self.args.manifest_dir``.
    """
    logging.info("About to get dev cuts")
    # Single return; a second identical return after it was unreachable.
    return load_manifest(self.args.manifest_dir / "cuts_dev.json.gz")
@lru_cache()
def test_cuts(self) -> List[CutSet]:
    """Load the test cuts manifest.

    The result is memoized by ``lru_cache``, so repeated calls with the
    same ``self`` reuse the first loaded value.

    Returns:
      The result of ``load_manifest`` for ``cuts_test.json.gz`` located
      under ``self.args.manifest_dir``.
    """
    # NOTE(review): annotated as List[CutSet] although the value comes
    # straight from a single load_manifest call - confirm the intended
    # return type.
    logging.info("About to get test cuts")
    # Single return; a second identical return after it was unreachable.
    return load_manifest(self.args.manifest_dir / "cuts_test.json.gz")