mirror of
https://github.com/k2-fsa/icefall.git
synced 2025-08-27 18:54:18 +00:00
44 lines
1011 B
Python
Executable File
44 lines
1011 B
Python
Executable File
#!/usr/bin/env python3
|
|
|
|
# Copyright 2021 Xiaomi Corporation (Author: Pingfeng Luo)
|
|
import argparse
|
|
import re
|
|
from pathlib import Path
|
|
from typing import Dict, List
|
|
from pypinyin import pinyin, lazy_pinyin, Style
|
|
|
|
def get_args():
    """Parse the command-line options of this script.

    Returns:
      The namespace produced by ``argparse``.  Its ``lexicon`` attribute
      holds the string given to ``--lexicon`` (``None`` when the option is
      not supplied).
    """
    cli = argparse.ArgumentParser()
    cli.add_argument(
        "--lexicon",
        help="The input lexicon file.",
        type=str,
    )
    namespace = cli.parse_args()
    return namespace
|
|
|
|
|
|
def process_line(
    line: str
) -> None:
    """Print one ``"<token> <reading>"`` line per pinyin reading.

    The first whitespace-separated token of ``line`` is passed to
    ``pinyin(..., style=Style.TONE3, heteronym=True)`` and every reading in
    the first element of the result is written to stdout next to the token.

    Args:
      line:
        A line of the lexicon consisting of space(s) separated fields; only
        the first field is used.  Blank lines are skipped.
    Returns:
      Return None.
    """
    fields = line.split()
    if not fields:
        # Blank / whitespace-only line: there is no token to convert.
        return
    char = fields[0]

    syllables = pinyin(char, style=Style.TONE3, heteronym=True)
    if not syllables:
        return

    # Only the first element of the result is used, as before.  Iterate over
    # its readings directly instead of stringifying the list and stripping
    # the brackets/quotes with a regex (the previous pattern r'[]' was not a
    # valid regular expression and raised re.error on every call).
    for reading in syllables[0]:
        print("{} {}".format(char, reading))
|
|
|
|
|
|
def main():
    """Read the lexicon given by ``--lexicon`` and process it line by line.

    Each line of the file is handed to ``process_line``.

    Raises:
      ValueError: If ``--lexicon`` was not supplied on the command line.
      FileNotFoundError: If the supplied path is not an existing file.
    """
    args = get_args()
    if args.lexicon is None:
        raise ValueError("--lexicon is required")

    lexicon = Path(args.lexicon)
    # Explicit check rather than `assert`, which is removed under `python -O`.
    if not lexicon.is_file():
        raise FileNotFoundError(
            "Lexicon file not found: {}".format(lexicon)
        )

    # Decode explicitly instead of relying on the locale's default encoding.
    # NOTE(review): assumes the lexicon is stored as UTF-8 -- confirm.
    with open(lexicon, encoding="utf-8") as f:
        for line in f:
            process_line(line=line)
|
|
|
|
|
|
# Script entry point: run the conversion only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()
|