mirror of
https://github.com/k2-fsa/icefall.git
synced 2025-09-08 00:24:19 +00:00
27 lines
713 B
Python
27 lines
713 B
Python
import re
|
||
|
||
def replace_full_width_symbol(s: str) -> str:
|
||
# replace full-width symbol with theri half width counterpart
|
||
s = s.replace("“", '"')
|
||
s = s.replace("”", '"')
|
||
s = s.replace("‘", "'")
|
||
s = s.replace("’", "'")
|
||
|
||
return s
|
||
|
||
|
||
def upper_ref_text(text: str) -> str:
|
||
text = replace_full_width_symbol(text)
|
||
text = text.upper() # upper case all characters
|
||
|
||
# Only keep all alpha-numeric characters, hypen and apostrophe
|
||
text = text.replace("--", " ")
|
||
text = re.sub("[^a-zA-Z0-9\s\'-]+", "", text)
|
||
return text
|
||
|
||
def simple_normalization(text: str) -> str:
|
||
text = replace_full_width_symbol(text)
|
||
text = text.replace("--", " ")
|
||
|
||
return text
|
||
|