mirror of
https://github.com/k2-fsa/icefall.git
synced 2025-09-08 16:44:20 +00:00
change the text normalization for upper_case_no_punc
This commit is contained in:
parent
77890a6115
commit
522273f97e
@ -30,7 +30,8 @@ def ref_text_normalization(ref_text: str) -> str:
|
|||||||
def remove_non_alphabetic(text: str, strict: bool=True) -> str:
|
def remove_non_alphabetic(text: str, strict: bool=True) -> str:
|
||||||
if not strict:
|
if not strict:
|
||||||
# Note, this also keeps space, single quote(') and hyphen (-)
|
# Note, this also keeps space, single quote(') and hyphen (-)
|
||||||
text = text.replace("--", " ")
|
text = text.replace("-", " ")
|
||||||
|
text = text.replace("—", " ")
|
||||||
return re.sub("[^a-zA-Z0-9\s']+", "", text)
|
return re.sub("[^a-zA-Z0-9\s']+", "", text)
|
||||||
else:
|
else:
|
||||||
# only keeps space
|
# only keeps space
|
||||||
@ -53,7 +54,7 @@ def upper_all_char(text: str) -> str:
|
|||||||
return text.upper()
|
return text.upper()
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
ref_text = " Hello “! My name is ‘ haha"
|
ref_text = "Mixed-case English transcription, with punctuation. Actually, it is fully not related."
|
||||||
print(ref_text)
|
print(ref_text)
|
||||||
res = train_text_normalization(ref_text)
|
res = upper_only_alpha(ref_text)
|
||||||
print(res)
|
print(res)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user