mirror of
https://github.com/k2-fsa/icefall.git
synced 2025-09-08 16:44:20 +00:00
change the text normalization for upper_case_no_punc
This commit is contained in:
parent
77890a6115
commit
522273f97e
@ -30,7 +30,8 @@ def ref_text_normalization(ref_text: str) -> str:
|
||||
def remove_non_alphabetic(text: str, strict: bool=True) -> str:
|
||||
if not strict:
|
||||
# Note, this also keeps space, single quote(') and hyphen (-)
|
||||
text = text.replace("--", " ")
|
||||
text = text.replace("-", " ")
|
||||
text = text.replace("—", " ")
|
||||
return re.sub("[^a-zA-Z0-9\s']+", "", text)
|
||||
else:
|
||||
# only keeps space
|
||||
@ -53,7 +54,7 @@ def upper_all_char(text: str) -> str:
|
||||
return text.upper()
|
||||
|
||||
if __name__ == "__main__":
|
||||
ref_text = " Hello “! My name is ‘ haha"
|
||||
ref_text = "Mixed-case English transcription, with punctuation. Actually, it is fully not related."
|
||||
print(ref_text)
|
||||
res = train_text_normalization(ref_text)
|
||||
res = upper_only_alpha(ref_text)
|
||||
print(res)
|
||||
|
Loading…
x
Reference in New Issue
Block a user