From 522273f97ec31058a5c1e37ee381edb024237388 Mon Sep 17 00:00:00 2001 From: marcoyang1998 Date: Fri, 8 Sep 2023 09:57:24 +0800 Subject: [PATCH] change the text normalization for upper_case_no_punc --- .../ASR/zipformer_prompt_asr/text_normalization.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/egs/libriheavy/ASR/zipformer_prompt_asr/text_normalization.py b/egs/libriheavy/ASR/zipformer_prompt_asr/text_normalization.py index 1f7e65462..024c444f1 100644 --- a/egs/libriheavy/ASR/zipformer_prompt_asr/text_normalization.py +++ b/egs/libriheavy/ASR/zipformer_prompt_asr/text_normalization.py @@ -30,7 +30,8 @@ def ref_text_normalization(ref_text: str) -> str: def remove_non_alphabetic(text: str, strict: bool=True) -> str: if not strict: # Note, this also keeps space, single quote(') and hypen (-) - text = text.replace("--", " ") + text = text.replace("-", " ") + text = text.replace("—", " ") return re.sub("[^a-zA-Z0-9\s']+", "", text) else: # only keeps space @@ -53,7 +54,7 @@ def upper_all_char(text: str) -> str: return text.upper() if __name__ == "__main__": - ref_text = " Hello “! My name is ‘ haha" + ref_text = "Mixed-case English transcription, with punctuation. Actually, it is fully not related." print(ref_text) - res = train_text_normalization(ref_text) + res = upper_only_alpha(ref_text) print(res)