From abafc690e4cd7c726c30432c4eb8b0ebf95148a5 Mon Sep 17 00:00:00 2001
From: kobenaxie <572745565@qq.com>
Date: Tue, 25 Jul 2023 17:24:57 +0800
Subject: [PATCH] Update preprocess_commonvoice.py to fix text normalization bug.

---
 egs/commonvoice/ASR/local/preprocess_commonvoice.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/egs/commonvoice/ASR/local/preprocess_commonvoice.py b/egs/commonvoice/ASR/local/preprocess_commonvoice.py
index c5ec14502..e60459765 100755
--- a/egs/commonvoice/ASR/local/preprocess_commonvoice.py
+++ b/egs/commonvoice/ASR/local/preprocess_commonvoice.py
@@ -45,7 +45,7 @@ def get_args():
 
 def normalize_text(utt: str) -> str:
     utt = re.sub(r"[{0}]+".format("-"), " ", utt)
-    return re.sub(r"[^a-zA-Z\s]", "", utt).upper()
+    return re.sub(r"[^a-zA-Z\s']", "", utt).upper()
 
 
 def preprocess_commonvoice(