From 80d922c1583b9b7fb7e9b47008302cdc74ef58b7 Mon Sep 17 00:00:00 2001 From: kobenaxie <572745565@qq.com> Date: Wed, 26 Jul 2023 16:54:42 +0800 Subject: [PATCH] Update preprocess_commonvoice.py to fix text normalization bug. (#1181) --- egs/commonvoice/ASR/local/preprocess_commonvoice.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/egs/commonvoice/ASR/local/preprocess_commonvoice.py b/egs/commonvoice/ASR/local/preprocess_commonvoice.py index c5ec14502..e60459765 100755 --- a/egs/commonvoice/ASR/local/preprocess_commonvoice.py +++ b/egs/commonvoice/ASR/local/preprocess_commonvoice.py @@ -45,7 +45,7 @@ def get_args(): def normalize_text(utt: str) -> str: utt = re.sub(r"[{0}]+".format("-"), " ", utt) - return re.sub(r"[^a-zA-Z\s]", "", utt).upper() + return re.sub(r"[^a-zA-Z\s']", "", utt).upper() def preprocess_commonvoice(