Update preprocess_commonvoice.py to fix text normalization bug. (#1181)

This commit is contained in:
kobenaxie 2023-07-26 16:54:42 +08:00 committed by GitHub
parent 1dbbd7759e
commit 80d922c158
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@@ -45,7 +45,7 @@ def get_args():
def normalize_text(utt: str) -> str:
    """Normalize a transcript to upper-case letters, whitespace and apostrophes.

    Runs of hyphens are replaced by a single space so hyphenated words are
    split rather than fused. Every remaining character that is not an ASCII
    letter, whitespace, or an apostrophe is then removed, and the result is
    upper-cased.

    Args:
        utt: The raw transcript text.

    Returns:
        The normalized, upper-cased transcript.
    """
    # Turn hyphens into word separators before stripping punctuation.
    utt = re.sub(r"-+", " ", utt)
    # The apostrophe is kept in the allowed set so contractions such as
    # "don't" survive normalization instead of collapsing to "DONT".
    return re.sub(r"[^a-zA-Z\s']", "", utt).upper()
def preprocess_commonvoice(