diff --git a/egs/commonvoice/ASR/local/preprocess_commonvoice.py b/egs/commonvoice/ASR/local/preprocess_commonvoice.py index d41af4015..3be85ed15 100755 --- a/egs/commonvoice/ASR/local/preprocess_commonvoice.py +++ b/egs/commonvoice/ASR/local/preprocess_commonvoice.py @@ -93,6 +93,10 @@ def normalize_text(utt: str, language: str) -> str: .replace("~", "") .replace(";", "") .replace("", "") + .replace("﹔", "") + .replace("/", "") + .replace("A", "") + .replace("B", "") .upper() ) else: