mirror of
https://github.com/k2-fsa/icefall.git
synced 2025-08-08 09:32:20 +00:00
Merge c053b7c8f0772e22fc1481aa7e61a8d03d020f5f into 34fc1fdf0d8ff520e2bb18267d046ca207c78ef9
This commit is contained in:
commit
a852b92de6
@ -52,6 +52,11 @@ def normalize_text(utt: str, language: str) -> str:
|
||||
return re.sub(r"[^A-ZÀÂÆÇÉÈÊËÎÏÔŒÙÛÜ' ]", "", utt).upper()
|
||||
elif language == "pl":
|
||||
return re.sub(r"[^a-ząćęłńóśźżA-ZĄĆĘŁŃÓŚŹŻ' ]", "", utt).upper()
|
||||
elif language == "fa":
|
||||
utt = utt.replace("ي", "ی").replace("ك", "ک")
|
||||
utt = re.sub(r"[^\u0600-\u06FF0-9\u06F0-\u06F9\s]|[.,?!\-]", "", utt)
|
||||
utt = re.sub(r"\s+", " ", utt).strip()
|
||||
return utt
|
||||
elif language in ["yue", "zh-HK"]:
|
||||
# Mozilla Common Voice uses both "yue" and "zh-HK" for Cantonese
|
||||
# Not sure why they decided to do this...
|
||||
|
@ -42,7 +42,7 @@ use_invalidated=false
|
||||
# - speech
|
||||
|
||||
dl_dir=$PWD/download
|
||||
release=cv-corpus-12.0-2022-12-07
|
||||
release=cv-corpus-12.0-2022-12-07 ## -> consider changing the release name, or download the file manually and move it to the download folder.
|
||||
lang=fr
|
||||
perturb_speed=false
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user