mirror of
https://github.com/k2-fsa/icefall.git
synced 2025-08-09 01:52:41 +00:00
add text norm script for pl (#1532)
This commit is contained in:
parent
335a9962de
commit
cdb3fb5675
@ -48,8 +48,18 @@ def normalize_text(utt: str, language: str) -> str:
|
||||
utt = re.sub("’", "'", utt)
|
||||
if language == "en":
|
||||
return re.sub(r"[^a-zA-Z\s]", "", utt).upper()
|
||||
if language == "fr":
|
||||
elif language == "fr":
|
||||
return re.sub(r"[^A-ZÀÂÆÇÉÈÊËÎÏÔŒÙÛÜ' ]", "", utt).upper()
|
||||
elif language == "pl":
|
||||
return re.sub(r"[^a-ząćęłńóśźżA-ZĄĆĘŁŃÓŚŹŻ' ]", "", utt).upper()
|
||||
else:
|
||||
raise NotImplementedError(
|
||||
f"""
|
||||
Text normalization not implemented for language: {language},
|
||||
please consider implementing it in the local/preprocess_commonvoice.py
|
||||
or raise an issue on GitHub to request it.
|
||||
"""
|
||||
)
|
||||
|
||||
|
||||
def preprocess_commonvoice(
|
||||
|
Loading…
x
Reference in New Issue
Block a user