mirror of
https://github.com/k2-fsa/icefall.git
synced 2025-08-09 10:02:22 +00:00
add text norm script for pl (#1532)
This commit is contained in:
parent
335a9962de
commit
cdb3fb5675
@ -48,8 +48,18 @@ def normalize_text(utt: str, language: str) -> str:
|
|||||||
utt = re.sub("’", "'", utt)
|
utt = re.sub("’", "'", utt)
|
||||||
if language == "en":
|
if language == "en":
|
||||||
return re.sub(r"[^a-zA-Z\s]", "", utt).upper()
|
return re.sub(r"[^a-zA-Z\s]", "", utt).upper()
|
||||||
if language == "fr":
|
elif language == "fr":
|
||||||
return re.sub(r"[^A-ZÀÂÆÇÉÈÊËÎÏÔŒÙÛÜ' ]", "", utt).upper()
|
return re.sub(r"[^A-ZÀÂÆÇÉÈÊËÎÏÔŒÙÛÜ' ]", "", utt).upper()
|
||||||
|
elif language == "pl":
|
||||||
|
return re.sub(r"[^a-ząćęłńóśźżA-ZĄĆĘŁŃÓŚŹŻ' ]", "", utt).upper()
|
||||||
|
else:
|
||||||
|
raise NotImplementedError(
|
||||||
|
f"""
|
||||||
|
Text normalization not implemented for language: {language},
|
||||||
|
please consider implementing it in the local/preprocess_commonvoice.py
|
||||||
|
or raise an issue on GitHub to request it.
|
||||||
|
"""
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
def preprocess_commonvoice(
|
def preprocess_commonvoice(
|
||||||
|
Loading…
x
Reference in New Issue
Block a user