mirror of
https://github.com/k2-fsa/icefall.git
synced 2025-08-26 10:16:14 +00:00
Update preprocess_gigaspeech2.py
This commit is contained in:
parent
4a6405fe34
commit
aa17542e9e
@ -63,11 +63,16 @@ def normalize_text(
|
|||||||
text = re.sub(r"\u0039", r"\u0E59", text)
|
text = re.sub(r"\u0039", r"\u0E59", text)
|
||||||
|
|
||||||
# Currency symbols mapping
|
# Currency symbols mapping
|
||||||
text = re.sub(r"\u0024", r"\u0E14\u0E2D\u0E25\u0E25\u0E32\u0E23\u0E4C", text) # $ -> ดอลลาร์
|
text = re.sub(r"\u0024", "ดอลลาร์", text) # $
|
||||||
text = re.sub(r"\u00A3", r"\u0E1B\u0E2D\u0E19\u0E14\u0E4C", text) # £ -> ปอนด์
|
text = re.sub(r"\u00A3", "ปอนด์", text) # £
|
||||||
text = re.sub(r"\u00A5", r"\u0E2E\u0E22\u0E27\u0E31\u0E19", text) # ¥ -> หยวน
|
text = re.sub(r"\u00A5", "หยวน", text) # ¥
|
||||||
text = re.sub(r"\u20AC", r"\u0E22\u0E39\u0E42\u0E23", text) # € -> ยูโร
|
text = re.sub(r"\u20AC", "ยูโร", text) # €
|
||||||
text = re.sub(r"\u0E3F", r"\u0E1A\u0E32\u0E17", text) # ฿ -> บาท
|
text = re.sub(r"\u0E3F", "บาท", text) # ฿
|
||||||
|
|
||||||
|
# Temperature/Angle symbols mapping
|
||||||
|
text = re.sub(r"\u00B0\u0043", "องศาเซลเซียส", text) # °C
|
||||||
|
text = re.sub(r"\u00B0\u0046", "องศาฟาเรนไฮต์", text) # °F
|
||||||
|
text = re.sub(r"\u00B0", "องศา", text) # °
|
||||||
|
|
||||||
# Remove blank symbols
|
# Remove blank symbols
|
||||||
text = re.sub(r"\s", "", utt)
|
text = re.sub(r"\s", "", utt)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user