mirror of
https://github.com/k2-fsa/icefall.git
synced 2025-08-26 18:24:18 +00:00
Update preprocess_commonvoice.py
This commit is contained in:
parent
b30a4d6162
commit
eaceb691d8
@ -21,7 +21,7 @@ import re
|
|||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Optional
|
from typing import Optional
|
||||||
|
|
||||||
from lhotse import CutSet, SupervisionSegment
|
from lhotse import CutSet
|
||||||
from lhotse.recipes.utils import read_manifests_if_cached
|
from lhotse.recipes.utils import read_manifests_if_cached
|
||||||
|
|
||||||
|
|
||||||
@ -82,6 +82,17 @@ def normalize_text(utt: str, language: str) -> str:
|
|||||||
.replace("…", "")
|
.replace("…", "")
|
||||||
.replace("⋯", "")
|
.replace("⋯", "")
|
||||||
.replace("·", "")
|
.replace("·", "")
|
||||||
|
.replace("﹒", "")
|
||||||
|
.replace(".", "")
|
||||||
|
.replace(":", "")
|
||||||
|
.replace("︰", "")
|
||||||
|
.replace("﹖", "")
|
||||||
|
.replace("(", "")
|
||||||
|
.replace(")", "")
|
||||||
|
.replace("-", "")
|
||||||
|
.replace("~", "")
|
||||||
|
.replace(";", "")
|
||||||
|
.replace("", "")
|
||||||
.upper()
|
.upper()
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
|
Loading…
x
Reference in New Issue
Block a user