mirror of
https://github.com/k2-fsa/icefall.git
synced 2025-12-11 06:55:27 +00:00
limit normalize log
This commit is contained in:
parent
787a461dfc
commit
44cb7c7967
@ -85,15 +85,17 @@ def preprocess_peoples_speech(dataset: Optional[str] = None):
|
|||||||
continue
|
continue
|
||||||
|
|
||||||
logging.info(f"Normalizing text in {partition}")
|
logging.info(f"Normalizing text in {partition}")
|
||||||
|
i = 0
|
||||||
for sup in m["supervisions"]:
|
for sup in m["supervisions"]:
|
||||||
text = str(sup.text)
|
text = str(sup.text)
|
||||||
orig_text = text
|
orig_text = text
|
||||||
sup.text = normalize_text(sup.text)
|
sup.text = normalize_text(sup.text)
|
||||||
text = str(sup.text)
|
text = str(sup.text)
|
||||||
if len(orig_text) != len(text):
|
if i < 10 and len(orig_text) != len(text):
|
||||||
logging.info(
|
logging.info(
|
||||||
f"\nOriginal text vs normalized text:\n{orig_text}\n{text}"
|
f"\nOriginal text vs normalized text:\n{orig_text}\n{text}"
|
||||||
)
|
)
|
||||||
|
i += 1
|
||||||
|
|
||||||
# Create long-recording cut manifests.
|
# Create long-recording cut manifests.
|
||||||
cut_set = CutSet.from_manifests(
|
cut_set = CutSet.from_manifests(
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user