mirror of
https://github.com/k2-fsa/icefall.git
synced 2025-09-09 09:04:19 +00:00
Update egs/multi_zh-hans/ASR/local/preprocess_kespeech.py
Co-authored-by: Fangjun Kuang <csukuangfj@gmail.com>
This commit is contained in:
parent
465ff40470
commit
5fb9730a61
@@ -95,14 +95,12 @@ def preprocess_kespeech(speed_perturb: bool = False):
|
|||||||
m["supervisions"] = m["supervisions"].filter(has_no_oov)
|
m["supervisions"] = m["supervisions"].filter(has_no_oov)
|
||||||
logging.info(f"Normalizing text in {partition}")
|
logging.info(f"Normalizing text in {partition}")
|
||||||
for sup in m["supervisions"]:
|
for sup in m["supervisions"]:
|
||||||
text = str(sup.text)
|
orig_text = sup.text
|
||||||
orig_text = text
|
|
||||||
sup.text = normalize_text(sup.text)
|
sup.text = normalize_text(sup.text)
|
||||||
text = str(sup.text)
|
if logging_count < logging_threshold and len(orig_text) != len(sup.text):
|
||||||
if len(orig_text) != len(text) and logging_count < logging_threshold:
|
|
||||||
logging_count += 1
|
logging_count += 1
|
||||||
logging.info(
|
logging.info(
|
||||||
f"\nOriginal text vs normalized text:\n{orig_text}\n{text}"
|
f"\nOriginal text vs normalized text:\n{orig_text}\n{sup.text}"
|
||||||
)
|
)
|
||||||
|
|
||||||
# Create long-recording cut manifests.
|
# Create long-recording cut manifests.
|
||||||
|
Loading…
x
Reference in New Issue
Block a user