Update egs/multi_zh-hans/ASR/local/preprocess_kespeech.py

Co-authored-by: Fangjun Kuang <csukuangfj@gmail.com>
This commit is contained in:
zr_jin 2023-09-07 15:09:02 +08:00 committed by GitHub
parent 465ff40470
commit 5fb9730a61
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@@ -95,14 +95,12 @@ def preprocess_kespeech(speed_perturb: bool = False):
 m["supervisions"] = m["supervisions"].filter(has_no_oov)
 logging.info(f"Normalizing text in {partition}")
 for sup in m["supervisions"]:
-text = str(sup.text)
-orig_text = text
+orig_text = sup.text
 sup.text = normalize_text(sup.text)
-text = str(sup.text)
-if len(orig_text) != len(text) and logging_count < logging_threshold:
+if logging_count < logging_threshold and len(orig_text) != len(sup.text):
 logging_count += 1
 logging.info(
-f"\nOriginal text vs normalized text:\n{orig_text}\n{text}"
+f"\nOriginal text vs normalized text:\n{orig_text}\n{sup.text}"
 )
 # Create long-recording cut manifests.