From 5fb9730a61e9c83f8676a68adb97c4da37a946c5 Mon Sep 17 00:00:00 2001 From: zr_jin Date: Thu, 7 Sep 2023 15:09:02 +0800 Subject: [PATCH] Update egs/multi_zh-hans/ASR/local/preprocess_kespeech.py Co-authored-by: Fangjun Kuang --- egs/multi_zh-hans/ASR/local/preprocess_kespeech.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/egs/multi_zh-hans/ASR/local/preprocess_kespeech.py b/egs/multi_zh-hans/ASR/local/preprocess_kespeech.py index 79b3a14dc..20274263f 100755 --- a/egs/multi_zh-hans/ASR/local/preprocess_kespeech.py +++ b/egs/multi_zh-hans/ASR/local/preprocess_kespeech.py @@ -95,14 +95,12 @@ def preprocess_kespeech(speed_perturb: bool = False): m["supervisions"] = m["supervisions"].filter(has_no_oov) logging.info(f"Normalizing text in {partition}") for sup in m["supervisions"]: - text = str(sup.text) - orig_text = text + orig_text = sup.text sup.text = normalize_text(sup.text) - text = str(sup.text) - if len(orig_text) != len(text) and logging_count < logging_threshold: + if logging_count < logging_threshold and len(orig_text) != len(sup.text): logging_count += 1 logging.info( - f"\nOriginal text vs normalized text:\n{orig_text}\n{text}" + f"\nOriginal text vs normalized text:\n{orig_text}\n{sup.text}" ) # Create long-recording cut manifests.