From 44cb7c796786e62a13e937b1a5901429b8611ae8 Mon Sep 17 00:00:00 2001 From: Yifan Yang Date: Wed, 31 May 2023 12:07:45 +0800 Subject: [PATCH] limit normalize log --- egs/peoples_speech/ASR/local/preprocess_peoples_speech.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/egs/peoples_speech/ASR/local/preprocess_peoples_speech.py b/egs/peoples_speech/ASR/local/preprocess_peoples_speech.py index 53addfbd2..c5417049f 100755 --- a/egs/peoples_speech/ASR/local/preprocess_peoples_speech.py +++ b/egs/peoples_speech/ASR/local/preprocess_peoples_speech.py @@ -85,15 +85,17 @@ def preprocess_peoples_speech(dataset: Optional[str] = None): continue logging.info(f"Normalizing text in {partition}") + i = 0 for sup in m["supervisions"]: text = str(sup.text) orig_text = text sup.text = normalize_text(sup.text) text = str(sup.text) - if len(orig_text) != len(text): + if i < 10 and len(orig_text) != len(text): logging.info( f"\nOriginal text vs normalized text:\n{orig_text}\n{text}" ) + i += 1 # Create long-recording cut manifests. cut_set = CutSet.from_manifests(