mirror of
https://github.com/k2-fsa/icefall.git
synced 2025-12-11 06:55:27 +00:00
Update preprocess_kespeech.py
This commit is contained in:
parent
40a7c17a3c
commit
ecffd6ce93
@@ -78,6 +78,8 @@ def preprocess_kespeech(speed_perturb: bool = False):
|
||||
list(manifests.keys()),
|
||||
dataset_parts,
|
||||
)
|
||||
+ logging_threshold = 50
|
||||
+ logging_count = 0
|
||||
|
||||
for partition, m in manifests.items():
|
||||
logging.info(f"Processing {partition}")
|
||||
@@ -97,7 +99,8 @@ def preprocess_kespeech(speed_perturb: bool = False):
|
||||
orig_text = text
|
||||
sup.text = normalize_text(sup.text)
|
||||
text = str(sup.text)
|
||||
- if len(orig_text) != len(text):
|
||||
+ if len(orig_text) != len(text) and logging_count < logging_threshold:
|
||||
+ logging_count += 1
|
||||
logging.info(
|
||||
f"\nOriginal text vs normalized text:\n{orig_text}\n{text}"
|
||||
)
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user