Merge branch 'dev_multi_zh-hans' of https://github.com/JinZr/icefall into dev_multi_zh-hans

This commit is contained in:
jinzr 2023-09-07 15:12:11 +08:00
commit 14328f0995
2 changed files with 4 additions and 6 deletions

View File

@@ -72,7 +72,7 @@ def get_parser():
"--num-splits",
type=int,
required=True,
help="The number of splits of the L subset",
help="The number of splits of the given subset",
)
parser.add_argument(

View File

@@ -95,14 +95,12 @@ def preprocess_kespeech(speed_perturb: bool = False):
m["supervisions"] = m["supervisions"].filter(has_no_oov)
logging.info(f"Normalizing text in {partition}")
for sup in m["supervisions"]:
text = str(sup.text)
orig_text = text
orig_text = sup.text
sup.text = normalize_text(sup.text)
text = str(sup.text)
if len(orig_text) != len(text) and logging_count < logging_threshold:
if logging_count < logging_threshold and len(orig_text) != len(sup.text):
logging_count += 1
logging.info(
f"\nOriginal text vs normalized text:\n{orig_text}\n{text}"
f"\nOriginal text vs normalized text:\n{orig_text}\n{sup.text}"
)
# Create long-recording cut manifests.