mirror of
https://github.com/k2-fsa/icefall.git
synced 2025-09-13 19:14:20 +00:00
Merge branch 'dev_multi_zh-hans' of https://github.com/JinZr/icefall into dev_multi_zh-hans
This commit is contained in:
commit
14328f0995
@ -72,7 +72,7 @@ def get_parser():
|
||||
"--num-splits",
|
||||
type=int,
|
||||
required=True,
|
||||
help="The number of splits of the L subset",
|
||||
help="The number of splits of the given subset",
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
|
@ -95,14 +95,12 @@ def preprocess_kespeech(speed_perturb: bool = False):
|
||||
m["supervisions"] = m["supervisions"].filter(has_no_oov)
|
||||
logging.info(f"Normalizing text in {partition}")
|
||||
for sup in m["supervisions"]:
|
||||
text = str(sup.text)
|
||||
orig_text = text
|
||||
orig_text = sup.text
|
||||
sup.text = normalize_text(sup.text)
|
||||
text = str(sup.text)
|
||||
if len(orig_text) != len(text) and logging_count < logging_threshold:
|
||||
if logging_count < logging_threshold and len(orig_text) != len(sup.text):
|
||||
logging_count += 1
|
||||
logging.info(
|
||||
f"\nOriginal text vs normalized text:\n{orig_text}\n{text}"
|
||||
f"\nOriginal text vs normalized text:\n{orig_text}\n{sup.text}"
|
||||
)
|
||||
|
||||
# Create long-recording cut manifests.
|
||||
|
Loading…
x
Reference in New Issue
Block a user