misc. fix

This commit is contained in:
jinzr 2024-03-15 09:49:28 +08:00
parent 7d01eb46db
commit d77b03517f

View File

@@ -170,7 +170,7 @@ def preprocess_commonvoice(
The 'validated' partition contains the data of both 'train', 'dev'
and 'test' partitions. We filter out the 'dev' and 'test' partition
here.
"""
"""
)
dev_ids = src_dir / f"cv-{language}_dev_ids"
test_ids = src_dir / f"cv-{language}_test_ids"
@@ -182,7 +182,9 @@ def preprocess_commonvoice(
), f"{test_ids} does not exist, please check stage 1 of the prepare.sh"
dev_ids = dev_ids.read_text().strip().split("\n")
test_ids = test_ids.read_text().strip().split("\n")
cut_set = cut_set.filter(lambda x: x.id not in dev_ids + test_ids)
cut_set = cut_set.filter(
lambda x: x.supervisions[0].id not in dev_ids + test_ids
)
# Run data augmentation that needs to be done in the
# time domain.