misc. fix

This commit is contained in:
jinzr 2024-03-15 09:49:28 +08:00
parent 7d01eb46db
commit d77b03517f

View File

@@ -170,7 +170,7 @@ def preprocess_commonvoice(
The 'validated' partition contains the data of both 'train', 'dev' The 'validated' partition contains the data of both 'train', 'dev'
and 'test' partitions. We filter out the 'dev' and 'test' partition and 'test' partitions. We filter out the 'dev' and 'test' partition
here. here.
""" """
) )
dev_ids = src_dir / f"cv-{language}_dev_ids" dev_ids = src_dir / f"cv-{language}_dev_ids"
test_ids = src_dir / f"cv-{language}_test_ids" test_ids = src_dir / f"cv-{language}_test_ids"
@@ -182,7 +182,9 @@ def preprocess_commonvoice(
), f"{test_ids} does not exist, please check stage 1 of the prepare.sh" ), f"{test_ids} does not exist, please check stage 1 of the prepare.sh"
dev_ids = dev_ids.read_text().strip().split("\n") dev_ids = dev_ids.read_text().strip().split("\n")
test_ids = test_ids.read_text().strip().split("\n") test_ids = test_ids.read_text().strip().split("\n")
- cut_set = cut_set.filter(lambda x: x.id not in dev_ids + test_ids)
+ cut_set = cut_set.filter(
+ lambda x: x.supervisions[0].id not in dev_ids + test_ids
+ )
# Run data augmentation that needs to be done in the # Run data augmentation that needs to be done in the
# time domain. # time domain.