From d77b03517f6165baeaa6d2401e35ecdad2135b9d Mon Sep 17 00:00:00 2001 From: jinzr Date: Fri, 15 Mar 2024 09:49:28 +0800 Subject: [PATCH] misc. fix --- egs/commonvoice/ASR/local/preprocess_commonvoice.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/egs/commonvoice/ASR/local/preprocess_commonvoice.py b/egs/commonvoice/ASR/local/preprocess_commonvoice.py index 4137b2dd1..d41af4015 100755 --- a/egs/commonvoice/ASR/local/preprocess_commonvoice.py +++ b/egs/commonvoice/ASR/local/preprocess_commonvoice.py @@ -170,7 +170,7 @@ def preprocess_commonvoice( The 'validated' partition contains the data of both 'train', 'dev' and 'test' partitions. We filter out the 'dev' and 'test' partition here. - """ + """ ) dev_ids = src_dir / f"cv-{language}_dev_ids" test_ids = src_dir / f"cv-{language}_test_ids" @@ -182,7 +182,9 @@ def preprocess_commonvoice( ), f"{test_ids} does not exist, please check stage 1 of the prepare.sh" dev_ids = dev_ids.read_text().strip().split("\n") test_ids = test_ids.read_text().strip().split("\n") - cut_set = cut_set.filter(lambda x: x.id not in dev_ids + test_ids) + cut_set = cut_set.filter( + lambda x: x.supervisions[0].id not in dev_ids + test_ids + ) # Run data augmentation that needs to be done in the # time domain.