Update compute_fbank_commonvoice_splits.py (#1437)

This commit is contained in:
zr_jin 2023-12-30 13:03:26 +08:00 committed by GitHub
parent 140e6381ad
commit f42258caf8
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@@ -109,10 +109,10 @@ def compute_fbank_commonvoice_splits(args):
     extractor = KaldifeatFbank(KaldifeatFbankConfig(device=device))
     logging.info(f"device: {device}")
-    set_audio_duration_mismatch_tolerance(0.01)  # 10ms tolerance
+    set_audio_duration_mismatch_tolerance(0.05)  # 50ms tolerance
     set_caching_enabled(False)
     for i in range(start, stop):
-        idx = f"{i + 1}".zfill(num_digits)
+        idx = f"{i}".zfill(num_digits)
         logging.info(f"Processing {idx}/{num_splits}")
         cuts_path = output_dir / f"cv-{language}_cuts_{subset}.{idx}.jsonl.gz"