diff --git a/egs/commonvoice/ASR/local/compute_fbank_commonvoice_splits.py b/egs/commonvoice/ASR/local/compute_fbank_commonvoice_splits.py index f31b45aa5..68c72200f 100755 --- a/egs/commonvoice/ASR/local/compute_fbank_commonvoice_splits.py +++ b/egs/commonvoice/ASR/local/compute_fbank_commonvoice_splits.py @@ -17,7 +17,6 @@ import argparse import logging -from datetime import datetime from pathlib import Path import torch @@ -30,6 +29,8 @@ from lhotse import ( set_caching_enabled, ) +from icefall.utils import str2bool + # Torch's multithreaded behavior needs to be disabled or # it wastes a lot of CPU and slow things down. # Do this outside of main() in case it needs to take effect @@ -83,6 +84,13 @@ def get_args(): help="Stop processing pieces until this number (exclusive).", ) + parser.add_argument( + "--perturb-speed", + type=str2bool, + default=False, + help="""Perturb speed with factor 0.9 and 1.1 on train subset.""", + ) + return parser.parse_args() @@ -130,6 +138,10 @@ def compute_fbank_commonvoice_splits(args): keep_overlapping=False, min_duration=None ) + if args.perturb_speed: + logging.info(f"Doing speed perturb") + cut_set = cut_set + cut_set.perturb_speed(0.9) + cut_set.perturb_speed(1.1) + logging.info("Computing features") cut_set = cut_set.compute_and_store_features_batch( extractor=extractor, diff --git a/egs/commonvoice/ASR/prepare.sh b/egs/commonvoice/ASR/prepare.sh index dcd09c90b..f01ae5b12 100755 --- a/egs/commonvoice/ASR/prepare.sh +++ b/egs/commonvoice/ASR/prepare.sh @@ -38,6 +38,7 @@ num_splits=1000 dl_dir=$PWD/download release=cv-corpus-12.0-2022-12-07 lang=fr +perturb_speed=false . shared/parse_options.sh || exit 1 @@ -149,7 +150,8 @@ if [ $stage -le 6 ] && [ $stop_stage -ge 6 ]; then --batch-duration 200 \ --start 0 \ --num-splits $num_splits \ - --language $lang + --language $lang \ + --perturb-speed $perturb_speed touch data/${lang}/fbank/.cv-${lang}_train.done fi fi