This commit is contained in:
jinzr 2024-03-12 12:24:24 +08:00
parent 4cae6b6c9a
commit 9820bf92f6
2 changed files with 16 additions and 2 deletions

View File

@ -17,7 +17,6 @@
import argparse
import logging
from datetime import datetime
from pathlib import Path
import torch
@ -30,6 +29,8 @@ from lhotse import (
set_caching_enabled,
)
from icefall.utils import str2bool
# Torch's multithreaded behavior needs to be disabled, or
# it wastes a lot of CPU and slows things down.
# Do this outside of main() in case it needs to take effect
@ -83,6 +84,13 @@ def get_args():
help="Stop processing pieces until this number (exclusive).",
)
parser.add_argument(
"--perturb-speed",
type=str2bool,
default=False,
help="""Perturb speed with factor 0.9 and 1.1 on train subset.""",
)
return parser.parse_args()
@ -130,6 +138,10 @@ def compute_fbank_commonvoice_splits(args):
keep_overlapping=False, min_duration=None
)
if args.perturb_speed:
logging.info(f"Doing speed perturb")
cut_set = cut_set + cut_set.perturb_speed(0.9) + cut_set.perturb_speed(1.1)
logging.info("Computing features")
cut_set = cut_set.compute_and_store_features_batch(
extractor=extractor,

View File

@ -38,6 +38,7 @@ num_splits=1000
dl_dir=$PWD/download
release=cv-corpus-12.0-2022-12-07
lang=fr
perturb_speed=false
. shared/parse_options.sh || exit 1
@ -149,7 +150,8 @@ if [ $stage -le 6 ] && [ $stop_stage -ge 6 ]; then
--batch-duration 200 \
--start 0 \
--num-splits $num_splits \
--language $lang
--language $lang \
--perturb-speed $perturb_speed
touch data/${lang}/fbank/.cv-${lang}_train.done
fi
fi