minor updates

This commit is contained in:
zr_jin 2023-07-24 16:26:19 +08:00
parent e85a9359b9
commit 1e29c781fc
6 changed files with 26 additions and 44 deletions

View File

@@ -85,13 +85,10 @@ def compute_fbank_aidatatang_200zh(num_mel_bins: int = 80, speed_perturb: bool =
 recordings=m["recordings"],
 supervisions=m["supervisions"],
 )
-if "train" in partition:
-if speed_perturb:
-cut_set = (
-cut_set
-+ cut_set.perturb_speed(0.9)
-+ cut_set.perturb_speed(1.1)
-)
+if "train" in partition and speed_perturb:
+cut_set = (
+cut_set + cut_set.perturb_speed(0.9) + cut_set.perturb_speed(1.1)
+)
 cut_set = cut_set.compute_and_store_features(
 extractor=extractor,
 storage_path=f"{output_dir}/{prefix}_feats_{partition}",

View File

@@ -81,13 +81,10 @@ def compute_fbank_aishell(num_mel_bins: int = 80, speed_perturb: bool = False):
 recordings=m["recordings"],
 supervisions=m["supervisions"],
 )
-if "train" in partition:
-if speed_perturb:
-cut_set = (
-cut_set
-+ cut_set.perturb_speed(0.9)
-+ cut_set.perturb_speed(1.1)
-)
+if "train" in partition and speed_perturb:
+cut_set = (
+cut_set + cut_set.perturb_speed(0.9) + cut_set.perturb_speed(1.1)
+)
 cut_set = cut_set.compute_and_store_features(
 extractor=extractor,
 storage_path=f"{output_dir}/{prefix}_feats_{partition}",

View File

@@ -81,13 +81,10 @@ def compute_fbank_aishell2(num_mel_bins: int = 80, speed_perturb: bool = False):
 recordings=m["recordings"],
 supervisions=m["supervisions"],
 )
-if "train" in partition:
-if speed_perturb:
-cut_set = (
-cut_set
-+ cut_set.perturb_speed(0.9)
-+ cut_set.perturb_speed(1.1)
-)
+if "train" in partition and speed_perturb:
+cut_set = (
+cut_set + cut_set.perturb_speed(0.9) + cut_set.perturb_speed(1.1)
+)
 cut_set = cut_set.compute_and_store_features(
 extractor=extractor,
 storage_path=f"{output_dir}/{prefix}_feats_{partition}",

View File

@@ -83,13 +83,10 @@ def compute_fbank_aishell4(num_mel_bins: int = 80, speed_perturb: bool = False):
 recordings=m["recordings"],
 supervisions=m["supervisions"],
 )
-if "train" in partition:
-if speed_perturb:
-cut_set = (
-cut_set
-+ cut_set.perturb_speed(0.9)
-+ cut_set.perturb_speed(1.1)
-)
+if "train" in partition and speed_perturb:
+cut_set = (
+cut_set + cut_set.perturb_speed(0.9) + cut_set.perturb_speed(1.1)
+)
 cut_set = cut_set.compute_and_store_features(
 extractor=extractor,

View File

@@ -82,13 +82,10 @@ def compute_fbank_alimeeting(num_mel_bins: int = 80, speed_perturb: bool = False
 recordings=m["recordings"],
 supervisions=m["supervisions"],
 )
-if "train" in partition:
-if speed_perturb:
-cut_set = (
-cut_set
-+ cut_set.perturb_speed(0.9)
-+ cut_set.perturb_speed(1.1)
-)
+if "train" in partition and speed_perturb:
+cut_set = (
+cut_set + cut_set.perturb_speed(0.9) + cut_set.perturb_speed(1.1)
+)
 cur_num_jobs = num_jobs if ex is None else 80
 cur_num_jobs = min(cur_num_jobs, len(cut_set))

View File

@@ -111,15 +111,12 @@ def preprocess_wenet_speech(speed_perturb: bool = False):
 )
 # Run data augmentation that needs to be done in the
 # time domain.
-if partition not in ["DEV", "TEST_NET", "TEST_MEETING"]:
-if speed_perturb:
-logging.info(
-f"Speed perturb for {partition} with factors 0.9 and 1.1 "
-"(Perturbing may take 8 minutes and saving may take 20 minutes)"
-)
-cut_set = (
-cut_set + cut_set.perturb_speed(0.9) + cut_set.perturb_speed(1.1)
-)
+if partition not in ["DEV", "TEST_NET", "TEST_MEETING"] and speed_perturb:
+logging.info(
+f"Speed perturb for {partition} with factors 0.9 and 1.1 "
+"(Perturbing may take 8 minutes and saving may take 20 minutes)"
+)
+cut_set = cut_set + cut_set.perturb_speed(0.9) + cut_set.perturb_speed(1.1)
 logging.info(f"Saving to {raw_cuts_path}")
 cut_set.to_file(raw_cuts_path)