Move splitting of cuts before computing features (#461)

This commit is contained in:
Mingshuang Luo 2022-07-04 11:59:37 +08:00 committed by GitHub
parent 10e8bc5b56
commit 8e0b7ea518
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 10 additions and 11 deletions

View File

@ -63,8 +63,12 @@ def compute_fbank_wenetspeech_dev_test():
logging.info(f"Loading {raw_cuts_path}") logging.info(f"Loading {raw_cuts_path}")
cut_set = CutSet.from_file(raw_cuts_path) cut_set = CutSet.from_file(raw_cuts_path)
logging.info("Computing features") logging.info("Splitting cuts into smaller chunks")
cut_set = cut_set.trim_to_supervisions(
keep_overlapping=False, min_duration=None
)
logging.info("Computing features")
cut_set = cut_set.compute_and_store_features_batch( cut_set = cut_set.compute_and_store_features_batch(
extractor=extractor, extractor=extractor,
storage_path=f"{in_out_dir}/feats_{partition}", storage_path=f"{in_out_dir}/feats_{partition}",
@ -72,9 +76,6 @@ def compute_fbank_wenetspeech_dev_test():
batch_duration=batch_duration, batch_duration=batch_duration,
storage_type=LilcomHdf5Writer, storage_type=LilcomHdf5Writer,
) )
cut_set = cut_set.trim_to_supervisions(
keep_overlapping=False, min_duration=None
)
logging.info(f"Saving to {cuts_path}") logging.info(f"Saving to {cuts_path}")
cut_set.to_file(cuts_path) cut_set.to_file(cuts_path)

View File

@ -128,8 +128,12 @@ def compute_fbank_wenetspeech_splits(args):
logging.info(f"Loading {raw_cuts_path}") logging.info(f"Loading {raw_cuts_path}")
cut_set = CutSet.from_file(raw_cuts_path) cut_set = CutSet.from_file(raw_cuts_path)
logging.info("Computing features") logging.info("Splitting cuts into smaller chunks.")
cut_set = cut_set.trim_to_supervisions(
keep_overlapping=False, min_duration=None
)
logging.info("Computing features")
cut_set = cut_set.compute_and_store_features_batch( cut_set = cut_set.compute_and_store_features_batch(
extractor=extractor, extractor=extractor,
storage_path=f"{output_dir}/feats_{subset}_{idx}", storage_path=f"{output_dir}/feats_{subset}_{idx}",
@ -138,14 +142,8 @@ def compute_fbank_wenetspeech_splits(args):
storage_type=LilcomChunkyWriter, storage_type=LilcomChunkyWriter,
) )
logging.info("About to split cuts into smaller chunks.")
cut_set = cut_set.trim_to_supervisions(
keep_overlapping=False, min_duration=None
)
logging.info(f"Saving to {cuts_path}") logging.info(f"Saving to {cuts_path}")
cut_set.to_file(cuts_path) cut_set.to_file(cuts_path)
logging.info(f"Saved to {cuts_path}")
def main(): def main():