From 8e0b7ea518a62b2b89789161af4a57bac6cda3f7 Mon Sep 17 00:00:00 2001 From: Mingshuang Luo <37799481+luomingshuang@users.noreply.github.com> Date: Mon, 4 Jul 2022 11:59:37 +0800 Subject: [PATCH] mv split cuts before computing feature (#461) --- .../ASR/local/compute_fbank_wenetspeech_dev_test.py | 9 +++++---- .../ASR/local/compute_fbank_wenetspeech_splits.py | 12 +++++------- 2 files changed, 10 insertions(+), 11 deletions(-) diff --git a/egs/wenetspeech/ASR/local/compute_fbank_wenetspeech_dev_test.py b/egs/wenetspeech/ASR/local/compute_fbank_wenetspeech_dev_test.py index 7efcbd4fd..c10a84d21 100755 --- a/egs/wenetspeech/ASR/local/compute_fbank_wenetspeech_dev_test.py +++ b/egs/wenetspeech/ASR/local/compute_fbank_wenetspeech_dev_test.py @@ -63,8 +63,12 @@ def compute_fbank_wenetspeech_dev_test(): logging.info(f"Loading {raw_cuts_path}") cut_set = CutSet.from_file(raw_cuts_path) - logging.info("Computing features") + logging.info("Splitting cuts into smaller chunks") + cut_set = cut_set.trim_to_supervisions( + keep_overlapping=False, min_duration=None + ) + logging.info("Computing features") cut_set = cut_set.compute_and_store_features_batch( extractor=extractor, storage_path=f"{in_out_dir}/feats_{partition}", @@ -72,9 +76,6 @@ def compute_fbank_wenetspeech_dev_test(): batch_duration=batch_duration, storage_type=LilcomHdf5Writer, ) - cut_set = cut_set.trim_to_supervisions( - keep_overlapping=False, min_duration=None - ) logging.info(f"Saving to {cuts_path}") cut_set.to_file(cuts_path) diff --git a/egs/wenetspeech/ASR/local/compute_fbank_wenetspeech_splits.py b/egs/wenetspeech/ASR/local/compute_fbank_wenetspeech_splits.py index 4622bdb55..bf9a03b20 100755 --- a/egs/wenetspeech/ASR/local/compute_fbank_wenetspeech_splits.py +++ b/egs/wenetspeech/ASR/local/compute_fbank_wenetspeech_splits.py @@ -128,8 +128,12 @@ def compute_fbank_wenetspeech_splits(args): logging.info(f"Loading {raw_cuts_path}") cut_set = CutSet.from_file(raw_cuts_path) - logging.info("Computing features") + logging.info("Splitting cuts into smaller chunks.") + cut_set = cut_set.trim_to_supervisions( + keep_overlapping=False, min_duration=None + ) + logging.info("Computing features") cut_set = cut_set.compute_and_store_features_batch( extractor=extractor, storage_path=f"{output_dir}/feats_{subset}_{idx}", @@ -138,14 +142,8 @@ def compute_fbank_wenetspeech_splits(args): storage_type=LilcomChunkyWriter, ) - logging.info("About to split cuts into smaller chunks.") - cut_set = cut_set.trim_to_supervisions( - keep_overlapping=False, min_duration=None - ) - logging.info(f"Saving to {cuts_path}") cut_set.to_file(cuts_path) - logging.info(f"Saved to {cuts_path}") def main():