From be00b3df2c04a81e216eb582a0ab89afb37d855c Mon Sep 17 00:00:00 2001 From: sathvik udupa Date: Wed, 19 Apr 2023 18:56:26 +0530 Subject: [PATCH] modify cut json with trim supervision --- egs/mucs/ASR/local/compute_fbank_mucs.py | 5 +++++ egs/mucs/ASR/prepare.sh | 8 ++++---- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/egs/mucs/ASR/local/compute_fbank_mucs.py b/egs/mucs/ASR/local/compute_fbank_mucs.py index ef2ddeb46..fa486cd74 100755 --- a/egs/mucs/ASR/local/compute_fbank_mucs.py +++ b/egs/mucs/ASR/local/compute_fbank_mucs.py @@ -104,6 +104,8 @@ def compute_fbank_mucs( with get_executor() as ex: # Initialize the executor only once. for partition, m in manifests.items(): + # print(m["recordings"]) + # exit() cuts_filename = f"{prefix}_cuts_{partition}.{suffix}" if (output_dir / cuts_filename).is_file(): logging.info(f"{partition} already exists - skipping.") @@ -128,6 +130,9 @@ def compute_fbank_mucs( executor=ex, storage_type=LilcomChunkyWriter, ) + cut_set = cut_set.trim_to_supervisions( + keep_overlapping=False, min_duration=None + ) cut_set.to_file(output_dir / cuts_filename) diff --git a/egs/mucs/ASR/prepare.sh b/egs/mucs/ASR/prepare.sh index b8a09e39e..37ed117c6 100755 --- a/egs/mucs/ASR/prepare.sh +++ b/egs/mucs/ASR/prepare.sh @@ -6,8 +6,8 @@ export PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python set -eou pipefail nj=60 -stage=8 -stop_stage=8 +stage=3 +stop_stage=3 # We assume dl_dir (download dir) contains the following # directories and files. If not, they will be downloaded @@ -97,12 +97,12 @@ if [ $stage -le 3 ] && [ $stop_stage -ge 3 ]; then touch data/fbank/.mucs.done fi - + # exit if [ ! -e data/fbank/.mucs-validated.done ]; then log "Validating data/fbank for mucs" parts=( - train + train, test ) for part in ${parts[@]}; do