Modify cuts JSON: trim cuts to supervisions before saving

This commit is contained in:
sathvik udupa 2023-04-19 18:56:26 +05:30
parent 26d376d68a
commit be00b3df2c
2 changed files with 9 additions and 4 deletions

View File

@@ -104,6 +104,8 @@ def compute_fbank_mucs(
with get_executor() as ex: # Initialize the executor only once. with get_executor() as ex: # Initialize the executor only once.
for partition, m in manifests.items(): for partition, m in manifests.items():
# print(m["recordings"])
# exit()
cuts_filename = f"{prefix}_cuts_{partition}.{suffix}" cuts_filename = f"{prefix}_cuts_{partition}.{suffix}"
if (output_dir / cuts_filename).is_file(): if (output_dir / cuts_filename).is_file():
logging.info(f"{partition} already exists - skipping.") logging.info(f"{partition} already exists - skipping.")
@@ -128,6 +130,9 @@ def compute_fbank_mucs(
executor=ex, executor=ex,
storage_type=LilcomChunkyWriter, storage_type=LilcomChunkyWriter,
) )
cut_set = cut_set.trim_to_supervisions(
keep_overlapping=False, min_duration=None
)
cut_set.to_file(output_dir / cuts_filename) cut_set.to_file(output_dir / cuts_filename)

View File

@@ -6,8 +6,8 @@ export PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python
set -eou pipefail set -eou pipefail
nj=60 nj=60
stage=8 stage=3
stop_stage=8 stop_stage=3
# We assume dl_dir (download dir) contains the following # We assume dl_dir (download dir) contains the following
# directories and files. If not, they will be downloaded # directories and files. If not, they will be downloaded
@@ -97,12 +97,12 @@ if [ $stage -le 3 ] && [ $stop_stage -ge 3 ]; then
touch data/fbank/.mucs.done touch data/fbank/.mucs.done
fi fi
# exit
if [ ! -e data/fbank/.mucs-validated.done ]; then if [ ! -e data/fbank/.mucs-validated.done ]; then
log "Validating data/fbank for mucs" log "Validating data/fbank for mucs"
parts=( parts=(
train train,
test test
) )
for part in ${parts[@]}; do for part in ${parts[@]}; do