From 92e125b64a2fef4ff014e56d8da2d8c92cb5b3ff Mon Sep 17 00:00:00 2001 From: Desh Raj Date: Thu, 9 Mar 2023 16:52:34 -0500 Subject: [PATCH] decode SURT with libricss --- .../asr_datamodule.py | 4 +-- .../decode_libricss.py | 30 ++++++++-------- .../SURT/local/compute_fbank_librimix.py | 36 +++++++++---------- egs/libricss/SURT/prepare.sh | 15 ++++---- 4 files changed, 41 insertions(+), 44 deletions(-) diff --git a/egs/libricss/SURT/dprnn_pruned_transducer_stateless7/asr_datamodule.py b/egs/libricss/SURT/dprnn_pruned_transducer_stateless7/asr_datamodule.py index a824cda45..f6f56cc6f 100644 --- a/egs/libricss/SURT/dprnn_pruned_transducer_stateless7/asr_datamodule.py +++ b/egs/libricss/SURT/dprnn_pruned_transducer_stateless7/asr_datamodule.py @@ -336,7 +336,7 @@ class LibrimixAsrDataModule: logging.info("About to get train cuts") rvb_affix = "_rvb" if reverberated else "_norvb" cs = load_manifest_lazy( - self.args.manifest_dir / f"cuts_train{rvb_affix}.jsonl.gz" + self.args.manifest_dir / f"cuts_train{rvb_affix}_v1.jsonl.gz" ) # Trim to supervision groups cs = cs.trim_to_supervision_groups(max_pause=1.0) @@ -348,7 +348,7 @@ class LibrimixAsrDataModule: logging.info("About to get dev cuts") rvb_affix = "_rvb" if reverberated else "_norvb" cs = load_manifest_lazy( - self.args.manifest_dir / f"cuts_dev{rvb_affix}.jsonl.gz" + self.args.manifest_dir / f"cuts_dev{rvb_affix}_v1.jsonl.gz" ) cs = cs.filter(lambda c: c.duration >= 0.1) return cs diff --git a/egs/libricss/SURT/dprnn_pruned_transducer_stateless7/decode_libricss.py b/egs/libricss/SURT/dprnn_pruned_transducer_stateless7/decode_libricss.py index d797c8f61..17a7ac8a6 100755 --- a/egs/libricss/SURT/dprnn_pruned_transducer_stateless7/decode_libricss.py +++ b/egs/libricss/SURT/dprnn_pruned_transducer_stateless7/decode_libricss.py @@ -760,22 +760,22 @@ def main(): # masks=masks, # ) - for test_set, ol in zip(test_cuts_grouped, OVERLAP_RATIOS): - test_dl = librimix.test_dataloaders(test_set) - results_dict = decode_dataset( - dl=test_dl, - params=params, - model=model, - sp=sp, - word_table=word_table, - decoding_graph=decoding_graph, - ) + # for test_set, ol in zip(test_cuts_grouped, OVERLAP_RATIOS): + # test_dl = librimix.test_dataloaders(test_set) + # results_dict = decode_dataset( + # dl=test_dl, + # params=params, + # model=model, + # sp=sp, + # word_table=word_table, + # decoding_graph=decoding_graph, + # ) - save_results( - params=params, - test_set_name=f"test_{ol}", - results_dict=results_dict, - ) + # save_results( + # params=params, + # test_set_name=f"test_{ol}", + # results_dict=results_dict, + # ) # if params.save_masks: # save_masks( diff --git a/egs/libricss/SURT/local/compute_fbank_librimix.py b/egs/libricss/SURT/local/compute_fbank_librimix.py index 9eca365ef..aeed3c25b 100755 --- a/egs/libricss/SURT/local/compute_fbank_librimix.py +++ b/egs/libricss/SURT/local/compute_fbank_librimix.py @@ -63,7 +63,7 @@ def compute_fbank_librimix(): logging.info("Reading manifests") manifests = read_manifests_if_cached( - dataset_parts=["train_norvb", "dev_norvb", "train_2spk_norvb"], + dataset_parts=["train_norvb_v1", "dev_norvb_v1"], types=["cuts"], output_dir=src_dir, prefix="libri-mix", @@ -71,15 +71,15 @@ def compute_fbank_librimix(): lazy=True, ) - train_cuts = manifests["train_norvb"]["cuts"] - dev_cuts = manifests["dev_norvb"]["cuts"] - train_2spk_cuts = manifests["train_2spk_norvb"]["cuts"] + train_cuts = manifests["train_norvb_v1"]["cuts"] + dev_cuts = manifests["dev_norvb_v1"]["cuts"] + # train_2spk_cuts = manifests["train_2spk_norvb"]["cuts"] logging.info("Extracting fbank features for training cuts") _ = train_cuts.compute_and_store_features_batch( extractor=extractor, - storage_path=output_dir / "librimix_feats_train_norvb", - manifest_path=src_dir / "cuts_train_norvb.jsonl.gz", + storage_path=output_dir / "librimix_feats_train_norvb_v1", + manifest_path=src_dir / "cuts_train_norvb_v1.jsonl.gz", batch_duration=5000, num_workers=4, storage_type=LilcomChunkyWriter, @@ -89,24 +89,24 @@ def compute_fbank_librimix(): logging.info("Extracting fbank features for dev cuts") _ = dev_cuts.compute_and_store_features_batch( extractor=extractor, - storage_path=output_dir / "librimix_feats_dev_norvb", - manifest_path=src_dir / "cuts_dev_norvb.jsonl.gz", + storage_path=output_dir / "librimix_feats_dev_norvb_v1", + manifest_path=src_dir / "cuts_dev_norvb_v1.jsonl.gz", batch_duration=5000, num_workers=4, storage_type=LilcomChunkyWriter, overwrite=True, ) - logging.info("Extracting fbank features for 2-spk train cuts") - _ = train_2spk_cuts.compute_and_store_features_batch( - extractor=extractor, - storage_path=output_dir / "librimix_feats_train_2spk_norvb", - manifest_path=src_dir / "cuts_train_2spk_norvb.jsonl.gz", - batch_duration=5000, - num_workers=4, - storage_type=LilcomChunkyWriter, - overwrite=True, - ) + # logging.info("Extracting fbank features for 2-spk train cuts") + # _ = train_2spk_cuts.compute_and_store_features_batch( + # extractor=extractor, + # storage_path=output_dir / "librimix_feats_train_2spk_norvb", + # manifest_path=src_dir / "cuts_train_2spk_norvb.jsonl.gz", + # batch_duration=5000, + # num_workers=4, + # storage_type=LilcomChunkyWriter, + # overwrite=True, + # ) if __name__ == "__main__": diff --git a/egs/libricss/SURT/prepare.sh b/egs/libricss/SURT/prepare.sh index 482b18a21..192ccd6b9 100755 --- a/egs/libricss/SURT/prepare.sh +++ b/egs/libricss/SURT/prepare.sh @@ -121,8 +121,8 @@ if [ $stage -le 5 ] && [ $stop_stage -ge 5 ]; then sim_cmd="queue.pl --mem 16G -l 'num_proc=4,h_rt=600:00:00'" gunzip -c data/manifests/libricss-sdm_supervisions_all.jsonl.gz |\ - grep -v "0L" | grep -v "OV10" | grep -v "OV20" |\ - gzip -c > data/manifests/libricss-sdm_supervisions_all_v2.jsonl.gz + grep -v "0L" | grep -v "OV10" |\ + gzip -c > data/manifests/libricss-sdm_supervisions_all_v1.jsonl.gz # 2-speaker anechoic # log "Generating 2-speaker anechoic training set" @@ -154,7 +154,7 @@ if [ $stage -le 5 ] && [ $stop_stage -ge 5 ]; then # data/manifests/libri-mix_cuts_train_2spk_rvb.jsonl.gz # Full training set (2,3 speakers) anechoic - for part in train; do + for part in dev train; do if [ $part == "dev" ]; then num_jobs=1 else @@ -163,18 +163,15 @@ if [ $stage -le 5 ] && [ $stop_stage -ge 5 ]; then log "Generating anechoic ${part} set (full)" $sim_cmd exp/sim_${part}.log lhotse workflows simulate-meetings \ --method conversational \ + --fit-to-supervisions data/manifests/libricss-sdm_supervisions_all_v1.jsonl.gz \ --num-repeats 1 \ - --same-spk-pause 0.5 \ - --diff-spk-pause 0.5 \ - --diff-spk-overlap 2 \ - --prob-diff-spk-overlap 0.75 \ --num-speakers-per-meeting 2,3 \ --max-duration-per-speaker 15.0 \ --max-utterances-per-speaker 3 \ --seed 1234 \ --num-jobs ${num_jobs} \ data/manifests/librispeech_cuts_${part}_trimmed.jsonl.gz \ - data/manifests/libri-mix_cuts_${part}_norvb.jsonl.gz + data/manifests/libri-mix_cuts_${part}_norvb_v1.jsonl.gz done # Full training set (2,3,4 speakers) reverberant @@ -202,7 +199,7 @@ fi if [ $stage -le 7 ] && [ $stop_stage -ge 7 ]; then log "Stage 7: Compute fbank features for simulated Libri-mix" mkdir -p data/fbank - $cmd exp/feats_librimix_norvb.log python local/compute_fbank_librimix.py + $cmd exp/feats_librimix_norvb_v1.log python local/compute_fbank_librimix.py fi if [ $stage -le 8 ] && [ $stop_stage -ge 8 ]; then