decode SURT with libricss

Desh Raj 2023-03-09 16:52:34 -05:00
parent e9931b7896
commit 92e125b64a
4 changed files with 41 additions and 44 deletions

View File

@@ -336,7 +336,7 @@ class LibrimixAsrDataModule:
         logging.info("About to get train cuts")
         rvb_affix = "_rvb" if reverberated else "_norvb"
         cs = load_manifest_lazy(
-            self.args.manifest_dir / f"cuts_train{rvb_affix}.jsonl.gz"
+            self.args.manifest_dir / f"cuts_train{rvb_affix}_v1.jsonl.gz"
         )
         # Trim to supervision groups
         cs = cs.trim_to_supervision_groups(max_pause=1.0)
@@ -348,7 +348,7 @@ class LibrimixAsrDataModule:
         logging.info("About to get dev cuts")
         rvb_affix = "_rvb" if reverberated else "_norvb"
         cs = load_manifest_lazy(
-            self.args.manifest_dir / f"cuts_dev{rvb_affix}.jsonl.gz"
+            self.args.manifest_dir / f"cuts_dev{rvb_affix}_v1.jsonl.gz"
         )
         cs = cs.filter(lambda c: c.duration >= 0.1)
         return cs

View File

@@ -760,22 +760,22 @@ def main():
     #     masks=masks,
     # )
-    for test_set, ol in zip(test_cuts_grouped, OVERLAP_RATIOS):
-        test_dl = librimix.test_dataloaders(test_set)
-        results_dict = decode_dataset(
-            dl=test_dl,
-            params=params,
-            model=model,
-            sp=sp,
-            word_table=word_table,
-            decoding_graph=decoding_graph,
-        )
-        save_results(
-            params=params,
-            test_set_name=f"test_{ol}",
-            results_dict=results_dict,
-        )
+    # for test_set, ol in zip(test_cuts_grouped, OVERLAP_RATIOS):
+    #     test_dl = librimix.test_dataloaders(test_set)
+    #     results_dict = decode_dataset(
+    #         dl=test_dl,
+    #         params=params,
+    #         model=model,
+    #         sp=sp,
+    #         word_table=word_table,
+    #         decoding_graph=decoding_graph,
+    #     )
+    #     save_results(
+    #         params=params,
+    #         test_set_name=f"test_{ol}",
+    #         results_dict=results_dict,
+    #     )
     # if params.save_masks:
     #     save_masks(
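The disabled loop decoded one LibriMix test split per overlap condition. Its inputs, test_cuts_grouped and OVERLAP_RATIOS, are defined elsewhere in the decode script and are not part of this hunk; the sketch below only illustrates the kind of grouping such a loop relies on, with hypothetical condition names and the assumption that the condition tag appears in each cut ID.

    from typing import Dict, List

    from lhotse import CutSet

    # Hypothetical condition list (LibriCSS-style names); the real script may differ.
    OVERLAP_RATIOS: List[str] = ["0L", "0S", "OV10", "OV20", "OV30", "OV40"]

    def group_by_overlap(test_cuts: CutSet) -> Dict[str, CutSet]:
        # Assumes the overlap condition is embedded in the cut ID, e.g. "OV20_session1_...".
        return {ol: test_cuts.filter(lambda c, ol=ol: ol in c.id) for ol in OVERLAP_RATIOS}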

View File

@@ -63,7 +63,7 @@ def compute_fbank_librimix():
     logging.info("Reading manifests")
     manifests = read_manifests_if_cached(
-        dataset_parts=["train_norvb", "dev_norvb", "train_2spk_norvb"],
+        dataset_parts=["train_norvb_v1", "dev_norvb_v1"],
         types=["cuts"],
         output_dir=src_dir,
         prefix="libri-mix",
@@ -71,15 +71,15 @@ def compute_fbank_librimix():
         lazy=True,
     )
-    train_cuts = manifests["train_norvb"]["cuts"]
-    dev_cuts = manifests["dev_norvb"]["cuts"]
-    train_2spk_cuts = manifests["train_2spk_norvb"]["cuts"]
+    train_cuts = manifests["train_norvb_v1"]["cuts"]
+    dev_cuts = manifests["dev_norvb_v1"]["cuts"]
+    # train_2spk_cuts = manifests["train_2spk_norvb"]["cuts"]

     logging.info("Extracting fbank features for training cuts")
     _ = train_cuts.compute_and_store_features_batch(
         extractor=extractor,
-        storage_path=output_dir / "librimix_feats_train_norvb",
-        manifest_path=src_dir / "cuts_train_norvb.jsonl.gz",
+        storage_path=output_dir / "librimix_feats_train_norvb_v1",
+        manifest_path=src_dir / "cuts_train_norvb_v1.jsonl.gz",
         batch_duration=5000,
         num_workers=4,
         storage_type=LilcomChunkyWriter,
@@ -89,24 +89,24 @@ def compute_fbank_librimix():
     logging.info("Extracting fbank features for dev cuts")
     _ = dev_cuts.compute_and_store_features_batch(
         extractor=extractor,
-        storage_path=output_dir / "librimix_feats_dev_norvb",
-        manifest_path=src_dir / "cuts_dev_norvb.jsonl.gz",
+        storage_path=output_dir / "librimix_feats_dev_norvb_v1",
+        manifest_path=src_dir / "cuts_dev_norvb_v1.jsonl.gz",
         batch_duration=5000,
         num_workers=4,
         storage_type=LilcomChunkyWriter,
         overwrite=True,
     )

-    logging.info("Extracting fbank features for 2-spk train cuts")
-    _ = train_2spk_cuts.compute_and_store_features_batch(
-        extractor=extractor,
-        storage_path=output_dir / "librimix_feats_train_2spk_norvb",
-        manifest_path=src_dir / "cuts_train_2spk_norvb.jsonl.gz",
-        batch_duration=5000,
-        num_workers=4,
-        storage_type=LilcomChunkyWriter,
-        overwrite=True,
-    )
+    # logging.info("Extracting fbank features for 2-spk train cuts")
+    # _ = train_2spk_cuts.compute_and_store_features_batch(
+    #     extractor=extractor,
+    #     storage_path=output_dir / "librimix_feats_train_2spk_norvb",
+    #     manifest_path=src_dir / "cuts_train_2spk_norvb.jsonl.gz",
+    #     batch_duration=5000,
+    #     num_workers=4,
+    #     storage_type=LilcomChunkyWriter,
+    #     overwrite=True,
+    # )

 if __name__ == "__main__":
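The extractor and the src_dir/output_dir paths are set up earlier in compute_fbank_librimix() and are not shown in these hunks. A minimal sketch of that setup, assuming the usual icefall choice of 80-dimensional fbank features; the actual script may configure a GPU-based extractor (e.g. kaldifeat) and different paths.

    from pathlib import Path

    from lhotse import Fbank, FbankConfig, LilcomChunkyWriter

    src_dir = Path("data/manifests")   # assumed manifest directory
    output_dir = Path("data/fbank")    # assumed feature directory

    # Assumed extractor configuration; the recipe may pin different values or use
    # a GPU extractor for faster batch extraction.
    extractor = Fbank(FbankConfig(num_mel_bins=80))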

View File

@@ -121,8 +121,8 @@ if [ $stage -le 5 ] && [ $stop_stage -ge 5 ]; then
   sim_cmd="queue.pl --mem 16G -l 'num_proc=4,h_rt=600:00:00'"
   gunzip -c data/manifests/libricss-sdm_supervisions_all.jsonl.gz |\
-    grep -v "0L" | grep -v "OV10" | grep -v "OV20" |\
-    gzip -c > data/manifests/libricss-sdm_supervisions_all_v2.jsonl.gz
+    grep -v "0L" | grep -v "OV10" |\
+    gzip -c > data/manifests/libricss-sdm_supervisions_all_v1.jsonl.gz

   # 2-speaker anechoic
   # log "Generating 2-speaker anechoic training set"
@@ -154,7 +154,7 @@ if [ $stage -le 5 ] && [ $stop_stage -ge 5 ]; then
   #   data/manifests/libri-mix_cuts_train_2spk_rvb.jsonl.gz

   # Full training set (2,3 speakers) anechoic
-  for part in train; do
+  for part in dev train; do
     if [ $part == "dev" ]; then
       num_jobs=1
     else
@@ -163,18 +163,15 @@ if [ $stage -le 5 ] && [ $stop_stage -ge 5 ]; then
     log "Generating anechoic ${part} set (full)"
     $sim_cmd exp/sim_${part}.log lhotse workflows simulate-meetings \
       --method conversational \
+      --fit-to-supervisions data/manifests/libricss-sdm_supervisions_all_v1.jsonl.gz \
       --num-repeats 1 \
-      --same-spk-pause 0.5 \
-      --diff-spk-pause 0.5 \
-      --diff-spk-overlap 2 \
-      --prob-diff-spk-overlap 0.75 \
       --num-speakers-per-meeting 2,3 \
       --max-duration-per-speaker 15.0 \
       --max-utterances-per-speaker 3 \
       --seed 1234 \
       --num-jobs ${num_jobs} \
       data/manifests/librispeech_cuts_${part}_trimmed.jsonl.gz \
-      data/manifests/libri-mix_cuts_${part}_norvb.jsonl.gz
+      data/manifests/libri-mix_cuts_${part}_norvb_v1.jsonl.gz
   done

   # Full training set (2,3,4 speakers) reverberant
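The new --fit-to-supervisions option estimates pause and overlap statistics from the filtered LibriCSS SDM supervisions, replacing the four removed hand-set values (--same-spk-pause, --diff-spk-pause, --diff-spk-overlap, --prob-diff-spk-overlap). Roughly the same fit can be done from Python; this is a hedged sketch assuming lhotse's ConversationalMeetingSimulator API, whose argument names mirror the CLI flags and may differ slightly across lhotse versions.

    from lhotse import SupervisionSet
    from lhotse.workflows.meeting_simulation import ConversationalMeetingSimulator

    # Learn pause/overlap statistics from the filtered LibriCSS SDM supervisions;
    # this is what --fit-to-supervisions does instead of the hard-coded values.
    simulator = ConversationalMeetingSimulator()
    simulator.fit(
        SupervisionSet.from_file(
            "data/manifests/libricss-sdm_supervisions_all_v1.jsonl.gz"
        )
    )
    # simulator.simulate(...) then mixes the trimmed LibriSpeech cuts into meetings,
    # with arguments mirroring the remaining CLI flags (--num-repeats, --seed, etc.).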
@@ -202,7 +199,7 @@ fi
 if [ $stage -le 7 ] && [ $stop_stage -ge 7 ]; then
   log "Stage 7: Compute fbank features for simulated Libri-mix"
   mkdir -p data/fbank
-  $cmd exp/feats_librimix_norvb.log python local/compute_fbank_librimix.py
+  $cmd exp/feats_librimix_norvb_v1.log python local/compute_fbank_librimix.py
 fi

 if [ $stage -le 8 ] && [ $stop_stage -ge 8 ]; then