Minor bug fixes and descriptive text for the LibriCSS recipe (#1268)

This commit is contained in:
zr_jin 2023-10-12 22:02:49 +08:00 committed by GitHub
parent 162ceaf4b3
commit eeeeef390b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@ -90,6 +90,14 @@ if [ $stage -le 1 ] && [ $stop_stage -ge 1 ]; then
# NOTE: Alignments are required for this recipe.
mkdir -p data/manifests
log "This recipe uses mfa alignment for trimming"
if [ ! -d $dl_dir/libri_alignments/LibriSpeech ]; then
log "No alignment provided. please refer to ../../librispeech/ASR/add_alignments.sh \n \
for mfa alignments. Once you have downloaded and unzipped the .zip file containing \n \
all alignments, the folder should be renamed to libri_alignments and moved to your $dl_dir ."
exit 0
fi
lhotse prepare librispeech -p train-clean-100 -p train-clean-360 -p train-other-500 -p dev-clean \
-j 4 --alignments-dir $dl_dir/libri_alignments/LibriSpeech $dl_dir/librispeech data/manifests/
fi
@ -118,9 +126,12 @@ fi
if [ $stage -le 4 ] && [ $stop_stage -ge 4 ]; then
log "Stage 4: Extract features for LibriSpeech, trim to alignments, and shuffle the cuts"
python local/compute_fbank_librispeech.py
lhotse combine data/manifests/librispeech_cuts_train* - |\
lhotse cut trim-to-alignments --type word --max-pause 0.2 - - |\
# python local/compute_fbank_librispeech.py
lhotse combine data/manifests/librispeech_cuts_train* data/manifests/librispeech_cuts_train_all.jsonl.gz
lhotse cut trim-to-alignments --type word --max-pause 0.2 \
data/manifests/librispeech_cuts_train_all.jsonl.gz \
data/manifests/librispeech_cuts_train_all_trimmed.jsonl.gz
cat <(gunzip -c data/manifests/librispeech_cuts_train_all_trimmed.jsonl.gz) | \
shuf | gzip -c > data/manifests/librispeech_cuts_train_trimmed.jsonl.gz
fi
@ -152,7 +163,7 @@ if [ $stage -le 5 ] && [ $stop_stage -ge 5 ]; then
data/manifests/lsmix_cuts_train_clean_ov40.jsonl.gz
# Full training set (2,3 speakers) anechoic
log "Generating anechoic ${part} set (full)"
log "Generating anechoic set (full)"
lhotse workflows simulate-meetings \
--method conversational \
--fit-to-supervisions data/manifests/libricss-sdm_supervisions_all_v1.jsonl.gz \