diff --git a/egs/librispeech/ASR/.prepare_vox.sh.swp b/egs/librispeech/ASR/.prepare_vox.sh.swp
index 7fd48707a..36108be5a 100644
Binary files a/egs/librispeech/ASR/.prepare_vox.sh.swp and b/egs/librispeech/ASR/.prepare_vox.sh.swp differ
diff --git a/egs/librispeech/ASR/prepare_vox.sh b/egs/librispeech/ASR/prepare_vox.sh
index 49fdf831f..619e4d37a 100755
--- a/egs/librispeech/ASR/prepare_vox.sh
+++ b/egs/librispeech/ASR/prepare_vox.sh
@@ -165,3 +165,64 @@ if [ $stage -le 5 ] && [ $stop_stage -ge 5 ]; then
   done
 fi
 
+if [ $stage -le 6 ] && [ $stop_stage -ge 6 ]; then
+  log "Stage 6: Prepare Vox manifest"
+  # Run local/prepare_vox.py once per entry found directly under
+  # $dl_dir/test-clean and $dl_dir/test-other, i.e. for each
+  #   $dl_dir/{test-clean,test-other}/<spk_id>
+  # The entry's name is passed as the speaker id.
+  # NOTE(review): layout inferred from the loops below; confirm it
+  # against local/prepare_vox.py. The .done marker is never written
+  # (touch is commented out), so this stage re-runs every time.
+
+  # The resampling step below is disabled. Keep its `if` guard
+  # commented out with the body: an `if` with no matching `fi`
+  # makes the rest of the script fail to parse.
+  #if [ ! -e $dl_dir/vox/.vox.done ]; then
+  #  for dset in "4446"; do
+  #    log "Resampling vox/$dset set"
+  #    file_list=`ls $dl_dir/vox/$dset/`
+  #    for wavfile in $file_list; do
+  #      echo $wavfile
+  #      sox -v 0.9 $dl_dir/vox/$dset/$wavfile -r 16000 -e signed-integer $dl_dir/vox/$dset/tmp_$wavfile
+  #      mv $dl_dir/vox/$dset/tmp_$wavfile $dl_dir/vox/$dset/$wavfile
+  #    done
+  #    log "Resampling $dset done"
+  #  done
+
+  mkdir -p data/manifests
+  if [ ! -e data/manifests/.vox.done ]; then
+    for dest in "test-clean" "test-other"; do
+      for spk in "$dl_dir/$dest"/*; do
+        spk_id=${spk##*/}  # last path component, i.e. the entry's own name
+        python local/prepare_vox.py "$dl_dir/$dest" "$spk_id"
+      done
+    done
+    #touch data/manifests/.vox.done
+  fi
+fi
+
+if [ $stage -le 7 ] && [ $stop_stage -ge 7 ]; then
+  log "Stage 7: Prepare musan manifest"
+  # We assume that you have downloaded the musan corpus
+  # to $dl_dir/musan
+  mkdir -p data/manifests
+  if [ ! -e data/manifests/.musan.done ]; then
+    lhotse prepare musan $dl_dir/musan data/manifests
+    touch data/manifests/.musan.done
+  fi
+fi
+
+if [ $stage -le 3 ] && [ $stop_stage -ge 3 ]; then
+  log "Stage 3: Compute fbank for Vox"
+  mkdir -p data/fbank
+  if [ ! -e data/fbank/.vox.done ]; then
+    for dest in "test-clean" "test-other"; do
+      for spk in "$dl_dir/$dest"/*; do
+        spk_id=${spk##*/}  # last path component, i.e. the entry's own name
+        ./local/compute_fbank_vox.py --data-dir "$spk" --spk-id "$spk_id"
+      done
+    done
+    #touch data/fbank/.vox.done
+  fi
+