Fixes after review.

This commit is contained in:
Fangjun Kuang 2021-07-20 00:14:24 +08:00
parent 0b19aa09c1
commit f25eedf2d4
4 changed files with 6 additions and 77 deletions

View File

@ -1,21 +0,0 @@
#!/usr/bin/env python3
"""
This file downloads the librispeech dataset
to the directory data/LibriSpeech.
It's compatible with kaldi's egs/librispeech/s5/local/download_and_untar.sh .
"""
from lhotse.recipes import download_librispeech
def download_data():
    """Download the LibriSpeech corpus through lhotse.

    Calls ``download_librispeech`` with ``"data"`` as the target directory
    and ``"librispeech"`` as the requested dataset parts. Per the module
    docstring, the corpus ends up under ``data/LibriSpeech``.
    """
    download_librispeech(dataset_parts="librispeech", target_dir="data")


if __name__ == "__main__":
    # Run the download only when executed as a script, not on import.
    download_data()

View File

@ -1,29 +0,0 @@
#!/usr/bin/env python3
"""
This file generates manifests for the librispeech dataset.
It expects the dataset to be saved in data/LibriSpeech
and writes the generated manifests to data/manifests.
"""
import os
from pathlib import Path
from lhotse.recipes import prepare_librispeech
def prepare_librispeech_mainfest():
    """Generate lhotse manifests for the LibriSpeech corpus.

    Reads the corpus from ``data/LibriSpeech`` and passes
    ``data/manifests`` as the output directory to ``prepare_librispeech``,
    with ``dataset_parts="auto"``.

    The number of parallel jobs is the CPU count, capped at 15.
    """
    corpus_dir = Path("data/LibriSpeech")
    output_dir = Path("data/manifests")

    # os.cpu_count() returns None when the count cannot be determined;
    # fall back to one job so min() never compares an int with None.
    num_jobs = min(15, os.cpu_count() or 1)

    # The returned manifests are not needed here, so the result is not bound.
    prepare_librispeech(
        corpus_dir=corpus_dir,
        dataset_parts="auto",
        output_dir=output_dir,
        num_jobs=num_jobs,
    )


if __name__ == "__main__":
    # Run the preparation only when executed as a script, not on import.
    prepare_librispeech_mainfest()

View File

@ -1,22 +0,0 @@
#!/usr/bin/env python3
"""
This file generates manifests for the musan dataset.
It expects the dataset to be saved in data/musan
and writes the generated manifests to data/manifests.
"""
from pathlib import Path
from lhotse.recipes import prepare_musan
def prepare_musan_mainfest():
    """Generate lhotse manifests for the MUSAN corpus.

    Passes ``data/musan`` as the corpus directory and ``data/manifests``
    as the output directory to ``prepare_musan``.
    """
    prepare_musan(
        corpus_dir=Path("data/musan"),
        output_dir=Path("data/manifests"),
    )


if __name__ == "__main__":
    # Run the preparation only when executed as a script, not on import.
    prepare_musan_mainfest()

View File

@ -2,6 +2,7 @@
set -eou pipefail
nj=15
stage=-1
stop_stage=100
@ -28,7 +29,7 @@ if [ $stage -le 0 ] && [ $stop_stage -ge 0 ]; then
if [ ! -f data/LibriSpeech/train-other-500/.completed ]; then
# It's compatible with kaldi's egs/librispeech/s5/local/download_and_untar.sh
./local/download_data.py
lhotse download librispeech --full data
fi
# If you have pre-downloaded it to /path/to/musan,
@ -36,8 +37,8 @@ if [ $stage -le 0 ] && [ $stop_stage -ge 0 ]; then
#
# ln -s /path/to/musan data/
#
if [ ! -e data/musan ]; then
wget https://www.openslr.org/resources/17/musan.tar.gz
if [ ! -f data/musan/.musan_completed ]; then
lhotse download musan data
fi
fi
@ -46,7 +47,7 @@ if [ $stage -le 1 ] && [ $stop_stage -ge 1 ]; then
# We assume that you have downloaded the librispeech corpus
# to data/LibriSpeech
mkdir -p data/manifests
./local/prepare_librispeech_manifest.py
lhotse prepare librispeech -j $nj data/LibriSpeech data/manifests
fi
if [ $stage -le 2 ] && [ $stop_stage -ge 2 ]; then
@ -54,7 +55,7 @@ if [ $stage -le 2 ] && [ $stop_stage -ge 2 ]; then
# We assume that you have downloaded the musan corpus
# to data/musan
mkdir -p data/manifests
./local/prepare_musan_manifest.py
lhotse prepare musan data/musan data/manifests
fi
if [ $stage -le 3 ] && [ $stop_stage -ge 3 ]; then