mirror of
https://github.com/k2-fsa/icefall.git
synced 2025-08-08 09:32:20 +00:00
Fixes after review.
This commit is contained in:
parent
0b19aa09c1
commit
f25eedf2d4
@ -1,21 +0,0 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
"""
|
||||
This file downloads the librispeech dataset
|
||||
to the directory data/LibriSpeech.
|
||||
|
||||
It's compatible with kaldi's egs/librispeech/s5/local/download_and_untar.sh .
|
||||
"""
|
||||
|
||||
|
||||
from lhotse.recipes import download_librispeech
|
||||
|
||||
|
||||
def download_data():
    """Download the librispeech dataset via lhotse.

    Calls ``download_librispeech`` with ``data`` as the target directory
    and ``"librispeech"`` as the requested dataset parts. Per this
    script's module docstring, the corpus ends up in ``data/LibriSpeech``.
    """
    download_librispeech(dataset_parts="librispeech", target_dir="data")


if __name__ == "__main__":
    download_data()
|
@ -1,29 +0,0 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
"""
|
||||
This file generates manifests for the librispeech dataset.
|
||||
It expects the dataset to be saved in data/LibriSpeech
|
||||
and the generated manifests are saved in data/manifests.
|
||||
"""
|
||||
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
from lhotse.recipes import prepare_librispeech
|
||||
|
||||
|
||||
def prepare_librispeech_mainfest():
    """Generate manifests for the librispeech dataset.

    Reads the corpus from ``data/LibriSpeech`` and asks lhotse's
    ``prepare_librispeech`` to save the manifests in ``data/manifests``,
    using at most 15 parallel jobs.
    """
    corpus_dir = Path("data/LibriSpeech")
    output_dir = Path("data/manifests")
    # os.cpu_count() returns None when the number of CPUs cannot be
    # determined; fall back to a single job so min() does not raise
    # TypeError on a None operand.
    num_jobs = min(15, os.cpu_count() or 1)

    # The return value is not used here, so it is not bound to a name.
    prepare_librispeech(
        corpus_dir=corpus_dir,
        dataset_parts="auto",
        output_dir=output_dir,
        num_jobs=num_jobs,
    )


if __name__ == "__main__":
    prepare_librispeech_mainfest()
|
@ -1,22 +0,0 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
"""
|
||||
This file generates manifests for the musan dataset.
|
||||
It expects the dataset to be saved in data/musan
|
||||
and the generated manifests are saved in data/manifests.
|
||||
"""
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
from lhotse.recipes import prepare_musan
|
||||
|
||||
|
||||
def prepare_musan_mainfest():
    """Generate manifests for the musan dataset.

    Passes ``data/musan`` as the corpus directory and ``data/manifests``
    as the output directory to lhotse's ``prepare_musan``.
    """
    prepare_musan(
        corpus_dir=Path("data/musan"),
        output_dir=Path("data/manifests"),
    )


if __name__ == "__main__":
    prepare_musan_mainfest()
|
@ -2,6 +2,7 @@
|
||||
|
||||
set -eou pipefail
|
||||
|
||||
nj=15
|
||||
stage=-1
|
||||
stop_stage=100
|
||||
|
||||
@ -28,7 +29,7 @@ if [ $stage -le 0 ] && [ $stop_stage -ge 0 ]; then
|
||||
|
||||
if [ ! -f data/LibriSpeech/train-other-500/.completed ]; then
|
||||
# It's compatible with kaldi's egs/librispeech/s5/local/download_and_untar.sh
|
||||
./local/download_data.py
|
||||
lhotse download librispeech --full data
|
||||
fi
|
||||
|
||||
# If you have pre-downloaded it to /path/to/musan,
|
||||
@ -36,8 +37,8 @@ if [ $stage -le 0 ] && [ $stop_stage -ge 0 ]; then
|
||||
#
|
||||
# ln -s /path/to/musan data/
|
||||
#
|
||||
if [ ! -e data/musan ]; then
|
||||
wget https://www.openslr.org/resources/17/musan.tar.gz
|
||||
if [ ! -f data/musan/.musan_completed ]; then
|
||||
lhotse download musan data
|
||||
fi
|
||||
fi
|
||||
|
||||
@ -46,7 +47,7 @@ if [ $stage -le 1 ] && [ $stop_stage -ge 1 ]; then
|
||||
# We assume that you have downloaded the librispeech corpus
|
||||
# to data/LibriSpeech
|
||||
mkdir -p data/manifests
|
||||
./local/prepare_librispeech_manifest.py
|
||||
lhotse prepare librispeech -j $nj data/LibriSpeech data/manifests
|
||||
fi
|
||||
|
||||
if [ $stage -le 2 ] && [ $stop_stage -ge 2 ]; then
|
||||
@ -54,7 +55,7 @@ if [ $stage -le 2 ] && [ $stop_stage -ge 2 ]; then
|
||||
# We assume that you have downloaded the musan corpus
|
||||
# to data/musan
|
||||
mkdir -p data/manifests
|
||||
./local/prepare_musan_manifest.py
|
||||
lhotse prepare musan data/musan data/manifests
|
||||
fi
|
||||
|
||||
if [ $stage -le 3 ] && [ $stop_stage -ge 3 ]; then
|
||||
|
Loading…
x
Reference in New Issue
Block a user