mirror of
https://github.com/k2-fsa/icefall.git
synced 2025-08-09 10:02:22 +00:00
Fixes after review.
This commit is contained in:
parent
0b19aa09c1
commit
f25eedf2d4
@ -1,21 +0,0 @@
|
|||||||
#!/usr/bin/env python3
|
|
||||||
|
|
||||||
"""
|
|
||||||
This file downloads the librispeech dataset
|
|
||||||
to the directory data/LibriSpeech.
|
|
||||||
|
|
||||||
It's compatible with kaldi's egs/librispeech/s5/local/download_and_untar.sh .
|
|
||||||
"""
|
|
||||||
|
|
||||||
|
|
||||||
from lhotse.recipes import download_librispeech
|
|
||||||
|
|
||||||
|
|
||||||
def download_data():
|
|
||||||
target_dir = "data"
|
|
||||||
|
|
||||||
download_librispeech(target_dir=target_dir, dataset_parts="librispeech")
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
download_data()
|
|
@ -1,29 +0,0 @@
|
|||||||
#!/usr/bin/env python3
|
|
||||||
|
|
||||||
"""
|
|
||||||
This file generates manifests for the librispeech dataset.
|
|
||||||
It expects the dataset is saved in data/LibriSpeech
|
|
||||||
and the generated manifests are saved in data/manifests.
|
|
||||||
"""
|
|
||||||
|
|
||||||
import os
|
|
||||||
from pathlib import Path
|
|
||||||
|
|
||||||
from lhotse.recipes import prepare_librispeech
|
|
||||||
|
|
||||||
|
|
||||||
def prepare_librispeech_mainfest():
|
|
||||||
corpus_dir = Path("data/LibriSpeech")
|
|
||||||
output_dir = Path("data/manifests")
|
|
||||||
num_jobs = min(15, os.cpu_count())
|
|
||||||
|
|
||||||
librispeech_manifests = prepare_librispeech(
|
|
||||||
corpus_dir=corpus_dir,
|
|
||||||
dataset_parts="auto",
|
|
||||||
output_dir=output_dir,
|
|
||||||
num_jobs=num_jobs,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
prepare_librispeech_mainfest()
|
|
@ -1,22 +0,0 @@
|
|||||||
#!/usr/bin/env python3
|
|
||||||
|
|
||||||
"""
|
|
||||||
This file generates manifests for the musan dataset.
|
|
||||||
It expects the dataset is saved in data/musan
|
|
||||||
and the generated manifests are saved in data/manifests.
|
|
||||||
"""
|
|
||||||
|
|
||||||
from pathlib import Path
|
|
||||||
|
|
||||||
from lhotse.recipes import prepare_musan
|
|
||||||
|
|
||||||
|
|
||||||
def prepare_musan_mainfest():
|
|
||||||
corpus_dir = Path("data/musan")
|
|
||||||
output_dir = Path("data/manifests")
|
|
||||||
|
|
||||||
prepare_musan(corpus_dir=corpus_dir, output_dir=output_dir)
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
prepare_musan_mainfest()
|
|
@ -2,6 +2,7 @@
|
|||||||
|
|
||||||
set -eou pipefail
|
set -eou pipefail
|
||||||
|
|
||||||
|
nj=15
|
||||||
stage=-1
|
stage=-1
|
||||||
stop_stage=100
|
stop_stage=100
|
||||||
|
|
||||||
@ -28,7 +29,7 @@ if [ $stage -le 0 ] && [ $stop_stage -ge 0 ]; then
|
|||||||
|
|
||||||
if [ ! -f data/LibriSpeech/train-other-500/.completed ]; then
|
if [ ! -f data/LibriSpeech/train-other-500/.completed ]; then
|
||||||
# It's compatible with kaldi's egs/librispeech/s5/local/download_and_untar.sh
|
# It's compatible with kaldi's egs/librispeech/s5/local/download_and_untar.sh
|
||||||
./local/download_data.py
|
lhotse download librispeech --full data
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# If you have pre-downloaded it to /path/to/musan,
|
# If you have pre-downloaded it to /path/to/musan,
|
||||||
@ -36,8 +37,8 @@ if [ $stage -le 0 ] && [ $stop_stage -ge 0 ]; then
|
|||||||
#
|
#
|
||||||
# ln -s /path/to/musan data/
|
# ln -s /path/to/musan data/
|
||||||
#
|
#
|
||||||
if [ ! -e data/musan ]; then
|
if [ ! -f data/musan/.musan_completed ]; then
|
||||||
wget https://www.openslr.org/resources/17/musan.tar.gz
|
lhotse download musan data
|
||||||
fi
|
fi
|
||||||
fi
|
fi
|
||||||
|
|
||||||
@ -46,7 +47,7 @@ if [ $stage -le 1 ] && [ $stop_stage -ge 1 ]; then
|
|||||||
# We assume that you have downloaded the librispeech corpus
|
# We assume that you have downloaded the librispeech corpus
|
||||||
# to data/LibriSpeech
|
# to data/LibriSpeech
|
||||||
mkdir -p data/manifests
|
mkdir -p data/manifests
|
||||||
./local/prepare_librispeech_manifest.py
|
lhotse prepare librispeech -j $nj data/LibriSpeech data/manifests
|
||||||
fi
|
fi
|
||||||
|
|
||||||
if [ $stage -le 2 ] && [ $stop_stage -ge 2 ]; then
|
if [ $stage -le 2 ] && [ $stop_stage -ge 2 ]; then
|
||||||
@ -54,7 +55,7 @@ if [ $stage -le 2 ] && [ $stop_stage -ge 2 ]; then
|
|||||||
# We assume that you have downloaded the musan corpus
|
# We assume that you have downloaded the musan corpus
|
||||||
# to data/musan
|
# to data/musan
|
||||||
mkdir -p data/manifests
|
mkdir -p data/manifests
|
||||||
./local/prepare_musan_manifest.py
|
lhotse prepare musan data/musan data/manifests
|
||||||
fi
|
fi
|
||||||
|
|
||||||
if [ $stage -le 3 ] && [ $stop_stage -ge 3 ]; then
|
if [ $stage -le 3 ] && [ $stop_stage -ge 3 ]; then
|
||||||
|
Loading…
x
Reference in New Issue
Block a user