From 42fee0228b92b468d8d83feb5369813dea86b86b Mon Sep 17 00:00:00 2001 From: Fangjun Kuang Date: Wed, 18 Dec 2024 16:13:09 +0800 Subject: [PATCH] download and unzip the dataset --- egs/baker_zh/TTS/.gitignore | 1 + egs/baker_zh/TTS/prepare.sh | 66 +++++++++++++++++++++++++++++++++++++ egs/baker_zh/TTS/shared | 1 + 3 files changed, 68 insertions(+) create mode 100644 egs/baker_zh/TTS/.gitignore create mode 100755 egs/baker_zh/TTS/prepare.sh create mode 120000 egs/baker_zh/TTS/shared diff --git a/egs/baker_zh/TTS/.gitignore b/egs/baker_zh/TTS/.gitignore new file mode 100644 index 000000000..9c40c787f --- /dev/null +++ b/egs/baker_zh/TTS/.gitignore @@ -0,0 +1 @@ +path.sh diff --git a/egs/baker_zh/TTS/prepare.sh b/egs/baker_zh/TTS/prepare.sh new file mode 100755 index 000000000..e6840f66a --- /dev/null +++ b/egs/baker_zh/TTS/prepare.sh @@ -0,0 +1,66 @@ +#!/usr/bin/env bash + +# fix segmentation fault reported in https://github.com/k2-fsa/icefall/issues/674 +export PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python + +set -eou pipefail + +stage=-1 +stop_stage=100 + +dl_dir=$PWD/download +mkdir -p $dl_dir + +. shared/parse_options.sh || exit 1 + +# All files generated by this script are saved in "data". +# You can safely remove "data" and rerun this script to regenerate it. +mkdir -p data + +log() { + # This function is from espnet + local fname=${BASH_SOURCE[1]##*/} + echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*" +} + +log "dl_dir: $dl_dir" + +if [ $stage -le -1 ] && [ $stop_stage -ge -1 ]; then + log "Stage -1: build monotonic_align lib (used by ./matcha)" + for recipe in matcha; do + if [ ! -d $recipe/monotonic_align/build ]; then + cd $recipe/monotonic_align + python3 setup.py build_ext --inplace + cd ../../ + else + log "monotonic_align lib for $recipe already built" + fi + done +fi + +if [ $stage -le 0 ] && [ $stop_stage -ge 0 ]; then + log "Stage 0: Download data" + + # The directory $dl_dir/BANSYP contains the following 3 directories + + # ls -lh $dl_dir/BZNSYP/ + # total 0 + # drwxr-xr-x 10002 kuangfangjun root 0 Jan 4 2019 PhoneLabeling + # drwxr-xr-x 3 kuangfangjun root 0 Jan 31 2019 ProsodyLabeling + # drwxr-xr-x 10003 kuangfangjun root 0 Aug 26 17:45 Wave + + # If you have trouble accessing huggingface.co, please use + # + # cd $dl_dir + # wget https://huggingface.co/openspeech/BZNSYP/resolve/main/BZNSYP.tar.bz2 + # tar xf BZNSYP.tar.bz2 + # cd .. + + # If you have pre-downloaded it to /path/to/BZNSYP, you can create a symlink + # + # ln -sfv /path/to/BZNSYP $dl_dir/BZNSYP + # + if [ ! -d $dl_dir/BZNSYP/Wave ]; then + lhotse download baker-zh $dl_dir + fi +fi diff --git a/egs/baker_zh/TTS/shared b/egs/baker_zh/TTS/shared new file mode 120000 index 000000000..4cbd91a7e --- /dev/null +++ b/egs/baker_zh/TTS/shared @@ -0,0 +1 @@ +../../../icefall/shared \ No newline at end of file