minor updates

2025-12-11 06:55:27 +00:00 · 2024-10-21 13:12:10 +08:00 · 2024-10-21 13:12:10 +08:00 · e0136d9263
commit e0136d9263
parent cbef43feb3
6 changed files with 210 additions and 2 deletions
--- a/egs/libritts/CODEC/prepare.sh
+++ b/egs/libritts/CODEC/prepare.sh
@ -45,12 +45,11 @@ if [ $stage -le 1 ] && [ $stop_stage -ge 1 ]; then
  # to $dl_dir/LibriTTS
  mkdir -p data/manifests
  if [ ! -e data/manifests/.libritts.done ]; then
-    lhotse prepare libritts --num-jobs 32 $dl_dir/LibriTTS data/manifests
+    lhotse prepare libritts --num-jobs ${nj} $dl_dir/LibriTTS data/manifests
    touch data/manifests/.libritts.done
  fi
 fi

-
 if [ $stage -le 2 ] && [ $stop_stage -ge 2 ]; then
  log "Stage 2: Compute Spectrogram for LibriTTS"
  mkdir -p data/spectrogram
--- a/egs/libritts/TTS/local/compute_spectrogram_libritts.py
+++ b/egs/libritts/TTS/local/compute_spectrogram_libritts.py
@ -0,0 +1 @@
+../../CODEC/local/compute_spectrogram_libritts.py
--- a/egs/libritts/TTS/local/prepare_token_file.py
+++ b/egs/libritts/TTS/local/prepare_token_file.py
@ -0,0 +1 @@
+../../../ljspeech/TTS/local/prepare_token_file.py
--- a/egs/libritts/TTS/local/prepare_tokens_libritts.py
+++ b/egs/libritts/TTS/local/prepare_tokens_libritts.py
@ -0,0 +1,75 @@
+#!/usr/bin/env python3
+# Copyright         2023  Xiaomi Corp.        (authors: Zengwei Yao,
+#                                                       Zengrui Jin,)
+#                   2024  Tsinghua University (authors: Zengrui Jin,)
+#
+# See ../../../../LICENSE for clarification regarding multiple authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+"""
+This file reads the texts in the given manifests and saves the new cuts with phoneme tokens.
+"""
+
+import logging
+from pathlib import Path
+
+import tacotron_cleaner.cleaners
+from lhotse import CutSet, load_manifest
+from piper_phonemize import phonemize_espeak
+from tqdm.auto import tqdm
+
+
+def prepare_tokens_libritts():
+    """Attach phoneme tokens to the cuts of every LibriTTS partition.
+
+    For each partition, the cut manifest is loaded from ``data/spectrogram``.
+    The text of each cut's single supervision is normalized and phonemized,
+    and the flattened phoneme list is stored on the cut as ``tokens``.
+    The updated cuts are then written to a ``*_cuts_with_tokens_*`` manifest
+    in the same directory.
+    """
+    manifest_dir = Path("data/spectrogram")
+    prefix = "libritts"
+    suffix = "jsonl.gz"
+    partitions = (
+        "dev-clean",
+        "dev-other",
+        "test-clean",
+        "test-other",
+        "train-all-shuf",
+        "train-clean-460",
+    )
+
+    for partition in partitions:
+        src_manifest = manifest_dir / f"{prefix}_cuts_{partition}.{suffix}"
+        dst_manifest = manifest_dir / f"{prefix}_cuts_with_tokens_{partition}.{suffix}"
+
+        updated_cuts = []
+        for cut in tqdm(load_manifest(src_manifest)):
+            # Each cut only contains one supervision
+            assert len(cut.supervisions) == 1, (len(cut.supervisions), cut)
+            # Text normalization
+            normalized = tacotron_cleaner.cleaners.custom_english_cleaners(
+                cut.supervisions[0].text
+            )
+            # Convert to phonemes; the phonemizer output is a sequence of
+            # token groups, which is flattened into a single list here.
+            cut.tokens = [
+                token
+                for group in phonemize_espeak(normalized, "en-us")
+                for token in group
+            ]
+            updated_cuts.append(cut)
+
+        CutSet.from_cuts(updated_cuts).to_file(dst_manifest)
+
+
+if __name__ == "__main__":
+    # Log records show the timestamp, level, source file and line number,
+    # followed by the message.
+    formatter = "%(asctime)s %(levelname)s [%(filename)s:%(lineno)d] %(message)s"
+    logging.basicConfig(format=formatter, level=logging.INFO)
+
+    prepare_tokens_libritts()
--- a/egs/libritts/TTS/local/validate_manifest.py
+++ b/egs/libritts/TTS/local/validate_manifest.py
@ -0,0 +1 @@
+../../../ljspeech/TTS/local/validate_manifest.py
--- a/egs/libritts/TTS/prepare.sh
+++ b/egs/libritts/TTS/prepare.sh
@ -0,0 +1,131 @@
+#!/usr/bin/env bash
+
+# fix segmentation fault reported in https://github.com/k2-fsa/icefall/issues/674
+export PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python
+
+# Abort on any failing command, on use of an unset variable, and on a
+# failure anywhere inside a pipeline.
+set -eou pipefail
+
+# Only the stages numbered within [stage, stop_stage] are executed.
+stage=0
+stop_stage=100
+# Passed to the spectrogram computation script in stage 2.
+sampling_rate=24000
+# Number of parallel jobs used by `lhotse prepare` in stage 1.
+nj=32
+
+# Directory holding the downloaded corpus and x-vectors (see stage 0).
+dl_dir=$PWD/download
+
+# NOTE(review): presumably lets command-line options override the
+# variables defined above -- confirm against shared/parse_options.sh.
+. shared/parse_options.sh || exit 1
+
+# All files generated by this script are saved in "data".
+# You can safely remove "data" and rerun this script to regenerate it.
+mkdir -p data
+
+log() {
+  # This function is from espnet
+  #
+  # Prints all arguments prefixed with a timestamp and the call site:
+  # the caller's file name (directory part stripped), the line number of
+  # the call, and the name of the calling function.
+  local fname=${BASH_SOURCE[1]##*/}
+  echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*"
+}
+
+log "dl_dir: $dl_dir"
+
+if [ $stage -le -1 ] && [ $stop_stage -ge -1 ]; then
+  log "Stage -1: build monotonic_align lib"
+  # The build is skipped when the build directory already exists.
+  if [ ! -d vits/monotonic_align/build ]; then
+    # Build the extension in place, then return to the recipe directory so
+    # that the relative paths used by later stages keep working.
+    cd vits/monotonic_align
+    python setup.py build_ext --inplace
+    cd ../../
+  else 
+    log "monotonic_align lib already built"
+  fi
+fi
+
+if [ $stage -le 0 ] && [ $stop_stage -ge 0 ]; then
+  log "Stage 0: Download data"
+
+  # If you have pre-downloaded it to /path/to/LibriTTS,
+  # you can create a symlink
+  #
+  #   ln -sfv /path/to/LibriTTS $dl_dir/LibriTTS
+  #
+  # $dl_dir is quoted everywhere below: it defaults to $PWD/download, so an
+  # unquoted expansion would be word-split if the path contains spaces.
+  if [ ! -d "$dl_dir/LibriTTS" ]; then
+    lhotse download libritts "$dl_dir"
+  fi
+
+  # The x-vectors are only cloned when their directory does not exist yet.
+  if [ ! -d "$dl_dir/xvector_nnet_1a_libritts_clean_460" ]; then
+    log "Downloading x-vector"
+
+    git clone https://huggingface.co/datasets/zrjin/xvector_nnet_1a_libritts_clean_460 "$dl_dir/xvector_nnet_1a_libritts_clean_460"
+  fi
+
+fi
+
+if [ $stage -le 1 ] && [ $stop_stage -ge 1 ]; then
+  log "Stage 1: Prepare LibriTTS manifest"
+  # We assume that you have downloaded the LibriTTS corpus
+  # to $dl_dir/LibriTTS
+  mkdir -p data/manifests
+  # The .done marker is only touched after a successful run, so reruns skip
+  # this step.
+  if [ ! -e data/manifests/.libritts.done ]; then
+    # Expansions are quoted so that a $dl_dir containing spaces is passed as
+    # a single argument.
+    lhotse prepare libritts --num-jobs "${nj}" "$dl_dir/LibriTTS" data/manifests
+    touch data/manifests/.libritts.done
+  fi
+fi
+
+if [ $stage -le 2 ] && [ $stop_stage -ge 2 ]; then
+  log "Stage 2: Compute Spectrogram for LibriTTS"
+  mkdir -p data/spectrogram
+  # The .done marker is only touched after a successful run, so reruns skip
+  # this step.
+  if [ ! -e data/spectrogram/.libritts.done ]; then
+    ./local/compute_spectrogram_libritts.py --sampling-rate "$sampling_rate"
+    touch data/spectrogram/.libritts.done
+  fi
+
+  # Here we shuffle and combine the train-clean-100, train-clean-360 and
+  # train-other-500 together to form the training set.
+  #
+  # NOTE: a failure inside a process substitution <(...) does not fail the
+  # pipeline, so a wrong input path would silently drop that subset; all
+  # three inputs must point into data/spectrogram/.
+  if [ ! -f data/spectrogram/libritts_cuts_train-all-shuf.jsonl.gz ]; then
+    cat <(gunzip -c data/spectrogram/libritts_cuts_train-clean-100.jsonl.gz) \
+      <(gunzip -c data/spectrogram/libritts_cuts_train-clean-360.jsonl.gz) \
+      <(gunzip -c data/spectrogram/libritts_cuts_train-other-500.jsonl.gz) | \
+      shuf | gzip -c > data/spectrogram/libritts_cuts_train-all-shuf.jsonl.gz
+  fi
+
+  # Here we shuffle and combine the train-clean-100 and train-clean-360
+  # together to form the train-clean-460 training set.
+  if [ ! -f data/spectrogram/libritts_cuts_train-clean-460.jsonl.gz ]; then
+    cat <(gunzip -c data/spectrogram/libritts_cuts_train-clean-100.jsonl.gz) \
+      <(gunzip -c data/spectrogram/libritts_cuts_train-clean-360.jsonl.gz) | \
+      shuf | gzip -c > data/spectrogram/libritts_cuts_train-clean-460.jsonl.gz
+  fi
+
+  # Only the combined train-all-shuf manifest is validated here.
+  if [ ! -e data/spectrogram/.libritts-validated.done ]; then
+    log "Validating data/spectrogram for LibriTTS"
+    ./local/validate_manifest.py \
+      data/spectrogram/libritts_cuts_train-all-shuf.jsonl.gz
+    touch data/spectrogram/.libritts-validated.done
+  fi
+fi
+
+if [ $stage -le 3 ] && [ $stop_stage -ge 3 ]; then
+  log "Stage 3: Prepare phoneme tokens for LibriTTS"
+  # We assume you have installed piper_phonemize and espnet_tts_frontend.
+  # If not, please install them with:
+  #   - piper_phonemize:
+  #       refer to https://github.com/rhasspy/piper-phonemize,
+  #       could install the pre-built wheels from https://github.com/csukuangfj/piper-phonemize/releases/tag/2023.12.5
+  #   - espnet_tts_frontend:
+  #       `pip install espnet_tts_frontend`, refer to https://github.com/espnet/espnet_tts_frontend/
+  #
+  # The marker file is only touched after the script finishes successfully,
+  # so reruns skip this step.
+  if [ ! -e data/spectrogram/.libritts_with_token.done ]; then
+    ./local/prepare_tokens_libritts.py
+    touch data/spectrogram/.libritts_with_token.done
+  fi
+fi
+
+if [ $stage -le 4 ] && [ $stop_stage -ge 4 ]; then
+  log "Stage 4: Generate token file"
+  # We assume you have installed piper_phonemize and espnet_tts_frontend.
+  # If not, please install them with:
+  #   - piper_phonemize:
+  #       refer to https://github.com/rhasspy/piper-phonemize,
+  #       could install the pre-built wheels from https://github.com/csukuangfj/piper-phonemize/releases/tag/2023.12.5
+  #   - espnet_tts_frontend:
+  #       `pip install espnet_tts_frontend`, refer to https://github.com/espnet/espnet_tts_frontend/
+  #
+  # The token file is only generated when data/tokens.txt does not exist yet.
+  if [ ! -e data/tokens.txt ]; then
+    ./local/prepare_token_file.py --tokens data/tokens.txt
+  fi
+fi
				`@ -0,0 +1 @@`
				`../../CODEC/local/compute_spectrogram_libritts.py`
				`@ -0,0 +1 @@`
				`../../../ljspeech/TTS/local/prepare_token_file.py`
				`@ -0,0 +1 @@`
				`../../../ljspeech/TTS/local/validate_manifest.py`