minor fixes

2025-12-11 06:55:27 +00:00 · 2023-11-06 15:05:49 +08:00 · 2023-11-06 15:05:49 +08:00 · f55e80a7c5
commit f55e80a7c5
parent cd59a69957
5 changed files with 24 additions and 113 deletions
--- a/egs/ljspeech/TTS/local/compute_spectrogram_ljspeech.py
+++ b/egs/ljspeech/TTS/local/compute_spectrogram_ljspeech.py
@ -21,7 +21,7 @@
 This file computes fbank features of the LJSpeech dataset.
 It looks for manifests in the directory data/manifests.

-The generated fbank features are saved in data/spectrogram.
+The generated spectrogram features are saved in data/spectrogram.
 """

 import logging
@ -75,7 +75,7 @@ def compute_spectrogram_ljspeech():
    with get_executor() as ex:  # Initialize the executor only once.
        cuts_filename = f"{prefix}_cuts_{partition}.{suffix}"
        if (output_dir / cuts_filename).is_file():
-            logging.info(f"{partition} already exists - skipping.")
+            logging.info(f"{cuts_filename} already exists - skipping.")
            return
        logging.info(f"Processing {partition}")
        cut_set = CutSet.from_manifests(
--- a/egs/ljspeech/TTS/local/prepare_token_file.py
+++ b/egs/ljspeech/TTS/local/prepare_token_file.py
@ -17,7 +17,7 @@


 """
-This file reads the texts in given manifest and generate the file that maps tokens to IDs.
+This file reads the texts in given manifest and generates the file that maps tokens to IDs.
 """

 import argparse
@ -73,11 +73,11 @@ def write_mapping(filename: str, sym2id: Dict[str, int]) -> None:

 def get_token2id(manifest_file: Path) -> Dict[str, int]:
    """Return a dict that maps token to IDs."""
-    extra_tokens = {
-        "<blk>": 0,  # blank
-        "<sos/eos>": 1,  # sos and eos symbols.
-        "<unk>": 2,  # OOV
-    }
+    extra_tokens = [
+        ("<blk>", None),  # 0 for blank
+        ("<sos/eos>", None),  # 1 for sos and eos symbols.
+        ("<unk>", None),  # 2 for OOV
+    ]
    cut_set = load_manifest(manifest_file)
    g2p = g2p_en.G2p()
    counter = Counter()
@ -96,10 +96,10 @@ def get_token2id(manifest_file: Path) -> Dict[str, int]:
    # Sort by the number of occurrences in descending order
    tokens_and_counts = sorted(counter.items(), key=lambda x: -x[1])

-    for token, idx in extra_tokens.items():
-        tokens_and_counts.insert(idx, (token, None))
+    tokens_and_counts = extra_tokens + tokens_and_counts
+
+    token2id: Dict[str, int] = {token: i for i, (token, _) in enumerate(tokens_and_counts)}

-    token2id: Dict[str, int] = {token: i for i, (token, count) in enumerate(tokens_and_counts)}
    return token2id


--- a/egs/ljspeech/TTS/local/validate_manifest.py
+++ b/egs/ljspeech/TTS/local/validate_manifest.py
@ -57,7 +57,7 @@ def main():

    assert manifest.is_file(), f"{manifest} does not exist"
    cut_set = load_manifest_lazy(manifest)
-    assert isinstance(cut_set, CutSet)
+    assert isinstance(cut_set, CutSet), type(cut_set)

    validate_for_tts(cut_set)

--- a/egs/ljspeech/TTS/prepare.sh
+++ b/egs/ljspeech/TTS/prepare.sh
@ -28,10 +28,13 @@ log "dl_dir: $dl_dir"
 if [ $stage -le 0 ] && [ $stop_stage -ge 0 ]; then
  log "Stage 0: Download data"

-  # If you have pre-downloaded it to /path/to/LJSpeech,
-  # you can create a symlink
+  # The directory $dl_dir/LJSpeech-1.1 will contain:
+  #   - wavs, which contains the audio files
+  #   - metadata.csv, which provides the transcript text for each audio clip
+
+  # If you have pre-downloaded it to /path/to/LJSpeech-1.1, you can create a symlink
  #
-  #   ln -sfv /path/to/LJSpeech $dl_dir/LJSpeech
+  #   ln -sfv /path/to/LJSpeech-1.1 $dl_dir/LJSpeech-1.1
  #
  if [ ! -d $dl_dir/LJSpeech-1.1 ]; then
    lhotse download ljspeech $dl_dir
@ -58,7 +61,7 @@ if [ $stage -le 2 ] && [ $stop_stage -ge 2 ]; then
  fi

  if [ ! -e data/spectrogram/.ljspeech-validated.done ]; then
-    log "Validating data/fbank for LJSpeech"
+    log "Validating data/spectrogram for LJSpeech"
    python3 ./local/validate_manifest.py \
      data/spectrogram/ljspeech_cuts_all.jsonl.gz
    touch data/spectrogram/.ljspeech-validated.done
@ -90,6 +93,10 @@ fi

 if [ $stage -le 4 ] && [ $stop_stage -ge 4 ]; then
  log "Stage 4: Generate token file"
+  # We assume you have installed g2p_en and espnet_tts_frontend.
+  # If not, please install them with:
+  #   - g2p_en: `pip install g2p_en`, refer to https://github.com/Kyubyong/g2p
+  #   - espnet_tts_frontend: `pip install espnet_tts_frontend`, refer to https://github.com/espnet/espnet_tts_frontend/
  if [ ! -e data/tokens.txt ]; then
    ./local/prepare_token_file.py \
      --manifest-file data/spectrogram/ljspeech_cuts_train.jsonl.gz \
--- a/egs/ljspeech/TTS/shared/parse_options.sh
+++ b/egs/ljspeech/TTS/shared/parse_options.sh
@ -1,97 +0,0 @@
-#!/usr/bin/env bash
-
-# Copyright 2012  Johns Hopkins University (Author: Daniel Povey);
-#                 Arnab Ghoshal, Karel Vesely
-
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#  http://www.apache.org/licenses/LICENSE-2.0
-#
-# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
-# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
-# MERCHANTABLITY OR NON-INFRINGEMENT.
-# See the Apache 2 License for the specific language governing permissions and
-# limitations under the License.
-
-
-# Parse command-line options.
-# To be sourced by another script (as in ". parse_options.sh").
-# Option format is: --option-name arg
-# and shell variable "option_name" gets set to value "arg."
-# The exception is --help, which takes no arguments, but prints the
-# $help_message variable (if defined).
-
-
-###
-### The --config file options have lower priority to command line
-### options, so we need to import them first...
-###
-
-# Now import all the configs specified by command-line, in left-to-right order
-for ((argpos=1; argpos<$#; argpos++)); do
-  if [ "${!argpos}" == "--config" ]; then
-    argpos_plus1=$((argpos+1))
-    config=${!argpos_plus1}
-    [ ! -r $config ] && echo "$0: missing config '$config'" && exit 1
-    . $config  # source the config file.
-  fi
-done
-
-
-###
-### Now we process the command line options
-###
-while true; do
-  [ -z "${1:-}" ] && break;  # break if there are no arguments
-  case "$1" in
-    # If the enclosing script is called with --help option, print the help
-    # message and exit.  Scripts should put help messages in $help_message
-    --help|-h) if [ -z "$help_message" ]; then echo "No help found." 1>&2;
-      else printf "$help_message\n" 1>&2 ; fi;
-      exit 0 ;;
-    --*=*) echo "$0: options to scripts must be of the form --name value, got '$1'"
-      exit 1 ;;
-    # If the first command-line argument begins with "--" (e.g. --foo-bar),
-    # then work out the variable name as $name, which will equal "foo_bar".
-    --*) name=`echo "$1" | sed s/^--// | sed s/-/_/g`;
-      # Next we test whether the variable in question is undefned-- if so it's
-      # an invalid option and we die.  Note: $0 evaluates to the name of the
-      # enclosing script.
-      # The test [ -z ${foo_bar+xxx} ] will return true if the variable foo_bar
-      # is undefined.  We then have to wrap this test inside "eval" because
-      # foo_bar is itself inside a variable ($name).
-      eval '[ -z "${'$name'+xxx}" ]' && echo "$0: invalid option $1" 1>&2 && exit 1;
-
-      oldval="`eval echo \\$$name`";
-      # Work out whether we seem to be expecting a Boolean argument.
-      if [ "$oldval" == "true" ] || [ "$oldval" == "false" ]; then
-        was_bool=true;
-      else
-        was_bool=false;
-      fi
-
-      # Set the variable to the right value-- the escaped quotes make it work if
-      # the option had spaces, like --cmd "queue.pl -sync y"
-      eval $name=\"$2\";
-
-      # Check that Boolean-valued arguments are really Boolean.
-      if $was_bool && [[ "$2" != "true" && "$2" != "false" ]]; then
-        echo "$0: expected \"true\" or \"false\": $1 $2" 1>&2
-        exit 1;
-      fi
-      shift 2;
-      ;;
-  *) break;
-  esac
-done
-
-
-# Check for an empty argument to the --cmd option, which can easily occur as a
-# result of scripting errors.
-[ ! -z "${cmd+xxx}" ] && [ -z "$cmd" ] && echo "$0: empty argument to --cmd option" 1>&2 && exit 1;
-
-
-true; # so this script returns exit code 0.
--- a/egs/ljspeech/TTS/shared/parse_options.sh
+++ b/egs/ljspeech/TTS/shared/parse_options.sh
@ -0,0 +1 @@
+../../../librispeech/ASR/shared/parse_options.sh
				`@ -0,0 +1 @@`
				`../../../librispeech/ASR/shared/parse_options.sh`