diff --git a/egs/ljspeech/TTS/local/compute_spectrogram_ljspeech.py b/egs/ljspeech/TTS/local/compute_spectrogram_ljspeech.py index 3603af07d..edb22b276 100755 --- a/egs/ljspeech/TTS/local/compute_spectrogram_ljspeech.py +++ b/egs/ljspeech/TTS/local/compute_spectrogram_ljspeech.py @@ -21,7 +21,7 @@ This file computes fbank features of the LJSpeech dataset. It looks for manifests in the directory data/manifests. -The generated fbank features are saved in data/spectrogram. +The generated spectrogram features are saved in data/spectrogram. """ import logging @@ -75,7 +75,7 @@ def compute_spectrogram_ljspeech(): with get_executor() as ex: # Initialize the executor only once. cuts_filename = f"{prefix}_cuts_{partition}.{suffix}" if (output_dir / cuts_filename).is_file(): - logging.info(f"{partition} already exists - skipping.") + logging.info(f"{cuts_filename} already exists - skipping.") return logging.info(f"Processing {partition}") cut_set = CutSet.from_manifests( diff --git a/egs/ljspeech/TTS/local/prepare_token_file.py b/egs/ljspeech/TTS/local/prepare_token_file.py index 17a558899..167b73f2e 100755 --- a/egs/ljspeech/TTS/local/prepare_token_file.py +++ b/egs/ljspeech/TTS/local/prepare_token_file.py @@ -17,7 +17,7 @@ """ -This file reads the texts in given manifest and generate the file that maps tokens to IDs. +This file reads the texts in given manifest and generates the file that maps tokens to IDs. """ import argparse @@ -73,11 +73,11 @@ def write_mapping(filename: str, sym2id: Dict[str, int]) -> None: def get_token2id(manifest_file: Path) -> Dict[str, int]: """Return a dict that maps token to IDs.""" - extra_tokens = { - "<blk>": 0, # blank - "<sos/eos>": 1, # sos and eos symbols. - "<unk>": 2, # OOV - } + extra_tokens = [ + ("<blk>", None), # 0 for blank + ("<sos/eos>", None), # 1 for sos and eos symbols.
+ ("<unk>", None), # 2 for OOV + ] cut_set = load_manifest(manifest_file) g2p = g2p_en.G2p() counter = Counter() @@ -96,10 +96,10 @@ def get_token2id(manifest_file: Path) -> Dict[str, int]: # Sort by the number of occurrences in descending order tokens_and_counts = sorted(counter.items(), key=lambda x: -x[1]) - for token, idx in extra_tokens.items(): - tokens_and_counts.insert(idx, (token, None)) + tokens_and_counts = extra_tokens + tokens_and_counts + + token2id: Dict[str, int] = {token: i for i, (token, _) in enumerate(tokens_and_counts)} - token2id: Dict[str, int] = {token: i for i, (token, count) in enumerate(tokens_and_counts)} return token2id diff --git a/egs/ljspeech/TTS/local/validate_manifest.py b/egs/ljspeech/TTS/local/validate_manifest.py index cd466303e..68159ae03 100755 --- a/egs/ljspeech/TTS/local/validate_manifest.py +++ b/egs/ljspeech/TTS/local/validate_manifest.py @@ -57,7 +57,7 @@ def main(): assert manifest.is_file(), f"{manifest} does not exist" cut_set = load_manifest_lazy(manifest) - assert isinstance(cut_set, CutSet) + assert isinstance(cut_set, CutSet), type(cut_set) validate_for_tts(cut_set) diff --git a/egs/ljspeech/TTS/prepare.sh b/egs/ljspeech/TTS/prepare.sh index 613eb37d8..396d91b59 100755 --- a/egs/ljspeech/TTS/prepare.sh +++ b/egs/ljspeech/TTS/prepare.sh @@ -28,10 +28,13 @@ log "dl_dir: $dl_dir" if [ $stage -le 0 ] && [ $stop_stage -ge 0 ]; then log "Stage 0: Download data" - # If you have pre-downloaded it to /path/to/LJSpeech, - # you can create a symlink + # The directory $dl_dir/LJSpeech-1.1 will contain: + # - wavs, which contains the audio files + # - metadata.csv, which provides the transcript text for each audio clip + + # If you have pre-downloaded it to /path/to/LJSpeech-1.1, you can create a symlink # - # ln -sfv /path/to/LJSpeech $dl_dir/LJSpeech + # ln -sfv /path/to/LJSpeech-1.1 $dl_dir/LJSpeech-1.1 # if [ !
-d $dl_dir/LJSpeech-1.1 ]; then lhotse download ljspeech $dl_dir @@ -58,7 +61,7 @@ if [ $stage -le 2 ] && [ $stop_stage -ge 2 ]; then fi if [ ! -e data/spectrogram/.ljspeech-validated.done ]; then - log "Validating data/fbank for LJSpeech" + log "Validating data/spectrogram for LJSpeech" python3 ./local/validate_manifest.py \ data/spectrogram/ljspeech_cuts_all.jsonl.gz touch data/spectrogram/.ljspeech-validated.done @@ -90,6 +93,10 @@ fi if [ $stage -le 4 ] && [ $stop_stage -ge 4 ]; then log "Stage 4: Generate token file" + # We assume you have installed g2p_en and espnet_tts_frontend. + # If not, please install them with: + # - g2p_en: `pip install g2p_en`, refer to https://github.com/Kyubyong/g2p + # - espnet_tts_frontend: `pip install espnet_tts_frontend`, refer to https://github.com/espnet/espnet_tts_frontend/ if [ ! -e data/tokens.txt ]; then ./local/prepare_token_file.py \ --manifest-file data/spectrogram/ljspeech_cuts_train.jsonl.gz \ diff --git a/egs/ljspeech/TTS/shared/parse_options.sh b/egs/ljspeech/TTS/shared/parse_options.sh deleted file mode 100755 index 71fb9e5ea..000000000 --- a/egs/ljspeech/TTS/shared/parse_options.sh +++ /dev/null @@ -1,97 +0,0 @@ -#!/usr/bin/env bash - -# Copyright 2012 Johns Hopkins University (Author: Daniel Povey); -# Arnab Ghoshal, Karel Vesely - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - - -# Parse command-line options. -# To be sourced by another script (as in ". 
parse_options.sh"). -# Option format is: --option-name arg -# and shell variable "option_name" gets set to value "arg." -# The exception is --help, which takes no arguments, but prints the -# $help_message variable (if defined). - - -### -### The --config file options have lower priority to command line -### options, so we need to import them first... -### - -# Now import all the configs specified by command-line, in left-to-right order -for ((argpos=1; argpos<$#; argpos++)); do - if [ "${!argpos}" == "--config" ]; then - argpos_plus1=$((argpos+1)) - config=${!argpos_plus1} - [ ! -r $config ] && echo "$0: missing config '$config'" && exit 1 - . $config # source the config file. - fi -done - - -### -### Now we process the command line options -### -while true; do - [ -z "${1:-}" ] && break; # break if there are no arguments - case "$1" in - # If the enclosing script is called with --help option, print the help - # message and exit. Scripts should put help messages in $help_message - --help|-h) if [ -z "$help_message" ]; then echo "No help found." 1>&2; - else printf "$help_message\n" 1>&2 ; fi; - exit 0 ;; - --*=*) echo "$0: options to scripts must be of the form --name value, got '$1'" - exit 1 ;; - # If the first command-line argument begins with "--" (e.g. --foo-bar), - # then work out the variable name as $name, which will equal "foo_bar". - --*) name=`echo "$1" | sed s/^--// | sed s/-/_/g`; - # Next we test whether the variable in question is undefned-- if so it's - # an invalid option and we die. Note: $0 evaluates to the name of the - # enclosing script. - # The test [ -z ${foo_bar+xxx} ] will return true if the variable foo_bar - # is undefined. We then have to wrap this test inside "eval" because - # foo_bar is itself inside a variable ($name). - eval '[ -z "${'$name'+xxx}" ]' && echo "$0: invalid option $1" 1>&2 && exit 1; - - oldval="`eval echo \\$$name`"; - # Work out whether we seem to be expecting a Boolean argument. 
- if [ "$oldval" == "true" ] || [ "$oldval" == "false" ]; then - was_bool=true; - else - was_bool=false; - fi - - # Set the variable to the right value-- the escaped quotes make it work if - # the option had spaces, like --cmd "queue.pl -sync y" - eval $name=\"$2\"; - - # Check that Boolean-valued arguments are really Boolean. - if $was_bool && [[ "$2" != "true" && "$2" != "false" ]]; then - echo "$0: expected \"true\" or \"false\": $1 $2" 1>&2 - exit 1; - fi - shift 2; - ;; - *) break; - esac -done - - -# Check for an empty argument to the --cmd option, which can easily occur as a -# result of scripting errors. -[ ! -z "${cmd+xxx}" ] && [ -z "$cmd" ] && echo "$0: empty argument to --cmd option" 1>&2 && exit 1; - - -true; # so this script returns exit code 0. diff --git a/egs/ljspeech/TTS/shared/parse_options.sh b/egs/ljspeech/TTS/shared/parse_options.sh new file mode 120000 index 000000000..e4665e7de --- /dev/null +++ b/egs/ljspeech/TTS/shared/parse_options.sh @@ -0,0 +1 @@ +../../../librispeech/ASR/shared/parse_options.sh \ No newline at end of file