diff --git a/egs/zipvoice/README.md b/egs/zipvoice/README.md index 27b30d611..4bca60301 100644 --- a/egs/zipvoice/README.md +++ b/egs/zipvoice/README.md @@ -105,7 +105,7 @@ pip install -r ../../requirements.txt #### 1.1. Prepare the Emilia dataset ```bash -bash scripts/prepare_emilia.sh --stage 0 --stop-stage 4 +bash scripts/prepare_emilia.sh ``` See [scripts/prepare_emilia.sh](scripts/prepare_emilia.sh) for step by step instructions. @@ -113,7 +113,7 @@ See [scripts/prepare_emilia.sh](scripts/prepare_emilia.sh) for step by step inst #### 1.2 Prepare the LibriTTS dataset ```bash -bash scripts/prepare_libritts.sh --stage 0 --stop-stage 3 +bash scripts/prepare_libritts.sh ``` See [scripts/prepare_libritts.sh](scripts/prepare_libritts.sh) for step by step instructions. @@ -139,7 +139,7 @@ python3 zipvoice/train_flow.py \ --lr-hours 30000 \ --lr-batches 7500 \ --token-file "data/tokens_emilia.txt" \ - --manifest-dir "data/fbank_emilia" \ + --manifest-dir "data/fbank" \ --num-epochs 11 \ --exp-dir zipvoice/exp_zipvoice ``` @@ -172,7 +172,7 @@ python3 zipvoice/train_distill.py \ --base-lr 0.0005 \ --max-duration 500 \ --token-file "data/tokens_emilia.txt" \ - --manifest-dir "data/fbank_emilia" \ + --manifest-dir "data/fbank" \ --teacher-model zipvoice/exp_zipvoice/epoch-11-avg-4.pt \ --num-updates 60000 \ --distill-stage "first" \ @@ -205,7 +205,7 @@ python3 zipvoice/train_distill.py \ --base-lr 0.0001 \ --max-duration 200 \ --token-file "data/tokens_emilia.txt" \ - --manifest-dir "data/fbank_emilia" \ + --manifest-dir "data/fbank" \ --teacher-model zipvoice/exp_zipvoice_distill_1stage/iter-60000-avg-7.pt \ --num-updates 2000 \ --distill-stage "second" \ @@ -233,7 +233,7 @@ python3 zipvoice/train_flow.py \ --lr-epochs 10 \ --lr-batches 7500 \ --token-file "data/tokens_libritts.txt" \ - --manifest-dir "data/fbank_libritts" \ + --manifest-dir "data/fbank" \ --num-epochs 60 \ --exp-dir zipvoice/exp_zipvoice_libritts ``` @@ -266,7 +266,7 @@ python3 zipvoice/train_distill.py \ --base-lr 0.001 \ --max-duration 250 \ --token-file "data/tokens_libritts.txt" \ - --manifest-dir "data/fbank_libritts" \ + --manifest-dir "data/fbank" \ --teacher-model zipvoice/exp_zipvoice_libritts/epoch-60-avg-10.pt \ --num-epochs 6 \ --distill-stage "first" \ @@ -299,7 +299,7 @@ python3 zipvoice/train_distill.py \ --base-lr 0.001 \ --max-duration 250 \ --token-file "data/tokens_libritts.txt" \ - --manifest-dir "data/fbank_libritts" \ + --manifest-dir "data/fbank" \ --teacher-model zipvoice/exp_zipvoice_distill_1stage_libritts/epoch-6-avg-3.pt \ --num-epochs 6 \ --distill-stage "second" \ diff --git a/egs/zipvoice/scripts/run_eval.sh b/egs/zipvoice/scripts/run_eval.sh deleted file mode 100644 index c4f00548d..000000000 --- a/egs/zipvoice/scripts/run_eval.sh +++ /dev/null @@ -1,75 +0,0 @@ -#!/usr/bin/env bash - -export PYTHONPATH=../../../:$PYTHONPATH - -stage=1 -stop_stage=10 -generated_wav_path="flow-matching/exp/generated_wavs" - -. shared/parse_options.sh || exit 1 - - -log() { - # This function is from espnet - local fname=${BASH_SOURCE[1]##*/} - echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*" -} - -if [ $stage -le -2 ] && [ $stop_stage -ge -2 ]; then - log "Stage -2: Install dependencies and download models" - - pip install -r requirements-eval.txt - pip install git+https://github.com/sarulab-speech/UTMOSv2.git - modelscope download --model k2-fsa/TTS_eval_models --local_dir TTS_eval_models -fi - - -if [ $stage -le -1 ] && [ $stop_stage -ge -1 ]; then - log "Stage -1: Prepare evaluation data." - - mkdir -p data/reference/librispeech-test-clean - - gunzip -c data/fbank/librispeech_cuts_with_prompts_test-clean.jsonl.gz | \ - jq -r '"\(.recording.sources[0].source)"' | \ - awk '{split($1, a, "/"); cmd="cp "$1" data/reference/librispeech-test-clean/"a[length(a)]; print cmd; system(cmd)}' - - - mkdir -p data/reference/librispeech-test-clean-prompt - gunzip -c data/fbank/librispeech_cuts_with_prompts_test-clean.jsonl.gz | \ - jq -r '"\(.custom.prompt.recording.sources[0].source) \(.recording.sources[0].source)"' | \ - awk '{split($2, a, "/"); cmd="cp "$1" data/reference/librispeech-test-clean-prompt/"a[length(a)]; print cmd; system(cmd)}' -fi - - -if [ $stage -le 1 ] && [ $stop_stage -ge 1 ]; then - log "Stage 1: Evaluate the model with FSD." - - python local/evaluate_fsd.py --real-path data/reference/librispeech-test-clean \ - --eval-path $generated_wav_path -fi - -if [ $stage -le 2 ] && [ $stop_stage -ge 2 ]; then - log "Stage 2: Evaluate the model with SIM." - - python local/evaluate_sim.py --real-path data/reference/librispeech-test-clean \ - --eval-path $generated_wav_path -fi - -if [ $stage -le 3 ] && [ $stop_stage -ge 3 ]; then - log "Stage 3: Evaluate the model with UTMOS." - - python local/evaluate_utmos.py --wav-path $generated_wav_path -fi - -if [ $stage -le 4 ] && [ $stop_stage -ge 4 ]; then - log "Stage 4: Evaluate the model with UTMOSv2." - - python local/evaluate_utmosv2.py --wav-path $generated_wav_path -fi - -if [ $stage -le 5 ] && [ $stop_stage -ge 5 ]; then - log "Stage 5: Evaluate the model with WER." - - python local/evaluate_wer_hubert.py --wav-path $generated_wav_path \ - --decode-path $generated_wav_path/decode -fi