mirror of
https://github.com/k2-fsa/icefall.git
synced 2025-08-09 01:52:41 +00:00
Minor fixes
This commit is contained in:
parent
dc731ea089
commit
e45da09009
@ -105,7 +105,7 @@ pip install -r ../../requirements.txt
|
|||||||
#### 1.1. Prepare the Emilia dataset
|
#### 1.1. Prepare the Emilia dataset
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
bash scripts/prepare_emilia.sh --stage 0 --stop-stage 4
|
bash scripts/prepare_emilia.sh
|
||||||
```
|
```
|
||||||
|
|
||||||
See [scripts/prepare_emilia.sh](scripts/prepare_emilia.sh) for step by step instructions.
|
See [scripts/prepare_emilia.sh](scripts/prepare_emilia.sh) for step by step instructions.
|
||||||
@ -113,7 +113,7 @@ See [scripts/prepare_emilia.sh](scripts/prepare_emilia.sh) for step by step inst
|
|||||||
#### 1.2 Prepare the LibriTTS dataset
|
#### 1.2 Prepare the LibriTTS dataset
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
bash scripts/prepare_libritts.sh --stage 0 --stop-stage 3
|
bash scripts/prepare_libritts.sh
|
||||||
```
|
```
|
||||||
|
|
||||||
See [scripts/prepare_libritts.sh](scripts/prepare_libritts.sh) for step by step instructions.
|
See [scripts/prepare_libritts.sh](scripts/prepare_libritts.sh) for step by step instructions.
|
||||||
@ -139,7 +139,7 @@ python3 zipvoice/train_flow.py \
|
|||||||
--lr-hours 30000 \
|
--lr-hours 30000 \
|
||||||
--lr-batches 7500 \
|
--lr-batches 7500 \
|
||||||
--token-file "data/tokens_emilia.txt" \
|
--token-file "data/tokens_emilia.txt" \
|
||||||
--manifest-dir "data/fbank_emilia" \
|
--manifest-dir "data/fbank" \
|
||||||
--num-epochs 11 \
|
--num-epochs 11 \
|
||||||
--exp-dir zipvoice/exp_zipvoice
|
--exp-dir zipvoice/exp_zipvoice
|
||||||
```
|
```
|
||||||
@ -172,7 +172,7 @@ python3 zipvoice/train_distill.py \
|
|||||||
--base-lr 0.0005 \
|
--base-lr 0.0005 \
|
||||||
--max-duration 500 \
|
--max-duration 500 \
|
||||||
--token-file "data/tokens_emilia.txt" \
|
--token-file "data/tokens_emilia.txt" \
|
||||||
--manifest-dir "data/fbank_emilia" \
|
--manifest-dir "data/fbank" \
|
||||||
--teacher-model zipvoice/exp_zipvoice/epoch-11-avg-4.pt \
|
--teacher-model zipvoice/exp_zipvoice/epoch-11-avg-4.pt \
|
||||||
--num-updates 60000 \
|
--num-updates 60000 \
|
||||||
--distill-stage "first" \
|
--distill-stage "first" \
|
||||||
@ -205,7 +205,7 @@ python3 zipvoice/train_distill.py \
|
|||||||
--base-lr 0.0001 \
|
--base-lr 0.0001 \
|
||||||
--max-duration 200 \
|
--max-duration 200 \
|
||||||
--token-file "data/tokens_emilia.txt" \
|
--token-file "data/tokens_emilia.txt" \
|
||||||
--manifest-dir "data/fbank_emilia" \
|
--manifest-dir "data/fbank" \
|
||||||
--teacher-model zipvoice/exp_zipvoice_distill_1stage/iter-60000-avg-7.pt \
|
--teacher-model zipvoice/exp_zipvoice_distill_1stage/iter-60000-avg-7.pt \
|
||||||
--num-updates 2000 \
|
--num-updates 2000 \
|
||||||
--distill-stage "second" \
|
--distill-stage "second" \
|
||||||
@ -233,7 +233,7 @@ python3 zipvoice/train_flow.py \
|
|||||||
--lr-epochs 10 \
|
--lr-epochs 10 \
|
||||||
--lr-batches 7500 \
|
--lr-batches 7500 \
|
||||||
--token-file "data/tokens_libritts.txt" \
|
--token-file "data/tokens_libritts.txt" \
|
||||||
--manifest-dir "data/fbank_libritts" \
|
--manifest-dir "data/fbank" \
|
||||||
--num-epochs 60 \
|
--num-epochs 60 \
|
||||||
--exp-dir zipvoice/exp_zipvoice_libritts
|
--exp-dir zipvoice/exp_zipvoice_libritts
|
||||||
```
|
```
|
||||||
@ -266,7 +266,7 @@ python3 zipvoice/train_distill.py \
|
|||||||
--base-lr 0.001 \
|
--base-lr 0.001 \
|
||||||
--max-duration 250 \
|
--max-duration 250 \
|
||||||
--token-file "data/tokens_libritts.txt" \
|
--token-file "data/tokens_libritts.txt" \
|
||||||
--manifest-dir "data/fbank_libritts" \
|
--manifest-dir "data/fbank" \
|
||||||
--teacher-model zipvoice/exp_zipvoice_libritts/epoch-60-avg-10.pt \
|
--teacher-model zipvoice/exp_zipvoice_libritts/epoch-60-avg-10.pt \
|
||||||
--num-epochs 6 \
|
--num-epochs 6 \
|
||||||
--distill-stage "first" \
|
--distill-stage "first" \
|
||||||
@ -299,7 +299,7 @@ python3 zipvoice/train_distill.py \
|
|||||||
--base-lr 0.001 \
|
--base-lr 0.001 \
|
||||||
--max-duration 250 \
|
--max-duration 250 \
|
||||||
--token-file "data/tokens_libritts.txt" \
|
--token-file "data/tokens_libritts.txt" \
|
||||||
--manifest-dir "data/fbank_libritts" \
|
--manifest-dir "data/fbank" \
|
||||||
--teacher-model zipvoice/exp_zipvoice_distill_1stage_libritts/epoch-6-avg-3.pt \
|
--teacher-model zipvoice/exp_zipvoice_distill_1stage_libritts/epoch-6-avg-3.pt \
|
||||||
--num-epochs 6 \
|
--num-epochs 6 \
|
||||||
--distill-stage "second" \
|
--distill-stage "second" \
|
||||||
|
@ -1,75 +0,0 @@
|
|||||||
#!/usr/bin/env bash
|
|
||||||
|
|
||||||
export PYTHONPATH=../../../:$PYTHONPATH
|
|
||||||
|
|
||||||
stage=1
|
|
||||||
stop_stage=10
|
|
||||||
generated_wav_path="flow-matching/exp/generated_wavs"
|
|
||||||
|
|
||||||
. shared/parse_options.sh || exit 1
|
|
||||||
|
|
||||||
|
|
||||||
log() {
|
|
||||||
# This function is from espnet
|
|
||||||
local fname=${BASH_SOURCE[1]##*/}
|
|
||||||
echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*"
|
|
||||||
}
|
|
||||||
|
|
||||||
if [ $stage -le -2 ] && [ $stop_stage -ge -2 ]; then
|
|
||||||
log "Stage -2: Install dependencies and download models"
|
|
||||||
|
|
||||||
pip install -r requirements-eval.txt
|
|
||||||
pip install git+https://github.com/sarulab-speech/UTMOSv2.git
|
|
||||||
modelscope download --model k2-fsa/TTS_eval_models --local_dir TTS_eval_models
|
|
||||||
fi
|
|
||||||
|
|
||||||
|
|
||||||
if [ $stage -le -1 ] && [ $stop_stage -ge -1 ]; then
|
|
||||||
log "Stage -1: Prepare evaluation data."
|
|
||||||
|
|
||||||
mkdir -p data/reference/librispeech-test-clean
|
|
||||||
|
|
||||||
gunzip -c data/fbank/librispeech_cuts_with_prompts_test-clean.jsonl.gz | \
|
|
||||||
jq -r '"\(.recording.sources[0].source)"' | \
|
|
||||||
awk '{split($1, a, "/"); cmd="cp "$1" data/reference/librispeech-test-clean/"a[length(a)]; print cmd; system(cmd)}'
|
|
||||||
|
|
||||||
|
|
||||||
mkdir -p data/reference/librispeech-test-clean-prompt
|
|
||||||
gunzip -c data/fbank/librispeech_cuts_with_prompts_test-clean.jsonl.gz | \
|
|
||||||
jq -r '"\(.custom.prompt.recording.sources[0].source) \(.recording.sources[0].source)"' | \
|
|
||||||
awk '{split($2, a, "/"); cmd="cp "$1" data/reference/librispeech-test-clean-prompt/"a[length(a)]; print cmd; system(cmd)}'
|
|
||||||
fi
|
|
||||||
|
|
||||||
|
|
||||||
if [ $stage -le 1 ] && [ $stop_stage -ge 1 ]; then
|
|
||||||
log "Stage 1: Evaluate the model with FSD."
|
|
||||||
|
|
||||||
python local/evaluate_fsd.py --real-path data/reference/librispeech-test-clean \
|
|
||||||
--eval-path $generated_wav_path
|
|
||||||
fi
|
|
||||||
|
|
||||||
if [ $stage -le 2 ] && [ $stop_stage -ge 2 ]; then
|
|
||||||
log "Stage 2: Evaluate the model with SIM."
|
|
||||||
|
|
||||||
python local/evaluate_sim.py --real-path data/reference/librispeech-test-clean \
|
|
||||||
--eval-path $generated_wav_path
|
|
||||||
fi
|
|
||||||
|
|
||||||
if [ $stage -le 3 ] && [ $stop_stage -ge 3 ]; then
|
|
||||||
log "Stage 3: Evaluate the model with UTMOS."
|
|
||||||
|
|
||||||
python local/evaluate_utmos.py --wav-path $generated_wav_path
|
|
||||||
fi
|
|
||||||
|
|
||||||
if [ $stage -le 4 ] && [ $stop_stage -ge 4 ]; then
|
|
||||||
log "Stage 4: Evaluate the model with UTMOSv2."
|
|
||||||
|
|
||||||
python local/evaluate_utmosv2.py --wav-path $generated_wav_path
|
|
||||||
fi
|
|
||||||
|
|
||||||
if [ $stage -le 5 ] && [ $stop_stage -ge 5 ]; then
|
|
||||||
log "Stage 5: Evaluate the model with WER."
|
|
||||||
|
|
||||||
python local/evaluate_wer_hubert.py --wav-path $generated_wav_path \
|
|
||||||
--decode-path $generated_wav_path/decode
|
|
||||||
fi
|
|
Loading…
x
Reference in New Issue
Block a user