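#!/usr/bin/env bash
# icefall/egs/zipvoice/scripts/evaluate.sh
#
# Computes SIM-o, WER and UTMOS for Seed-TTS test-en, Seed-TTS test-zh and
# Librispeech test-clean.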

# Environment for the Python evaluation scripts launched below.
# Expose only GPU 0 to CUDA.
export CUDA_VISIBLE_DEVICES=0
# Suppress Python warnings.
export PYTHONWARNINGS="ignore"
# Put ../../ in front of whatever PYTHONPATH already contains.
export PYTHONPATH="../../:${PYTHONPATH}"

# Uncomment this if you have trouble connecting to HuggingFace
# export HF_ENDPOINT=https://hf-mirror.com

# Inclusive range of stages to run:
#   1 = Seed-TTS test-en, 2 = Seed-TTS test-zh, 3 = Librispeech test-clean.
start_stage="1"
end_stage="3"

# Models used for SIM-o evaluation.
# SV model wavlm_large_finetune.pth is downloaded from https://github.com/microsoft/UniSpeech/tree/main/downstreams/speaker_verification
# SSL model wavlm_large.pt is downloaded from https://huggingface.co/s3prl/converted_ckpts/resolve/main/wavlm_large.pt
# Speaker-verification checkpoint passed as --sv-model-path to evaluate_sim.py.
sv_model_path="model/UniSpeech/wavlm_large_finetune.pth"
# WavLM checkpoint passed as --ssl-model-path to evaluate_sim.py.
wavlm_model_path="model/s3prl/wavlm_large.pt"

# Models used for UTMOS evaluation.
# mkdir -p model/huggingface/utmos
# wget https://huggingface.co/spaces/sarulab-speech/UTMOS-demo/resolve/main/epoch%3D3-step%3D7459.ckpt -O model/huggingface/utmos/utmos.pt
# wget https://huggingface.co/spaces/sarulab-speech/UTMOS-demo/resolve/main/wav2vec_small.pt -O model/huggingface/utmos/wav2vec_small.pt
# UTMOS checkpoint passed as --utmos-model-path to evaluate_utmos.py.
utmos_model_path="model/huggingface/utmos/utmos.pt"
# wav2vec checkpoint passed as --ssl-model-path to evaluate_utmos.py.
wav2vec_model_path="model/huggingface/utmos/wav2vec_small.pt"


# Stage 1: Seed-TTS test-en. Runs the SIM-o, WER and UTMOS scripts in turn.
# Executed only when 1 lies within [start_stage, end_stage].
if [ "${start_stage}" -le 1 ] && [ "${end_stage}" -ge 1 ]; then

        echo "=====Evaluate for Seed-TTS test-en======="
        # test_list and wav_path are handed to the evaluation scripts below.
        test_list=testset/test_seedtts_en.tsv
        wav_path=results/zipvoice_seedtts_en

        echo "${wav_path}"
        echo "-----Computing SIM-o-----"
        python3 local/evaluate_sim.py \
                --sv-model-path "${sv_model_path}" \
                --ssl-model-path "${wavlm_model_path}" \
                --eval-path "${wav_path}" \
                --test-list "${test_list}"

        echo "-----Computing WER-----"
        python3 local/evaluate_wer_seedtts.py \
                --test-list "${test_list}" \
                --wav-path "${wav_path}" \
                --lang "en"

        echo "-----Computing UTMOS-----"
        python3 local/evaluate_utmos.py \
                --wav-path "${wav_path}" \
                --utmos-model-path "${utmos_model_path}" \
                --ssl-model-path "${wav2vec_model_path}"

fi

# Stage 2: Seed-TTS test-zh. Runs the SIM-o, WER and UTMOS scripts in turn.
# Executed only when 2 lies within [start_stage, end_stage].
if [ "${start_stage}" -le 2 ] && [ "${end_stage}" -ge 2 ]; then
        echo "=====Evaluate for Seed-TTS test-zh======="
        # test_list and wav_path are handed to the evaluation scripts below.
        test_list=testset/test_seedtts_zh.tsv
        wav_path=results/zipvoice_seedtts_zh

        echo "${wav_path}"
        echo "-----Computing SIM-o-----"
        python3 local/evaluate_sim.py \
                --sv-model-path "${sv_model_path}" \
                --ssl-model-path "${wavlm_model_path}" \
                --eval-path "${wav_path}" \
                --test-list "${test_list}"

        echo "-----Computing WER-----"
        python3 local/evaluate_wer_seedtts.py \
                --test-list "${test_list}" \
                --wav-path "${wav_path}" \
                --lang "zh"

        echo "-----Computing UTMOS-----"
        python3 local/evaluate_utmos.py \
                --wav-path "${wav_path}" \
                --utmos-model-path "${utmos_model_path}" \
                --ssl-model-path "${wav2vec_model_path}"
fi

# Stage 3: Librispeech test-clean. Runs the SIM-o, WER and UTMOS scripts in
# turn; WER uses evaluate_wer_hubert.py here rather than the Seed-TTS script.
# Executed only when 3 lies within [start_stage, end_stage].
if [ "${start_stage}" -le 3 ] && [ "${end_stage}" -ge 3 ]; then
        echo "=====Evaluate for Librispeech test-clean======="
        # test_list and wav_path are handed to the evaluation scripts below.
        test_list=testset/test_librispeech_pc_test_clean.tsv
        wav_path=results/zipvoice_librispeech_test_clean

        echo "${wav_path}"
        echo "-----Computing SIM-o-----"
        python3 local/evaluate_sim.py \
                --sv-model-path "${sv_model_path}" \
                --ssl-model-path "${wavlm_model_path}" \
                --eval-path "${wav_path}" \
                --test-list "${test_list}"

        echo "-----Computing WER-----"
        # No trailing backslash on the last argument: a dangling continuation
        # would splice the following line into this command.
        python3 local/evaluate_wer_hubert.py \
                --test-list "${test_list}" \
                --wav-path "${wav_path}"

        echo "-----Computing UTMOS-----"
        python3 local/evaluate_utmos.py \
                --wav-path "${wav_path}" \
                --utmos-model-path "${utmos_model_path}" \
                --ssl-model-path "${wav2vec_model_path}"

fi