From 5c21518870064d7393d96a294ff4e908d6a334f2 Mon Sep 17 00:00:00 2001 From: root Date: Fri, 21 Feb 2025 02:31:40 +0000 Subject: [PATCH] add f5-tts requirements.txt and drop the inline pip install from prepare.sh --- .../TTS/f5-tts/requirements.txt | 36 +++++++++++++++++++ egs/wenetspeech4tts/TTS/prepare.sh | 1 - 2 files changed, 36 insertions(+), 1 deletion(-) create mode 100644 egs/wenetspeech4tts/TTS/f5-tts/requirements.txt diff --git a/egs/wenetspeech4tts/TTS/f5-tts/requirements.txt b/egs/wenetspeech4tts/TTS/f5-tts/requirements.txt new file mode 100644 index 000000000..63f1e237c --- /dev/null +++ b/egs/wenetspeech4tts/TTS/f5-tts/requirements.txt @@ -0,0 +1,36 @@ +# F5-TTS +accelerate>=0.33.0 +bitsandbytes>0.37.0 +cached_path +click +datasets +ema_pytorch>=0.5.2 +gradio>=3.45.2 +hydra-core>=1.3.0 +jieba +librosa +matplotlib +numpy<=1.26.4 +pydub +pypinyin +safetensors +soundfile +tomli +torch>=2.0.0 +torchaudio>=2.0.0 +torchdiffeq +tqdm>=4.65.0 +transformers +x_transformers>=1.31.14 + +# icefall +kaldialign +lhotse +tensorboard +bigvganinference +sentencepiece +sherpa-onnx +k2 + +# semantic experiment +s3tokenizer diff --git a/egs/wenetspeech4tts/TTS/prepare.sh b/egs/wenetspeech4tts/TTS/prepare.sh index 7b7cd24b7..f1daa0e62 100755 --- a/egs/wenetspeech4tts/TTS/prepare.sh +++ b/egs/wenetspeech4tts/TTS/prepare.sh @@ -142,7 +142,6 @@ fi if [ $stage -le 7 ] && [ $stop_stage -ge 7 ]; then log "Stage 7: Extract cosyvoice2 FSQ token (used by ./f5-tts semantic token experiment)" - pip install s3tokenizer split_name=("valid" "test" "train") for split in "${split_name[@]}"; do echo "Processing $split"