From 5ec95e5482867de02eece6615ff51662c7ed9b37 Mon Sep 17 00:00:00 2001 From: Yifan Yang <64255737+yfyeung@users.noreply.github.com> Date: Wed, 23 Apr 2025 16:18:38 +0800 Subject: [PATCH] Fix SpeechLLM recipe (#1926) --- egs/speech_llm/ASR_LLM/RESULTS.md | 8 ++++---- egs/speech_llm/ASR_LLM/prepare.sh | 13 ++++++++----- egs/speech_llm/ASR_LLM/shared | 1 + 3 files changed, 13 insertions(+), 9 deletions(-) mode change 100644 => 100755 egs/speech_llm/ASR_LLM/prepare.sh create mode 120000 egs/speech_llm/ASR_LLM/shared diff --git a/egs/speech_llm/ASR_LLM/RESULTS.md b/egs/speech_llm/ASR_LLM/RESULTS.md index 830c70397..01c48a82e 100644 --- a/egs/speech_llm/ASR_LLM/RESULTS.md +++ b/egs/speech_llm/ASR_LLM/RESULTS.md @@ -42,8 +42,8 @@ huggingface-cli download --local-dir models/whisper yuekai/icefall_asr_aishel # For multi-hans fine-tuned whisper model # huggingface-cli download --local-dir models/whisper yuekai/icefall_asr_multi-hans-zh_whisper v1.1/whisper-large-v2-multi-hans-zh-epoch-3-avg-10.pt -# huggingface-clie download --local-dir models/qwen Qwen/Qwen2-7B-Instruct -huggingface-clie download --local-dir models/qwen Qwen/Qwen2-1.5B-Instruct +# huggingface-cli download --local-dir models/qwen Qwen/Qwen2-7B-Instruct +huggingface-cli download --local-dir models/qwen Qwen/Qwen2-1.5B-Instruct # First, we only train the projector and freeze other modules. torchrun --nproc_per_node 8 ./whisper_llm_zh/train.py \ @@ -57,7 +57,7 @@ torchrun --nproc_per_node 8 ./whisper_llm_zh/train.py \ --use-flash-attn True \ --use-lora False --unfreeze-llm False -# Then we jointly train the projector and LLM LoRA modules. +# Then, we jointly train the projector and LLM LoRA modules. torchrun --nproc_per_node 8 ./whisper_llm_zh/train.py \ --max-duration 200 \ --exp-dir ./whisper_llm_zh/exp_test \ @@ -81,7 +81,7 @@ huggingface-cli download --local-dir models/whisper yuekai/icefall_asr_aishel # For multi-hans fine-tuned whisper model # huggingface-cli download --local-dir models/whisper yuekai/icefall_asr_multi-hans-zh_whisper v1.1/whisper-large-v2-multi-hans-zh-epoch-3-avg-10.pt -huggingface-clie download --local-dir models/qwen Qwen/Qwen2-7B-Instruct +huggingface-cli download --local-dir models/qwen Qwen/Qwen2-7B-Instruct mkdir -p whisper_llm_zh/exp_aishell_whisper_qwen2_1.5B ln -s models/checkpoint/epoch-10-avg-5.pt whisper_llm_zh/exp_aishell_whisper_qwen2_1.5B/epoch-999.pt diff --git a/egs/speech_llm/ASR_LLM/prepare.sh b/egs/speech_llm/ASR_LLM/prepare.sh old mode 100644 new mode 100755 index 6f5ed5448..8ca3c1c36 --- a/egs/speech_llm/ASR_LLM/prepare.sh +++ b/egs/speech_llm/ASR_LLM/prepare.sh @@ -7,6 +7,9 @@ set -eou pipefail stage=0 stop_stage=0 + +. shared/parse_options.sh || exit 1 + # All files generated by this script are saved in "data". # You can safely remove "data" and rerun this script to regenerate it. mkdir -p data @@ -23,7 +26,7 @@ if [ $stage -le 0 ] && [ $stop_stage -ge 0 ]; then # pip install huggingface_hub['cli'] # for aishell 1 - huggingface-cli download --local-dir data yuekai/aishell_whisper_fbank_lhotse + huggingface-cli download --repo-type dataset --local-dir data yuekai/aishell_whisper_fbank_lhotse fi @@ -31,9 +34,9 @@ if [ $stage -le 1 ] && [ $stop_stage -ge 1 ]; then log "stage 1: Download whisper-large-v2 multi-hans-zh fbank feature from huggingface" # for multi-hans-zh - huggingface-cli download --local-dir data/fbank yuekai/wenetspeech_whisper_fbank_lhotse - huggingface-cli download --local-dir data/fbank yuekai/multi_hans_zh_whisper_fbank_lhotse - huggingface-cli download --local-dir data/fbank yuekai/alimeeting_aishell4_training_whisper_fbank_lhotse + huggingface-cli download --repo-type dataset --local-dir data/fbank yuekai/wenetspeech_whisper_fbank_lhotse + huggingface-cli download --repo-type dataset --local-dir data/fbank yuekai/multi_hans_zh_whisper_fbank_lhotse + huggingface-cli download --repo-type dataset --local-dir data/fbank yuekai/alimeeting_aishell4_training_whisper_fbank_lhotse fi if [ $stage -le 2 ] && [ $stop_stage -ge 2 ]; then @@ -41,6 +44,6 @@ if [ $stage -le 2 ] && [ $stop_stage -ge 2 ]; then # for speechio test sets mkdir data_speechio - huggingface-cli download --local-dir data_speechio yuekai/icefall_asr_speechio + huggingface-cli download --repo-type model --local-dir data_speechio yuekai/icefall_asr_speechio mv data_speechio/fbank/* data/fbank fi diff --git a/egs/speech_llm/ASR_LLM/shared b/egs/speech_llm/ASR_LLM/shared new file mode 120000 index 000000000..4cbd91a7e --- /dev/null +++ b/egs/speech_llm/ASR_LLM/shared @@ -0,0 +1 @@ +../../../icefall/shared \ No newline at end of file