From ec6c8f748d7e91f3477e081554bf3b39b55287ea Mon Sep 17 00:00:00 2001 From: yfyeung Date: Fri, 9 May 2025 17:18:22 +0000 Subject: [PATCH] fix data prepare update --- egs/speech_llm/ASR_LLM/prepare.sh | 10 ++++++++++ egs/speech_llm/ASR_LLM/zipformer_llm_zh/train.py | 2 +- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/egs/speech_llm/ASR_LLM/prepare.sh b/egs/speech_llm/ASR_LLM/prepare.sh index 8ca3c1c36..d602ce194 100755 --- a/egs/speech_llm/ASR_LLM/prepare.sh +++ b/egs/speech_llm/ASR_LLM/prepare.sh @@ -37,6 +37,15 @@ if [ $stage -le 1 ] && [ $stop_stage -ge 1 ]; then huggingface-cli download --repo-type dataset --local-dir data/fbank yuekai/wenetspeech_whisper_fbank_lhotse huggingface-cli download --repo-type dataset --local-dir data/fbank yuekai/multi_hans_zh_whisper_fbank_lhotse huggingface-cli download --repo-type dataset --local-dir data/fbank yuekai/alimeeting_aishell4_training_whisper_fbank_lhotse + mkdir data/fbank/wenetspeech + mv data/fbank/cuts_L_fixed.jsonl.gz data/fbank/wenetspeech/ + mv data/fbank/cuts_DEV_fixed.jsonl.gz data/fbank/wenetspeech/ + mv data/fbank/cuts_TEST_MEETING.jsonl.gz data/fbank/wenetspeech/ + mv data/fbank/cuts_TEST_NET.jsonl.gz data/fbank/wenetspeech/ + mv data/fbank/L_split_100 data/fbank/wenetspeech/ + mv data/fbank/feats_DEV.lca data/fbank/wenetspeech/ + mv data/fbank/feats_TEST_MEETING.lca data/fbank/wenetspeech/ + mv data/fbank/feats_TEST_NET.lca data/fbank/wenetspeech/ fi if [ $stage -le 2 ] && [ $stop_stage -ge 2 ]; then @@ -46,4 +55,5 @@ if [ $stage -le 2 ] && [ $stop_stage -ge 2 ]; then mkdir data_speechio huggingface-cli download --repo-type model --local-dir data_speechio yuekai/icefall_asr_speechio mv data_speechio/fbank/* data/fbank + rm -rf data_speechio fi diff --git a/egs/speech_llm/ASR_LLM/zipformer_llm_zh/train.py b/egs/speech_llm/ASR_LLM/zipformer_llm_zh/train.py index 77c6a9b95..82ba1abb3 100755 --- a/egs/speech_llm/ASR_LLM/zipformer_llm_zh/train.py +++ b/egs/speech_llm/ASR_LLM/zipformer_llm_zh/train.py @@ -364,7 +364,7 @@ def get_parser(): parser.add_argument( "--use-aishell", type=str2bool, - default=True, + default=False, help="Whether to only use aishell1 dataset for training.", )