From df41e7686bfbdf754ec9bf7dfe87ff863c075bac Mon Sep 17 00:00:00 2001 From: hediehloo Date: Sun, 16 Nov 2025 15:27:32 +0000 Subject: [PATCH 1/4] add train qwen --- .gitignore | 3 ++- requirements.txt | 7 ++++++- train/qwen/a.sh | 28 ++++++++++++++++++++++++++++ 3 files changed, 36 insertions(+), 2 deletions(-) create mode 100644 train/qwen/a.sh diff --git a/.gitignore b/.gitignore index b5e0f9d..dd95723 100644 --- a/.gitignore +++ b/.gitignore @@ -6,4 +6,5 @@ data *.json models *.log -research_notebook/data \ No newline at end of file +research_notebook/data +train/qwen/output diff --git a/requirements.txt b/requirements.txt index 25de51a..40b45ae 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,4 +2,9 @@ python-dotenv==1.1.1 hazm=0.10.0 faiss-cpu==1.12.0 sentence-transformers==5.1.2 -einops==0.8.1 \ No newline at end of file +einops==0.8.1 +ms-swift +mteb==1.39.7 +transformers +liger-kernel +deepspeed \ No newline at end of file diff --git a/train/qwen/a.sh b/train/qwen/a.sh new file mode 100644 index 0000000..b874d66 --- /dev/null +++ b/train/qwen/a.sh @@ -0,0 +1,28 @@ +# pip install flash-attn --no-build-isolation + + +nproc_per_node=1 +CUDA_VISIBLE_DEVICES=0 \ +NPROC_PER_NODE=$nproc_per_node \ +swift sft \ + --model $(pwd)/../../data/models/Qwen3-Embedding-0.6B/model \ + --task_type embedding \ + --model_type qwen3_emb \ + --train_type full \ + --dataset my_local_dataset \ + --custom_register_path /home/hediehloo/codes/embedding/embedding_model/data/dataset/my_dataset_register.py \ + --split_dataset_ratio 0.005 \ + --eval_strategy steps \ + --output_dir output \ + --eval_steps 1000 \ + --num_train_epochs 1 \ + --save_steps 1000 \ + --save_total_limit 10 \ + --per_device_train_batch_size 16 \ + --per_device_eval_batch_size 16 \ + --gradient_accumulation_steps 4 \ + --learning_rate 2.4e-5 \ + --loss_type infonce \ + --label_names labels \ + --dataloader_drop_last true \ + --deepspeed zero3 \ No newline at end of file From 4fb3504b33913b5f8810a61658c06fa6bc3f3928 Mon Sep 17 00:00:00 2001 From: "a.hediehloo" Date: Thu, 20 Nov 2025 06:48:29 +0000 Subject: [PATCH 2/4] qwen train: lora --- train/qwen/a.sh | 11 ++++++++--- train/qwen/merge_model.py | 24 ++++++++++++++++++++++++ 2 files changed, 32 insertions(+), 3 deletions(-) create mode 100644 train/qwen/merge_model.py diff --git a/train/qwen/a.sh b/train/qwen/a.sh index b874d66..49617b6 100644 --- a/train/qwen/a.sh +++ b/train/qwen/a.sh @@ -2,15 +2,20 @@ nproc_per_node=1 + +INFONCE_USE_BATCH=False \ CUDA_VISIBLE_DEVICES=0 \ NPROC_PER_NODE=$nproc_per_node \ swift sft \ --model $(pwd)/../../data/models/Qwen3-Embedding-0.6B/model \ --task_type embedding \ --model_type qwen3_emb \ - --train_type full \ + --train_type lora \ + --lora_rank 8 \ + --lora_alpha 16 \ + --target_modules all-linear \ --dataset my_local_dataset \ - --custom_register_path /home/hediehloo/codes/embedding/embedding_model/data/dataset/my_dataset_register.py \ + --custom_register_path $(pwd)/../../data/dataset/my_dataset_register.py \ --split_dataset_ratio 0.005 \ --eval_strategy steps \ --output_dir output \ @@ -25,4 +30,4 @@ swift sft \ --loss_type infonce \ --label_names labels \ --dataloader_drop_last true \ - --deepspeed zero3 \ No newline at end of file + --deepspeed zero3 diff --git a/train/qwen/merge_model.py b/train/qwen/merge_model.py new file mode 100644 index 0000000..1ff9091 --- /dev/null +++ b/train/qwen/merge_model.py @@ -0,0 +1,24 @@ +import json +import numpy as np +import os +from peft import PeftModel +import torch +from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig + +def merge(base_model_path, peft_model_path, save_path): + base_model = AutoModelForCausalLM.from_pretrained(base_model_path, torch_dtype="bfloat16") + ft_model = PeftModel.from_pretrained(base_model, peft_model_path) + ft_model = ft_model.merge_and_unload() + ft_model.save_pretrained(save_path) + +def main(): + file_path = os.path.dirname(__file__) + + base_model_path = file_path + "/../../data/models/Qwen3-Embedding-0.6B/model" + peft_model_path = file_path + "/output/v0-20251118-115015/checkpoint-3434" + save_path = file_path + "/output/v0-20251118-115015/merged_checkpoint-3434" + merge(base_model_path, peft_model_path, save_path) + + +if __name__ == "__main__": + main() \ No newline at end of file From 6aa4f8c0ea71f75595273c09af586f17c065f885 Mon Sep 17 00:00:00 2001 From: "a.hediehloo" Date: Sun, 23 Nov 2025 08:40:24 +0000 Subject: [PATCH 3/4] merged model --- train/qwen/merge_model.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/train/qwen/merge_model.py b/train/qwen/merge_model.py index 1ff9091..1aff32c 100644 --- a/train/qwen/merge_model.py +++ b/train/qwen/merge_model.py @@ -15,8 +15,8 @@ def main(): file_path = os.path.dirname(__file__) base_model_path = file_path + "/../../data/models/Qwen3-Embedding-0.6B/model" - peft_model_path = file_path + "/output/v0-20251118-115015/checkpoint-3434" - save_path = file_path + "/output/v0-20251118-115015/merged_checkpoint-3434" + peft_model_path = file_path + "/output/v1-20251122-184545/checkpoint-3434" + save_path = file_path + "/output/v1-20251122-184545/merged_checkpoint-3434" merge(base_model_path, peft_model_path, save_path) From 7c945a16038f14c6588e9394fd775de3a274154b Mon Sep 17 00:00:00 2001 From: "a.hediehloo" Date: Sun, 23 Nov 2025 10:46:27 +0000 Subject: [PATCH 4/4] add mlflow --- .gitignore | 1 + train/qwen/a.sh | 4 +++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index dd95723..b84bb2a 100644 --- a/.gitignore +++ b/.gitignore @@ -8,3 +8,4 @@ models *.log research_notebook/data train/qwen/output +train/qwen/mlruns diff --git a/train/qwen/a.sh b/train/qwen/a.sh index 49617b6..bfbce74 100644 --- a/train/qwen/a.sh +++ b/train/qwen/a.sh @@ -3,6 +3,7 @@ nproc_per_node=1 +MLFLOW_TRACKING_URI=http://0.0.0.0:5004 \ INFONCE_USE_BATCH=False \ CUDA_VISIBLE_DEVICES=0 \ NPROC_PER_NODE=$nproc_per_node \ @@ -30,4 +31,5 @@ swift sft \ --loss_type infonce \ --label_names labels \ --dataloader_drop_last true \ - --deepspeed zero3 + --deepspeed zero3 \ + --report_to mlflow