diff --git a/.gitignore b/.gitignore
index b5e0f9d..b84bb2a 100644
--- a/.gitignore
+++ b/.gitignore
@@ -6,4 +6,6 @@ data
 *.json
 models
 *.log
-research_notebook/data
\ No newline at end of file
+research_notebook/data
+train/qwen/output
+train/qwen/mlruns
diff --git a/requirements.txt b/requirements.txt
index 25de51a..40b45ae 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -2,4 +2,9 @@ python-dotenv==1.1.1
 hazm=0.10.0
 faiss-cpu==1.12.0
 sentence-transformers==5.1.2
-einops==0.8.1
\ No newline at end of file
+einops==0.8.1
+ms-swift
+mteb==1.39.7
+transformers
+liger-kernel
+deepspeed
diff --git a/train/qwen/a.sh b/train/qwen/a.sh
new file mode 100644
index 0000000..bfbce74
--- /dev/null
+++ b/train/qwen/a.sh
@@ -0,0 +1,36 @@
+#!/bin/bash
+# pip install flash-attn --no-build-isolation
+
+
+nproc_per_node=1
+
+MLFLOW_TRACKING_URI=http://0.0.0.0:5004 \
+INFONCE_USE_BATCH=False \
+CUDA_VISIBLE_DEVICES=0 \
+NPROC_PER_NODE=$nproc_per_node \
+swift sft \
+    --model $(pwd)/../../data/models/Qwen3-Embedding-0.6B/model \
+    --task_type embedding \
+    --model_type qwen3_emb \
+    --train_type lora \
+    --lora_rank 8 \
+    --lora_alpha 16 \
+    --target_modules all-linear \
+    --dataset my_local_dataset \
+    --custom_register_path $(pwd)/../../data/dataset/my_dataset_register.py \
+    --split_dataset_ratio 0.005 \
+    --eval_strategy steps \
+    --output_dir output \
+    --eval_steps 1000 \
+    --num_train_epochs 1 \
+    --save_steps 1000 \
+    --save_total_limit 10 \
+    --per_device_train_batch_size 16 \
+    --per_device_eval_batch_size 16 \
+    --gradient_accumulation_steps 4 \
+    --learning_rate 2.4e-5 \
+    --loss_type infonce \
+    --label_names labels \
+    --dataloader_drop_last true \
+    --deepspeed zero3 \
+    --report_to mlflow
diff --git a/train/qwen/merge_model.py b/train/qwen/merge_model.py
new file mode 100644
index 0000000..1aff32c
--- /dev/null
+++ b/train/qwen/merge_model.py
@@ -0,0 +1,24 @@
+import os
+
+from peft import PeftModel
+from transformers import AutoModelForCausalLM
+
+
+def merge(base_model_path, peft_model_path, save_path):
+    """Merge LoRA adapter weights from *peft_model_path* into the base model and save to *save_path*."""
+    base_model = AutoModelForCausalLM.from_pretrained(base_model_path, torch_dtype="bfloat16")
+    ft_model = PeftModel.from_pretrained(base_model, peft_model_path)
+    ft_model = ft_model.merge_and_unload()
+    ft_model.save_pretrained(save_path)
+
+
+def main():
+    file_path = os.path.dirname(__file__)
+    base_model_path = file_path + "/../../data/models/Qwen3-Embedding-0.6B/model"
+    peft_model_path = file_path + "/output/v1-20251122-184545/checkpoint-3434"
+    save_path = file_path + "/output/v1-20251122-184545/merged_checkpoint-3434"
+    merge(base_model_path, peft_model_path, save_path)
+
+
+if __name__ == "__main__":
+    main()