#!/usr/bin/env bash
# Fine-tune Qwen3-Embedding-0.6B as an embedding model (LoRA + InfoNCE) via ms-swift.
# Prerequisite: pip install flash-attn --no-build-isolation
set -euo pipefail

# Number of processes (GPUs) per node for the swift launcher.
nproc_per_node=1

# Env vars: MLflow tracking server for --report_to mlflow, in-batch negatives for
# the InfoNCE loss, single visible GPU, and the launcher's process count.
MLFLOW_TRACKING_URI=http://0.0.0.0:5004 \
INFONCE_USE_BATCH=True \
CUDA_VISIBLE_DEVICES=0 \
NPROC_PER_NODE="$nproc_per_node" \
swift sft \
  --model "$(pwd)/../../data/models/Qwen3-Embedding-0.6B/model" \
  --task_type embedding \
  --model_type qwen3_emb \
  --train_type lora \
  --lora_rank 16 \
  --lora_alpha 32 \
  --target_modules all-linear \
  --max_length 2048 \
  --dataset v11_dataset_hn \
  --custom_register_path "$(pwd)/../../data/dataset/v11_dataset_hn/generated.py" \
  --split_dataset_ratio 0.005 \
  --eval_strategy steps \
  --output_dir output \
  --eval_steps 1000 \
  --num_train_epochs 1 \
  --save_steps 1000 \
  --save_total_limit 10 \
  --per_device_train_batch_size 16 \
  --per_device_eval_batch_size 16 \
  --gradient_accumulation_steps 4 \
  --learning_rate 3.0e-6 \
  --lr_scheduler_type constant \
  --loss_type infonce \
  --label_names labels \
  --dataloader_drop_last true \
  --deepspeed zero3 \
  --report_to mlflow