{
"llm_model_name_or_path": "./Qwen2.5-0.5B-Instruct",
"data_path": ["./emilia_cosyvoice_v2_token/cosy_v2_tokens_ZH.jsonl"],
"bf16": false,
"output_dir": "./exp_zh",
"num_train_epochs": 3,
"per_device_train_batch_size": 8,
"per_device_eval_batch_size": 8,
"gradient_accumulation_steps": 1,
"evaluation_strategy": "steps",
"eval_steps": 1000,
"save_strategy": "steps",
"save_steps": 5000,
"save_total_limit": 100,
"learning_rate": 0.00005,
"weight_decay": 0.01,
"adam_beta2": 0.95,
"warmup_ratio": 0.03,
"lr_scheduler_type": "cosine",
"logging_steps": 100,
"report_to": "wandb",
"model_max_length": 2048,
"gradient_checkpointing": false,
"dataloader_num_workers": 4,
"dataloader_prefetch_factor": 4,
"deepspeed": "ds_config_zero2.json"
}