{
  "output_dir": "output/firefly-minicpm-2b-dpo-full",
  "model_name_or_path": "openbmb/MiniCPM-2B-dpo-fp16",
  "train_file": "./data/dummy_dpo.jsonl",
  "template_name": "minicpm",
  "train_mode": "full",
  "task_type": "dpo",
  "beta": 0.1,
  "num_train_epochs": 1,
  "per_device_train_batch_size": 1,
  "gradient_accumulation_steps": 16,
  "learning_rate": 5e-7,
  "max_seq_length": 1024,
  "max_prompt_length": 300,
  "logging_steps": 100,
  "save_steps": 100,
  "save_total_limit": 1,
  "lr_scheduler_type": "constant_with_warmup",
  "warmup_steps": 100,
  "lora_rank": 64,
  "lora_alpha": 16,
  "lora_dropout": 0.05,
  "gradient_checkpointing": true,
  "disable_tqdm": false,
  "optim": "paged_adamw_32bit",
  "seed": 42,
  "fp16": true,
  "report_to": "tensorboard",
  "dataloader_num_workers": 0,
  "save_strategy": "steps",
  "weight_decay": 0,
  "max_grad_norm": 0.3,
  "remove_unused_columns": false
}