{
  "model_name_or_path": "facebook/llama-13b",
  "dataset_name_or_path": "./data",
  "output_dir": "./checkpoints/llama_sft_ckpts",
  "per_device_train_batch_size": 1,
  "gradient_accumulation_steps": 4,
  "per_device_eval_batch_size": 4,
  "eval_accumulation_steps": 16,
  "num_train_epochs": 3,
  "learning_rate": 3e-05,
  "warmup_steps": 30,
  "logging_steps": 1,
  "evaluation_strategy": "epoch",
  "save_strategy": "epoch",
  "src_length": 256,
  "max_length": 512,
  "fp16": true,
  "fp16_opt_level": "O2",
  "do_train": true,
  "do_eval": true,
  "disable_tqdm": true,
  "load_best_model_at_end": true,
  "eval_with_do_generation": false,
  "metric_for_best_model": "accuracy",
  "recompute": true,
  "save_total_limit": 1,
  "tensor_parallel_degree": 8
}