minicpm-2b-sft-full.json
{
    "output_dir": "output/firefly-minicpm-2b-sft-full",
    "model_name_or_path": "openbmb/MiniCPM-2B-dpo-fp16",
    "deepspeed": "./train_args/ds_z3_config.json",
    "train_file": "./data/dummy_data.jsonl",
    "template_name": "minicpm",
    "train_mode": "full",
    "num_train_epochs": 1,
    "per_device_train_batch_size": 1,
    "gradient_accumulation_steps": 16,
    "learning_rate": 1e-5,
    "max_seq_length": 1024,
    "logging_steps": 100,
    "save_steps": 100,
    "save_total_limit": 1,
    "lr_scheduler_type": "cosine",
    "warmup_steps": 100,

    "gradient_checkpointing": true,
    "disable_tqdm": false,
    "optim": "adamw_hf",
    "seed": 42,
    "fp16": true,
    "report_to": "tensorboard",
    "dataloader_num_workers": 0,
    "save_strategy": "steps",
    "weight_decay": 0,
    "max_grad_norm": 1.0,
    "remove_unused_columns": false
}
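
A minimal sketch of how a config like this is typically consumed: the HF-compatible keys (output_dir, learning_rate, fp16, ...) map onto transformers.TrainingArguments, while the remaining keys (model_name_or_path, train_file, template_name, train_mode, max_seq_length) go into a separate dataclass. The CustomArguments dataclass, the config path, and the use of HfArgumentParser.parse_json_file here are assumptions for illustration, not the project's actual entry point.

# Hypothetical sketch: split this JSON between TrainingArguments and a custom
# dataclass. Field names below mirror the non-HF keys in the config above;
# the file path is an assumption.
from dataclasses import dataclass, field

from transformers import HfArgumentParser, TrainingArguments


@dataclass
class CustomArguments:
    # Keys in the JSON that TrainingArguments does not define.
    model_name_or_path: str = field(default="openbmb/MiniCPM-2B-dpo-fp16")
    train_file: str = field(default="./data/dummy_data.jsonl")
    template_name: str = field(default="minicpm")
    train_mode: str = field(default="full")
    max_seq_length: int = field(default=1024)


if __name__ == "__main__":
    parser = HfArgumentParser((CustomArguments, TrainingArguments))
    # parse_json_file distributes the JSON keys to whichever dataclass
    # declares a matching field; every key in this config is covered.
    custom_args, training_args = parser.parse_json_file(
        json_file="minicpm-2b-sft-full.json"
    )
    print(custom_args.model_name_or_path, training_args.learning_rate)

With per_device_train_batch_size 1 and gradient_accumulation_steps 16, the effective batch size is 16 per GPU (times the number of GPUs when launched under the DeepSpeed ZeRO-3 config referenced by the "deepspeed" key).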