# config.yaml
# Training configuration with four top-level sections:
# data, algorithm, worker, trainer.
# Dataset, prompt-formatting and batching settings.
data:
  # Train/validation sources, written as <dataset>@<split>.
  train_files: hiyouga/math12k@train
  val_files: hiyouga/math12k@test
  # Field names looked up in each record.
  # NOTE(review): presumably the dataset columns holding the question, the
  # reference answer and any images; confirm against the data loader.
  prompt_key: problem
  answer_key: answer
  image_key: images
  # Length limits for prompt and response.
  # NOTE(review): presumably counted in tokens; confirm.
  max_prompt_length: 2048
  max_response_length: 2048
  rollout_batch_size: 512
  val_batch_size: 1024
  # Jinja template file used to format prompts.
  format_prompt: ./examples/format_prompt/math_format.jinja
  # null means no chat-template override is configured.
  override_chat_template: null
  shuffle: true
  seed: 1
  # Image pixel bounds: 4194304 = 2048 * 2048, 262144 = 512 * 512.
  max_pixels: 4194304
  min_pixels: 262144
  filter_overlong_prompts: true

# Advantage-estimator and KL-regularisation settings.
algorithm:
  adv_estimator: grpo
  # KL handling: not disabled, and use_kl_loss is switched on.
  # NOTE(review): presumably this applies KL as a loss term rather than as a
  # reward penalty; confirm against the trainer implementation.
  disable_kl: false
  use_kl_loss: true
  kl_penalty: low_var_kl
  kl_coef: 1.0e-2  # i.e. 0.01

# Per-role worker settings: actor, rollout, ref and reward.
worker:
  # Actor: the model being trained.
  actor:
    global_batch_size: 128
    micro_batch_size_per_device_for_update: 4
    micro_batch_size_per_device_for_experience: 16
    max_grad_norm: 1.0
    padding_free: true
    ulysses_sequence_parallel_size: 1
    model:
      model_path: Qwen/Qwen2.5-7B-Instruct
      enable_gradient_checkpointing: true
      trust_remote_code: false
      freeze_vision_tower: false
    optim:
      lr: 1.0e-6
      weight_decay: 1.0e-2
      strategy: adamw  # {adamw, adamw_bf16}
      lr_warmup_ratio: 0.0
    fsdp:
      enable_full_shard: true
      enable_cpu_offload: false
      enable_rank0_init: true
    offload:
      offload_params: true  # true: more CPU memory; false: more GPU memory
      offload_optimizer: true  # true: more CPU memory; false: more GPU memory

  # Rollout: sampling settings used to generate responses.
  rollout:
    # NOTE(review): n is presumably the number of responses sampled per
    # prompt; confirm against the rollout worker.
    n: 5
    temperature: 1.0
    top_p: 0.99
    # NOTE(review): the keys below look like inference-engine options
    # (vLLM-style names); confirm against the rollout backend.
    gpu_memory_utilization: 0.6
    enforce_eager: false
    enable_chunked_prefill: false
    tensor_parallel_size: 2
    limit_images: 0
    # Values that replace the sampling settings above during validation.
    val_override_config:
      temperature: 0.5
      n: 1

  # Ref: settings for the reference-model worker.
  ref:
    fsdp:
      enable_full_shard: true
      enable_cpu_offload: true  # true: more CPU memory; false: more GPU memory
      enable_rank0_init: true
    offload:
      offload_params: false

  # Reward: scoring function, written as <python file>:<function name>.
  reward:
    reward_type: batch
    reward_function: ./examples/reward_function/math.py:compute_score

# Run-level settings: duration, logging, cluster size, validation and
# checkpointing.
trainer:
  total_epochs: 15
  # null leaves max_steps unset.
  # NOTE(review): presumably the run length then follows total_epochs;
  # confirm against the trainer.
  max_steps: null
  project_name: easy_r1
  experiment_name: qwen2_5_7b_math_grpo
  logger: ["console", "wandb"]
  # Cluster shape: 1 node with 8 GPUs.
  nnodes: 1
  n_gpus_per_node: 8
  val_freq: 5  # -1 to disable
  val_before_train: true
  val_only: false
  val_generations_to_log: 3
  save_freq: 5  # -1 to disable
  save_limit: 3  # -1 to disable
  # null leaves both checkpoint paths unset.
  save_checkpoint_path: null
  load_checkpoint_path: null