# config.yaml
# GRPO training configuration: Qwen/Qwen2.5-7B-Instruct on hiyouga/math12k.
# Dataset, prompt-formatting, and batching settings.
data:
  # NOTE(review): the "@train" / "@test" suffix presumably selects a dataset
  # split — confirm against the data loader.
  train_files: hiyouga/math12k@train
  val_files: hiyouga/math12k@test
  # NOTE(review): presumably the dataset field names for the question, the
  # reference answer, and attached images — confirm against the data loader.
  prompt_key: problem
  answer_key: answer
  image_key: images
  max_prompt_length: 2048
  max_response_length: 2048
  rollout_batch_size: 512
  # NOTE(review): -1 looks like a sentinel value; confirm what the consumer
  # does with it.
  val_batch_size: -1
  format_prompt: ./examples/format_prompt/math_format.jinja
  shuffle: true
  seed: 1
  max_pixels: 4194304  # 2048 * 2048
  min_pixels: 262144  # 512 * 512
  filter_overlong_prompts: true

# Algorithm selection and KL-related settings.
algorithm:
  adv_estimator: grpo
  # KL is not disabled, and use_kl_loss is on.
  # NOTE(review): how these two flags interact is defined by the trainer —
  # confirm there.
  disable_kl: false
  use_kl_loss: true
  kl_penalty: low_var_kl
  kl_coef: 1.0e-2  # 0.01

worker:
  # Actor worker: batch sizes, model, optimizer, FSDP, and offload settings.
  actor:
    global_batch_size: 128
    # Separate per-device micro batch sizes are given for the "update" and
    # "experience" phases.
    micro_batch_size_per_device_for_update: 4
    micro_batch_size_per_device_for_experience: 16
    max_grad_norm: 1.0
    padding_free: true
    ulysses_sequence_parallel_size: 1
    model:
      model_path: Qwen/Qwen2.5-7B-Instruct
      enable_gradient_checkpointing: true
      trust_remote_code: false
      freeze_vision_tower: false
    optim:
      lr: 1.0e-6
      weight_decay: 1.0e-2
      strategy: adamw  # {adamw, adamw_bf16}
      lr_warmup_ratio: 0.0
    fsdp:
      enable_full_shard: true
      enable_cpu_offload: false
      enable_rank0_init: true
    offload:
      offload_params: true  # true: more CPU memory; false: more GPU memory
      offload_optimizer: true  # true: more CPU memory; false: more GPU memory

  # Rollout worker: sampling and inference-engine settings.
  rollout:
    n: 5
    temperature: 1.0
    top_p: 0.99
    gpu_memory_utilization: 0.6
    enforce_eager: false
    enable_chunked_prefill: false
    tensor_parallel_size: 2
    limit_images: 0
    # NOTE(review): presumably these values replace the same-named keys above
    # during validation — confirm against the rollout worker.
    val_override_config:
      temperature: 0.5
      n: 1

  # Reference worker: FSDP and offload settings only.
  ref:
    fsdp:
      enable_full_shard: true
      enable_cpu_offload: true  # true: more CPU memory; false: more GPU memory
      enable_rank0_init: true
    offload:
      offload_params: false

  # Reward worker: scoring is delegated to a function referenced by path.
  reward:
    reward_type: function
    # Format is "<python file>:<function name>".
    score_function: ./examples/score_function/math.py:compute_score

# Trainer: run length, logging, hardware layout, validation, and checkpointing.
trainer:
  total_episodes: 15
  logger: ["console", "wandb"]
  project_name: easy_r1
  experiment_name: qwen2_5_7b_math_grpo
  n_gpus_per_node: 8
  nnodes: 1
  val_freq: 5  # -1 to disable
  val_before_train: true
  val_only: false
  val_generations_to_log: 3
  save_freq: 5  # -1 to disable
  save_limit: 3  # -1 to disable
  # Both checkpoint paths are left unset (null).
  save_checkpoint_path: null
  load_checkpoint_path: null