dapo_trainer.yaml 628 Bytes
Newer Older
jerrrrry's avatar
jerrrrry committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
hydra:
  searchpath:
    - file://verl/trainer/config

defaults:
  - ppo_trainer
  - _self_

data:
  gen_batch_size: ${data.train_batch_size}

reward_model:
  reward_manager: dapo
  overlong_buffer: 
    enable: False # We try to avoid forgetting to set enable
    len: 0
    penalty_factor: 0.0
    log: False

algorithm:
  filter_groups:
    _target_: verl.trainer.config.FilterGroupsConfig
    enable: False # We try to avoid forgetting to set enable
    metric: null # acc / score / seq_reward / seq_final_reward / ...
    max_num_gen_batches: 0 # Non-positive values mean no upper limit

trainer:
  project_name: verl-dapo