# DAPO trainer configuration.
#
# Composed on top of the `ppo_trainer` config via the Hydra defaults list;
# the keys below are overrides/additions applied to that base.

hydra:
  # Extra location Hydra searches when resolving the configs named in
  # `defaults` below.
  searchpath:
    - file://verl/trainer/config

defaults:
  - ppo_trainer
  # `_self_` is listed last so the values in this file take precedence over
  # those composed from `ppo_trainer`.
  - _self_

data:
  # Interpolation: follows `data.train_batch_size` unless overridden.
  gen_batch_size: ${data.train_batch_size}

reward_model:
  reward_manager: dapo
  overlong_buffer:
    # Off by default. We try to avoid forgetting to set `enable`.
    enable: false
    len: 0
    penalty_factor: 0.0
    log: false

algorithm:
  filter_groups:
    _target_: verl.trainer.config.FilterGroupsConfig
    # Off by default. We try to avoid forgetting to set `enable`.
    enable: false
    # Metric name, e.g. acc / score / seq_reward / seq_final_reward / ...
    metric: null
    # Non-positive values mean no upper limit.
    max_num_gen_batches: 0

trainer:
  project_name: verl-dapo