# dapo.yaml
# Sequence of agent entries. Each entry gives a dotted path to an agent class
# (`agent_type`), an `agent_args` mapping, and a numeric `weight`.
# NOTE(review): `weight` presumably controls how often each entry is sampled
# relative to the others — confirm against the code that loads this file.
- agent_type: examples.rl.environments.math.dapo_agent.DAPOAgent
  agent_args:
    # NOTE(review): presumably the reward granted for well-formatted output;
    # 0.0 would then disable it — confirm against DAPOAgent.
    format_reward: 0.0
  weight: 1.0
# Second entry: carries zero weight and is flagged `evaluation_only`.
# NOTE(review): presumably this keeps it out of training and uses it only for
# evaluation — confirm how the loader treats `evaluation_only` and a 0.0 weight.
- agent_type: examples.rl.environments.math.aime_agent.AIMEAgent
  agent_args:
    # Same setting as the first entry (see NOTE(review) there for semantics).
    format_reward: 0.0
  weight: 0.0
  evaluation_only: true