hydra: searchpath: - file://verl/trainer/config defaults: - ppo_trainer - _self_ data: max_prompt_length: 1024 max_response_length: 1024 train_batch_size: 256 return_raw_chat: True actor_rollout_ref: hybrid_engine: True rollout: name: sglang multi_turn: enable: True max_assistant_turns: 5 tool_config_path: "./config/tool_config/sandbox_fusion_tool_config.yaml"