# The SPPO config overrides the default ppo_trainer.yaml.
#
# NOTE(review): the nesting below was reconstructed from a copy of this file
# whose line breaks had been collapsed; confirm the key hierarchy against the
# base ppo_trainer.yaml schema.

# Extra location Hydra searches when resolving the configs named in `defaults`.
hydra:
  searchpath:
    - file://verl/trainer/config

# Compose the base PPO trainer config first; `_self_` comes last so the
# values in this file take precedence over the base ones.
defaults:
  - ppo_trainer
  - _self_

actor_rollout_ref:
  actor:
    dpo_beta: 0.1
    optim:
      lr_warmup_steps: 15
  rollout:
    name: sglang
    tensor_model_parallel_size: 2
    gpu_memory_utilization: 0.5
    val_kwargs:
      # 2 will trigger validation, 1 will bypass
      n: 2

algorithm:
  # Explicit null: no advantage estimator is set by this config.
  adv_estimator: null

trainer:
  log_val_generations: 0
  ref_update_freq: 1