# SPPO trainer config. It is layered on top of the default ppo_trainer.yaml:
# values set in this file override the ones inherited from ppo_trainer.

hydra:
  # Extra config search path, so that `ppo_trainer` in the defaults list
  # below can be found in verl's trainer config directory.
  searchpath:
    - file://verl/trainer/config

# Load ppo_trainer first, then apply this file (`_self_`) on top of it.
defaults:
  - ppo_trainer
  - _self_

actor_rollout_ref:
  actor:
    _target_: recipe.sppo.config.SPPOActorConfig

    # sppo_eta is an additional hyperparameter for SPPO, not available in
    # verl core. Specifying _target_ with SPPOActorConfig is needed to
    # extend verl ActorConfig with custom fields.
    # Additionally, it is also possible to use the `extra` field natively
    # supported by all verl core dataclasses, without having to define
    # SPPOActorConfig:
    # extra:
    #   sppo_eta: 1.0
    sppo_eta: 1.0
    optim:
      lr_warmup_steps: 15
  rollout:
    name: sglang
    tensor_model_parallel_size: 2
    gpu_memory_utilization: 0.5
    val_kwargs:
      n: 2  # 2 will trigger validation, 1 will bypass

algorithm:
  adv_estimator: null
  # NOTE(review): sppo_eta is also set under actor_rollout_ref.actor above;
  # keep the two values in sync -- confirm which one the trainer reads.
  sppo_eta: 1.0

trainer:
  log_val_generations: 0