# actor_rollout_ref.ref: FSDP config same as actor. For models larger than 7B,
# it’s recommended to turn on offload for ref by default.
strategy: ${actor_rollout_ref.actor.strategy}

# whether to enable torch.compile
# same as actor_rollout_ref.actor.use_torch_compile if it exists, otherwise true
use_torch_compile: ${oc.select:actor_rollout_ref.actor.use_torch_compile,true}

# [Will be deprecated, use log_prob_micro_batch_size_per_gpu]
# The batch size for one forward pass in the computation of log_prob. Global batch size.
log_prob_micro_batch_size: null

# The batch size for one forward pass in the computation of log_prob. Local batch size per GPU.
log_prob_micro_batch_size_per_gpu: null

# enable dynamic batch size (sequence packing) for log_prob computation
# same as actor_rollout_ref.actor.use_dynamic_bsz if it exists, otherwise false
log_prob_use_dynamic_bsz: ${oc.select:actor_rollout_ref.actor.use_dynamic_bsz,false}

# the max token length per GPU
# same as actor_rollout_ref.actor.ppo_max_token_len_per_gpu if it exists, otherwise 16384
log_prob_max_token_len_per_gpu: ${oc.select:actor_rollout_ref.actor.ppo_max_token_len_per_gpu,16384}
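
# Illustrative sketch (not part of the shipped config): how the oc.select
# interpolations above resolve, assuming OmegaConf's built-in oc.select
# resolver. Each ref setting mirrors the matching actor setting when that key
# is defined, and falls back to the literal default otherwise. For example,
# given the hypothetical actor section
#
#   actor_rollout_ref:
#     actor:
#       use_torch_compile: false   # key present
#       # use_dynamic_bsz absent
#
# ref.use_torch_compile resolves to false (inherited from actor), while
# ref.log_prob_use_dynamic_bsz resolves to its literal default, false.
#
# Since verl launches through Hydra, these keys can also be overridden per run
# with dotted paths on the command line; a hedged example with an illustrative
# value (the entry point and value are examples, not a recommendation):
#
#   python3 -m verl.trainer.main_ppo \
#       actor_rollout_ref.ref.log_prob_micro_batch_size_per_gpu=8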