# Settings for sampling responses from a model over a prompt dataset.
# NOTE(review): this structure was reconstructed from a flattened single-line
# copy of the file; confirm that `fsdp_config` belongs under `actor` and that
# `ray_init` is a top-level section.

# Cluster layout and device type.
trainer:
  nnodes: 1
  n_gpus_per_node: 8
  device: cuda

# Input prompts and where the generated samples are written.
data:
  path: ~/data/rlhf/math/test.parquet
  prompt_key: prompt
  n_samples: 5
  output_path: /opt/tiger/math_Qwen2-7B-Instruct.parquet
  batch_size: 128

# Model to load.
model:
  path: ~/models/Qwen2-7B-Instruct
  external_lib: null

# Rollout (sampling) engine settings.
rollout:
  name: vllm
  mode: sync  # sync: LLM, async: AsyncLLM
  temperature: 1.0
  top_k: 50  # 0 for hf rollout, -1 for vllm rollout
  top_p: 0.7
  prompt_length: 1536
  response_length: 512

  # for vllm rollout
  dtype: bfloat16  # should align with FSDP
  gpu_memory_utilization: 0.5
  ignore_eos: false
  enforce_eager: true
  free_cache_engine: true
  load_format: dummy_dtensor
  tensor_model_parallel_size: 1
  max_num_batched_tokens: 8192
  max_model_len: null
  max_num_seqs: 1024
  # will be deprecated; use log_prob_micro_batch_size_per_gpu instead
  log_prob_micro_batch_size: null
  log_prob_micro_batch_size_per_gpu: 8

  # for hf rollout
  do_sample: true
  disable_log_stats: true
  enable_chunked_prefill: true
  n: 1

  # support logging rollout probabilities for debugging purposes
  calculate_log_probs: false

actor:
  strategy: fsdp  # kept for backward compatibility
  ulysses_sequence_parallel_size: 1  # sequence-parallel (sp) size
  # calculate entropy with chunking to reduce the memory peak
  entropy_from_logits_with_chunking: false
  entropy_checkpointing: false  # recompute entropy
  fsdp_config:
    fsdp_size: -1
    forward_prefetch: false  # FSDP1 forward_prefetch configuration

ray_init:
  # `None` (null) means using all CPUs, which might cause a hang when CPUs are
  # limited by systems like SLURM. In that case, set this to an allowed number.
  num_cpus: null
  timeline_json_file: null