# Generation / rollout configuration.
# NOTE(review): the section nesting below was reconstructed from a copy of this
# file that had been collapsed onto a single line; confirm it against the
# consumer's expected schema.

# Cluster shape for the job.
trainer:
  nnodes: 1
  n_gpus_per_node: 8

# Input prompts and output location.
data:
  path: ~/data/rlhf/math/test.parquet
  prompt_key: prompt
  n_samples: 5
  output_path: /opt/tiger/math_Qwen2-7B-Instruct.parquet
  batch_size: 128

model:
  path: ~/models/Qwen2-7B-Instruct
  external_lib: null

rollout:
  name: vllm
  temperature: 1.0
  top_k: 50  # 0 for hf rollout, -1 for vllm rollout
  top_p: 0.7
  prompt_length: 1536
  response_length: 512

  # for vllm rollout
  dtype: bfloat16  # should align with FSDP
  gpu_memory_utilization: 0.5
  ignore_eos: False
  enforce_eager: True
  free_cache_engine: True
  load_format: dummy_dtensor
  tensor_model_parallel_size: 1
  max_num_batched_tokens: 8192
  max_model_len: null
  max_num_seqs: 1024
  # will be deprecated, use log_prob_micro_batch_size_per_gpu
  log_prob_micro_batch_size: null
  log_prob_micro_batch_size_per_gpu: 8

  # for fire vllm rollout
  use_fire_sampling: False  # enable FIRE https://arxiv.org/abs/2410.21236

  # for hf rollout
  do_sample: True
  disable_log_stats: True
  enable_chunked_prefill: True
  n: 1

actor:
  strategy: fsdp  # This is for backward-compatibility
  ulysses_sequence_parallel_size: 1  # sp size
  fsdp_config:
    fsdp_size: -1