---
# Training configuration with four top-level sections:
# data, algorithm, worker, trainer.
#
# NOTE(review): this block structure was restored from a version in which
# every key sat on one physical line. Nesting follows the repeated keys:
# `fsdp`/`offload` occur under both `actor` and `ref`, and `temperature`/`n`
# occur under both `rollout` and `val_override_config`. Placing `rollout`,
# `ref` and `reward` under `worker` is presumed - confirm against the
# consumer's schema.

data:
  train_files: hiyouga/math12k@train
  val_files: hiyouga/math12k@test
  prompt_key: problem
  answer_key: answer
  image_key: images
  max_prompt_length: 2048
  max_response_length: 2048
  rollout_batch_size: 512
  val_batch_size: -1
  shuffle: true
  seed: 1
  max_pixels: 4194304
  min_pixels: 262144

algorithm:
  adv_estimator: grpo
  disable_kl: false
  use_kl_loss: true
  kl_penalty: low_var_kl
  kl_coef: 1.0e-2

worker:
  actor:
    global_batch_size: 128
    micro_batch_size_per_device_for_update: 4
    micro_batch_size_per_device_for_experience: 16
    max_grad_norm: 1.0
    padding_free: true
    ulysses_sequence_parallel_size: 1
    model:
      model_path: Qwen/Qwen2.5-7B-Instruct
      enable_gradient_checkpointing: true
      trust_remote_code: false
      freeze_vision_tower: false
    optim:
      lr: 1.0e-6
      weight_decay: 1.0e-2
      strategy: adamw  # {adamw, adamw_bf16}
      lr_warmup_ratio: 0.0
    fsdp:
      enable_full_shard: true
      enable_cpu_offload: false
      enable_rank0_init: true
    offload:
      offload_params: true  # true: more CPU memory; false: more GPU memory
      offload_optimizer: true  # true: more CPU memory; false: more GPU memory

  rollout:
    temperature: 1.0
    n: 5
    gpu_memory_utilization: 0.6
    enforce_eager: false
    enable_chunked_prefill: false
    tensor_parallel_size: 2
    limit_images: 0
    # Same key names as the parent (`temperature`, `n`) with different values.
    val_override_config:
      temperature: 0.5
      n: 1

  ref:
    fsdp:
      enable_full_shard: true
      enable_cpu_offload: true  # true: more CPU memory; false: more GPU memory
      enable_rank0_init: true
    offload:
      offload_params: false

  reward:
    reward_type: function
    score_function: math
    skip_special_tokens: true

trainer:
  total_episodes: 15
  logger: ["console", "wandb"]
  project_name: easy_r1
  experiment_name: qwen2_5_7b_math_grpo
  n_gpus_per_node: 8
  nnodes: 1
  val_freq: 5  # -1 to disable
  val_before_train: true
  val_only: false
  val_generations_to_log: 3
  save_freq: 5  # -1 to disable
  save_limit: 3  # -1 to disable
  save_checkpoint_path: null
  load_checkpoint_path: null