#!/bin/bash
#
# Launches verl.trainer.main for the Qwen2.5-VL-3B-Instruct model on the
# BUAADreamer/clevr_count_70k dataset, layering key=value overrides on top
# of examples/config.yaml.
#
# Relative paths (examples/...) are resolved against the current working
# directory, so run this from the directory that contains examples/.

# Trace every command as it is executed.
set -x

# Make Python write stdout/stderr unbuffered so log lines appear immediately.
export PYTHONUNBUFFERED=1

# Model to train. Replace it with your local file path if needed.
MODEL_PATH=Qwen/Qwen2.5-VL-3B-Instruct

# key=value overrides handed to verl.trainer.main. Held in an array so each
# override stays exactly one argument, even if MODEL_PATH contains spaces
# or glob characters.
trainer_args=(
  config=examples/config.yaml
  data.train_files=BUAADreamer/clevr_count_70k@train
  data.val_files=BUAADreamer/clevr_count_70k@test
  data.format_prompt=./examples/format_prompt/r1v_format.jinja
  "worker.actor.model.model_path=${MODEL_PATH}"
  worker.rollout.tensor_parallel_size=1
  worker.reward.reward_type=sequential
  worker.reward.reward_function=./examples/reward_function/r1v.py:compute_score
  trainer.experiment_name=qwen2_5_vl_3b_clevr
  # NOTE(review): presumably must match the number of GPUs available on
  # this machine -- confirm before running on different hardware.
  trainer.n_gpus_per_node=2
)

python3 -m verl.trainer.main "${trainer_args[@]}"