Commit ff7fb65e authored by chenych

Update

parent c132cbcb
......@@ -49,7 +49,7 @@ EasyR1 is built on **[HybridEngine](https://arxiv.org/abs/2409.19256)** and the latest
### Environment Setup
-v path, docker_name and imageID should be modified according to your actual setup
`-v path`, `docker_name`, and `imageID` should be modified according to your actual setup
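As a minimal sketch of how these placeholders might be filled in (the host path, container name, and image ID below are hypothetical; any device flags should follow the full command in the Docker section below):

```bash
# Hypothetical placeholder values; replace the host path, container name, and image ID with your own
IMAGE_ID=your_image_id          # e.g. the ID shown by `docker images`
docker run -it --shm-size 200g --network=host --name my_easyr1 --privileged \
    -v /data/EasyR1:/workspace/EasyR1 \
    ${IMAGE_ID} /bin/bash
```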
#### Docker (Method 1)
......@@ -62,7 +62,7 @@ docker run -it --shm-size 200g --network=host --name docker_name --privileged --
## Install the required packages
cd EasyR1
pip install vllm-0.8.2+das.opt1.fe6d3b0.dtk2504-cp310-cp310-linux_x86_64.whl
pip install -r requirements.txt --no-deps
## Comment out accelerate, liger-kernel and tensordict, then run the following step
pip install -r requirements.txt
......@@ -78,7 +78,7 @@ docker run -it --shm-size 200g --network=host --name docker_name --privileged --
## Install the required packages
cd EasyR1
pip install vllm-0.8.2+das.opt1.fe6d3b0.dtk2504-cp310-cp310-linux_x86_64.whl
pip install -r requirements.txt --no-deps
## Comment out accelerate, liger-kernel and tensordict, then run the following step
pip install -r requirements.txt
......@@ -99,7 +99,7 @@ flash-attn: 2.6.1+das.opt4.dtk2504
```bash
cd EasyR1
pip install vllm-0.8.2+das.opt1.fe6d3b0.dtk2504-cp310-cp310-linux_x86_64.whl
pip install -r requirements.txt --no-deps
## Comment out accelerate, liger-kernel and tensordict, then run the following step
pip install -r requirements.txt
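## Optional sanity check (a sketch, not from the original docs): confirm the DAS builds were picked up;
## the exact version strings on your system may differ
python -c "import vllm, flash_attn; print(vllm.__version__, flash_attn.__version__)"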
......
......@@ -144,6 +144,12 @@ These features are temporarily disabled for now, we plan to fix them one-by-one
👋 Join our [WeChat group](assets/wechat.jpg).
## FAQs
> RuntimeError: CUDA Error: out of memory at /workspace/csrc/cumem_allocator.cpp:62
Reduce the `worker.rollout.gpu_memory_utilization`.
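For example, a lower value can be passed as a command-line override of `examples/config.yaml` (0.5 below is only an illustrative value; the default in the example config is 0.6):

```bash
# Illustrative override; pick a value that fits your GPU memory
python3 -m verl.trainer.main \
    config=examples/config.yaml \
    worker.rollout.gpu_memory_utilization=0.5
```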
## Citation
Core contributors: [Yaowei Zheng](https://github.com/hiyouga), [Junting Lu](https://github.com/AL-377), [Shenzhi Wang](https://github.com/Shenzhi-Wang), [Zhangchi Feng](https://github.com/BUAADreamer), [Dongdong Kuang](https://github.com/Kuangdd01) and Yuwen Xiong
......
assets/wechat.jpg: image replaced (157 KB → 164 KB)
......@@ -2,7 +2,7 @@ set -x
MODEL_PATH=Qwen/Qwen2.5-VL-3B-Instruct # replace it with your local file path
SYSTEM_PROMPT="""A conversation between User and Assistant. The user asks a question, and the Assistant solves it. The assistant
FORMAT_PROMPT="""A conversation between User and Assistant. The user asks a question, and the Assistant solves it. The assistant
first thinks about the reasoning process in the mind and then provides the user with the answer. The reasoning
process and answer are enclosed within <think> </think> and <answer> </answer> tags, respectively, i.e.,
<think> reasoning process here </think><answer> answer here </answer>"""
......@@ -11,10 +11,9 @@ python3 -m verl.trainer.main \
config=examples/config.yaml \
data.train_files=BUAADreamer/clevr_count_70k@train \
data.val_files=BUAADreamer/clevr_count_70k@test \
data.system_prompt="${SYSTEM_PROMPT}" \
data.format_prompt="${FORMAT_PROMPT}" \
worker.actor.model.model_path=${MODEL_PATH} \
worker.rollout.tensor_parallel_size=1 \
worker.rollout.enable_chunked_prefill=false \
worker.reward.compute_score=r1v \
worker.reward.score_function=r1v \
trainer.experiment_name=qwen2_5_vl_3b_clevr \
trainer.n_gpus_per_node=2
......@@ -2,7 +2,7 @@ set -x
MODEL_PATH=Qwen/Qwen2.5-VL-3B-Instruct # replace it with your local file path
SYSTEM_PROMPT="""A conversation between User and Assistant. The user asks a question, and the Assistant solves it. The assistant
FORMAT_PROMPT="""A conversation between User and Assistant. The user asks a question, and the Assistant solves it. The assistant
first thinks about the reasoning process in the mind and then provides the user with the answer. The reasoning
process and answer are enclosed within <think> </think> and <answer> </answer> tags, respectively, i.e.,
<think> reasoning process here </think><answer> answer here </answer>"""
......@@ -11,10 +11,9 @@ python3 -m verl.trainer.main \
config=examples/config.yaml \
data.train_files=leonardPKU/GEOQA_8K_R1V@train \
data.val_files=leonardPKU/GEOQA_8K_R1V@test \
data.system_prompt="${SYSTEM_PROMPT}" \
data.format_prompt="${FORMAT_PROMPT}" \
worker.actor.model.model_path=${MODEL_PATH} \
worker.rollout.tensor_parallel_size=1 \
worker.rollout.enable_chunked_prefill=false \
worker.reward.compute_score=r1v \
worker.reward.score_function=r1v \
trainer.experiment_name=qwen2_5_vl_3b_geoqa8k \
trainer.n_gpus_per_node=8
......@@ -68,7 +68,8 @@ worker:
reward:
reward_type: function
compute_score: math
score_function: math
skip_special_tokens: true
trainer:
total_episodes: 15
......@@ -80,7 +81,7 @@ trainer:
val_freq: 5 # -1 to disable
val_before_train: true
val_only: false
val_generations_to_log: 1
val_generations_to_log: 3
save_freq: 5 # -1 to disable
save_limit: 3 # -1 to disable
save_checkpoint_path: null
......
data:
train_files: hiyouga/math12k@train
val_files: hiyouga/math12k@test
prompt_key: problem
max_prompt_length: 1024
max_response_length: 1024
rollout_batch_size: 512
shuffle: true
seed: 1
max_pixels: 4194304
min_pixels: 262144
algorithm:
adv_estimator: grpo
kl_coef: 0.0
worker:
actor:
global_batch_size: 128
micro_batch_size_per_device_for_update: 1
micro_batch_size_per_device_for_experience: 2
max_grad_norm: 1.0
use_kl_loss: true
kl_loss_coef: 1.0e-3
kl_loss_type: low_var_kl
model:
model_path: Qwen/Qwen2.5-7B-Instruct
enable_gradient_checkpointing: true
optim:
lr: 1.0e-6
weight_decay: 1.0e-2
fsdp:
param_offload: false
optimizer_offload: false
torch_dtype: null
offload:
param_offload: true
optimizer_offload: true
rollout:
temperature: 1.0
tensor_parallel_size: 2
gpu_memory_utilization: 0.6
n: 5
enable_chunked_prefill: true
ref:
offload:
param_offload: true
reward:
reward_type: function
compute_score: math
trainer:
total_episodes: 15
logger: ["console", "wandb"]
project_name: easy_r1
experiment_name: qwen2_5_7b_math
n_gpus_per_node: 8
nnodes: 1
save_freq: 5
test_freq: 5
val_before_train: true
val_only: false
save_checkpoint_path: null
......@@ -2,14 +2,14 @@ set -x
MODEL_PATH=Qwen/Qwen2.5-7B-Instruct # replace it with your local file path
SYSTEM_PROMPT="""You FIRST think about the reasoning process as an internal monologue and then provide the final answer.
FORMAT_PROMPT="""You FIRST think about the reasoning process as an internal monologue and then provide the final answer.
The reasoning process MUST BE enclosed within <think> </think> tags. The final answer MUST BE put in \boxed{}."""
python3 -m verl.trainer.main \
config=examples/config.yaml \
data.train_files=hiyouga/math12k@train \
data.val_files=hiyouga/math12k@test \
data.system_prompt="${SYSTEM_PROMPT}" \
data.format_prompt="${FORMAT_PROMPT}" \
worker.actor.model.model_path=${MODEL_PATH} \
trainer.experiment_name=qwen2_5_7b_math_grpo \
trainer.n_gpus_per_node=8
......@@ -2,20 +2,19 @@ set -x
MODEL_PATH=Qwen/Qwen2.5-VL-32B-Instruct # replace it with your local file path
SYSTEM_PROMPT="""You FIRST think about the reasoning process as an internal monologue and then provide the final answer.
FORMAT_PROMPT="""You FIRST think about the reasoning process as an internal monologue and then provide the final answer.
The reasoning process MUST BE enclosed within <think> </think> tags. The final answer MUST BE put in \boxed{}."""
python3 -m verl.trainer.main \
config=examples/config.yaml \
data.train_files=hiyouga/geometry3k@train \
data.val_files=hiyouga/geometry3k@test \
data.system_prompt="${SYSTEM_PROMPT}" \
data.format_prompt="${FORMAT_PROMPT}" \
worker.actor.model.model_path=${MODEL_PATH} \
worker.actor.micro_batch_size_per_device_for_update=1 \
worker.actor.micro_batch_size_per_device_for_experience=8 \
worker.actor.fsdp.torch_dtype=bf16 \
worker.actor.optim.strategy=adamw_bf16 \
worker.rollout.tensor_parallel_size=8 \
worker.rollout.enable_chunked_prefill=false \
trainer.experiment_name=qwen2_5_vl_32b_geo_grpo \
trainer.n_gpus_per_node=8
......@@ -2,16 +2,15 @@ set -x
MODEL_PATH=Qwen/Qwen2.5-VL-3B-Instruct # replace it with your local file path
SYSTEM_PROMPT="""You FIRST think about the reasoning process as an internal monologue and then provide the final answer.
FORMAT_PROMPT="""You FIRST think about the reasoning process as an internal monologue and then provide the final answer.
The reasoning process MUST BE enclosed within <think> </think> tags. The final answer MUST BE put in \boxed{}."""
python3 -m verl.trainer.main \
config=examples/config.yaml \
data.train_files=hiyouga/geometry3k@train \
data.val_files=hiyouga/geometry3k@test \
data.system_prompt="${SYSTEM_PROMPT}" \
data.format_prompt="${FORMAT_PROMPT}" \
worker.actor.model.model_path=${MODEL_PATH} \
worker.rollout.tensor_parallel_size=1 \
worker.rollout.enable_chunked_prefill=false \
trainer.experiment_name=qwen2_5_vl_3b_geo_grpo \
trainer.n_gpus_per_node=2
......@@ -2,15 +2,14 @@ set -x
MODEL_PATH=Qwen/Qwen2.5-VL-7B-Instruct # replace it with your local file path
SYSTEM_PROMPT="""You FIRST think about the reasoning process as an internal monologue and then provide the final answer.
FORMAT_PROMPT="""You FIRST think about the reasoning process as an internal monologue and then provide the final answer.
The reasoning process MUST BE enclosed within <think> </think> tags. The final answer MUST BE put in \boxed{}."""
python3 -m verl.trainer.main \
config=examples/config.yaml \
data.train_files=hiyouga/geometry3k@train \
data.val_files=hiyouga/geometry3k@test \
data.system_prompt="${SYSTEM_PROMPT}" \
data.format_prompt="${FORMAT_PROMPT}" \
worker.actor.model.model_path=${MODEL_PATH} \
worker.rollout.enable_chunked_prefill=false \
trainer.experiment_name=qwen2_5_vl_7b_geo_grpo \
trainer.n_gpus_per_node=8
......@@ -2,16 +2,18 @@ set -x
MODEL_PATH=Qwen/Qwen2.5-VL-7B-Instruct # replace it with your local file path
SYSTEM_PROMPT="""You FIRST think about the reasoning process as an internal monologue and then provide the final answer.
FORMAT_PROMPT="""You FIRST think about the reasoning process as an internal monologue and then provide the final answer.
The reasoning process MUST BE enclosed within <think> </think> tags. The final answer MUST BE put in \boxed{}."""
python3 -m verl.trainer.main \
config=examples/config.yaml \
data.train_files=hiyouga/geometry3k@train \
data.val_files=hiyouga/geometry3k@test \
data.system_prompt="${SYSTEM_PROMPT}" \
data.format_prompt="${FORMAT_PROMPT}" \
worker.actor.model.model_path=${MODEL_PATH} \
worker.rollout.enable_chunked_prefill=false \
algorithm.adv_estimator=reinforce_plus_plus \
algorithm.use_kl_loss=false \
algorithm.kl_penalty=kl \
algorithm.kl_coef=1.0e-3 \
trainer.experiment_name=qwen2_5_vl_7b_geo_reinforce_pp \
trainer.n_gpus_per_node=8
......@@ -2,16 +2,15 @@ set -x
MODEL_PATH=Qwen/Qwen2.5-VL-7B-Instruct # replace it with your local file path
SYSTEM_PROMPT="""You FIRST think about the reasoning process as an internal monologue and then provide the final answer.
FORMAT_PROMPT="""You FIRST think about the reasoning process as an internal monologue and then provide the final answer.
The reasoning process MUST BE enclosed within <think> </think> tags. The final answer MUST BE put in \boxed{}."""
python3 -m verl.trainer.main \
config=examples/config.yaml \
data.train_files=hiyouga/geometry3k@train \
data.val_files=hiyouga/geometry3k@test \
data.system_prompt="${SYSTEM_PROMPT}" \
data.format_prompt="${FORMAT_PROMPT}" \
worker.actor.model.model_path=${MODEL_PATH} \
worker.rollout.enable_chunked_prefill=false \
trainer.experiment_name=qwen2_5_vl_7b_geo_grpo \
trainer.logger=['console','swanlab'] \
trainer.n_gpus_per_node=8
data:
train_files: hiyouga/math12k@train
val_files: hiyouga/math12k@test
prompt_key: problem
max_prompt_length: 1024
max_response_length: 1024
rollout_batch_size: 512
shuffle: true
seed: 1
max_pixels: 4194304
min_pixels: 262144
algorithm:
adv_estimator: remax
kl_coef: 0.0
worker:
actor:
global_batch_size: 128
micro_batch_size_per_device_for_update: 1
micro_batch_size_per_device_for_experience: 2
max_grad_norm: 1.0
use_kl_loss: true
kl_loss_coef: 1.0e-3
kl_loss_type: low_var_kl
model:
model_path: Qwen/Qwen2.5-7B-Instruct
enable_gradient_checkpointing: true
optim:
lr: 1.0e-6
weight_decay: 1.0e-2
fsdp:
param_offload: false
optimizer_offload: false
torch_dtype: null
offload:
param_offload: true
optimizer_offload: true
rollout:
temperature: 1.0
tensor_parallel_size: 2
gpu_memory_utilization: 0.6
n: 5
enable_chunked_prefill: true
ref:
offload:
param_offload: true
reward:
reward_type: function
compute_score: math
trainer:
total_episodes: 15
logger: ["console", "wandb"]
project_name: easy_r1
experiment_name: qwen2_5_7b_remax_math
n_gpus_per_node: 8
nnodes: 1
save_freq: 5
test_freq: 5
val_before_train: true
val_only: false
save_checkpoint_path: null
set -x
export VLLM_ATTENTION_BACKEND=XFORMERS
MODEL_PATH=Qwen/Qwen2.5-7B-Instruct # replace it with your local file path
python3 -m verl.trainer.main \
config=examples/grpo_example.yaml \
worker.actor.model.model_path=${MODEL_PATH} \
trainer.n_gpus_per_node=4
set -x
export VLLM_ATTENTION_BACKEND=XFORMERS
MODEL_PATH=Qwen/Qwen2.5-7B-Instruct # replace it with your local file path
python3 -m verl.trainer.main \
config=examples/grpo_example.yaml \
worker.actor.model.model_path=${MODEL_PATH} \
trainer.logger=['console','swanlab'] \
trainer.n_gpus_per_node=4
set -x
export VLLM_ATTENTION_BACKEND=XFORMERS
MODEL_PATH=Qwen/Qwen2.5-VL-3B-Instruct # replace it with your local file path
SYSTEM_PROMPT="""A conversation between User and Assistant. The user asks a question, and the Assistant solves it. The assistant
first thinks about the reasoning process in the mind and then provides the user with the answer. The reasoning
process and answer are enclosed within <think> </think> and <answer> </answer> tags, respectively, i.e.,
<think> reasoning process here </think><answer> answer here </answer>"""
python3 -m verl.trainer.main \
config=examples/grpo_example.yaml \
data.train_files=BUAADreamer/clevr_count_70k@train \
data.val_files=BUAADreamer/clevr_count_70k@test \
data.system_prompt="${SYSTEM_PROMPT}" \
worker.actor.model.model_path=${MODEL_PATH} \
worker.rollout.tensor_parallel_size=1 \
worker.rollout.enable_chunked_prefill=false \
worker.reward.compute_score=r1v \
trainer.experiment_name=qwen2_5_vl_3b_clevr \
trainer.n_gpus_per_node=2
set -x
export VLLM_ATTENTION_BACKEND=XFORMERS
MODEL_PATH=Qwen/Qwen2.5-VL-3B-Instruct # replace it with your local file path
python3 -m verl.trainer.main \
config=examples/grpo_example.yaml \
data.train_files=hiyouga/geometry3k@train \
data.val_files=hiyouga/geometry3k@test \
worker.actor.model.model_path=${MODEL_PATH} \
worker.rollout.tensor_parallel_size=1 \
worker.rollout.enable_chunked_prefill=false \
trainer.experiment_name=qwen2_5_vl_3b_geo \
trainer.n_gpus_per_node=2
set -x
export VLLM_ATTENTION_BACKEND=XFORMERS
MODEL_PATH=Qwen/Qwen2.5-VL-7B-Instruct # replace it with your local file path
python3 -m verl.trainer.main \
config=examples/grpo_example.yaml \
data.train_files=hiyouga/geometry3k@train \
data.val_files=hiyouga/geometry3k@test \
worker.actor.model.model_path=${MODEL_PATH} \
worker.rollout.enable_chunked_prefill=false \
trainer.experiment_name=qwen2_5_vl_7b_geo \
trainer.n_gpus_per_node=4
set -x
export VLLM_ATTENTION_BACKEND=XFORMERS
MODEL_PATH=Qwen/Qwen2.5-VL-7B-Instruct # replace it with your local file path
python3 -m verl.trainer.main \
config=examples/grpo_example.yaml \
data.train_files=hiyouga/geometry3k@train \
data.val_files=hiyouga/geometry3k@test \
worker.actor.model.model_path=${MODEL_PATH} \
worker.rollout.enable_chunked_prefill=false \
trainer.experiment_name=qwen2_5_vl_7b_geo \
trainer.logger=['console','swanlab'] \
trainer.n_gpus_per_node=4