Commit 496acb03 authored by chenych's avatar chenych
Browse files

v0.3.0

parent d8de2ca8
# Start from the NVIDIA official image (ubuntu-22.04 + python-3.10)
# https://docs.nvidia.com/deeplearning/frameworks/pytorch-release-notes/rel-24-08.html
FROM nvcr.io/nvidia/pytorch:24.08-py3
# Define environments
# MAX_JOBS bounds parallel compile jobs for any from-source pip builds.
ENV MAX_JOBS=32
# vLLM worker processes are started with "spawn" instead of "fork".
ENV VLLM_WORKER_MULTIPROC_METHOD=spawn
# Suppress interactive apt prompts during image build.
ENV DEBIAN_FRONTEND=noninteractive
ENV NODE_OPTIONS=""
# Enable the hf_transfer download backend for Hugging Face Hub.
ENV HF_HUB_ENABLE_HF_TRANSFER="1"
# Define installation arguments (mirror URLs; override with --build-arg if needed)
ARG APT_SOURCE=https://mirrors.tuna.tsinghua.edu.cn/ubuntu/
ARG PIP_INDEX=https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple
# Pin the vLLM nightly wheel index to a specific upstream commit.
ARG VLLM_COMMIT=227578480d71fc94ef46ca77fb69496412158d68
# Set apt source: back up the stock sources.list, then point every jammy suite at the mirror
RUN cp /etc/apt/sources.list /etc/apt/sources.list.bak && \
{ \
echo "deb ${APT_SOURCE} jammy main restricted universe multiverse"; \
echo "deb ${APT_SOURCE} jammy-updates main restricted universe multiverse"; \
echo "deb ${APT_SOURCE} jammy-backports main restricted universe multiverse"; \
echo "deb ${APT_SOURCE} jammy-security main restricted universe multiverse"; \
} > /etc/apt/sources.list
# Install systemctl (via systemd; keep maintainer config files on conflict)
RUN apt-get update && \
apt-get install -y -o Dpkg::Options::="--force-confdef" systemd && \
apt-get clean
# Install tini (minimal init process for signal handling / zombie reaping)
RUN apt-get update && \
apt-get install -y tini && \
apt-get clean
# Change pip source to the mirror and upgrade pip itself
RUN pip config set global.index-url "${PIP_INDEX}" && \
pip config set global.extra-index-url "${PIP_INDEX}" && \
python -m pip install --upgrade pip
# Uninstall nv-pytorch fork: packages preinstalled by the NGC image that would
# conflict with the exact versions installed in later layers
RUN pip uninstall -y torch torchvision torchaudio \
pytorch-quantization pytorch-triton torch-tensorrt \
xgboost transformer_engine flash_attn apex megatron-core
# Install vllm-0.7.4-nightly from the per-commit wheel index, then overlay the
# patched vllm sources from the hiyouga fork (verl_v1 branch) onto the installed
# package. Use a shallow clone and remove the checkout afterwards so the full
# git history and working tree do not bloat the image layer.
RUN pip install --no-cache-dir vllm --pre --extra-index-url "https://wheels.vllm.ai/${VLLM_COMMIT}" && \
git clone --depth=1 -b verl_v1 https://github.com/hiyouga/vllm.git && \
cp -r vllm/vllm/ /usr/local/lib/python3.10/dist-packages/ && \
rm -rf vllm
# Install torch-2.5.1 and the training/runtime dependency stack.
# NOTE: version specifiers containing ">=" must be quoted — otherwise the shell
# parses ">" as output redirection and the version constraint is silently lost
# (e.g. `transformers>=4.49.0` installs unpinned `transformers` and creates a
# file named "=4.49.0"). `ray[default]` is quoted to keep brackets out of glob
# expansion as well.
RUN pip install --no-cache-dir torch==2.5.1 torchvision==0.20.1 torchaudio==2.5.1 tensordict torchdata \
"transformers>=4.49.0" accelerate datasets peft hf-transfer \
"ray[default]" codetiming hydra-core pandas "pyarrow>=15.0.0" pylatexenc qwen-vl-utils wandb liger-kernel mathruler \
pytest yapf py-spy pyext pre-commit ruff
# Install flash_attn-2.7.4.post1 from the prebuilt cu12/torch2.5/cp310 wheel,
# then delete the wheel file so it does not remain in the image layer.
RUN wget -nv https://github.com/Dao-AILab/flash-attention/releases/download/v2.7.4.post1/flash_attn-2.7.4.post1+cu12torch2.5cxx11abiFALSE-cp310-cp310-linux_x86_64.whl && \
pip install --no-cache-dir flash_attn-2.7.4.post1+cu12torch2.5cxx11abiFALSE-cp310-cp310-linux_x86_64.whl && \
rm -f flash_attn-2.7.4.post1+cu12torch2.5cxx11abiFALSE-cp310-cp310-linux_x86_64.whl
# Fix cv2: replace the NGC image's pynvml/opencv variants with compatible ones.
# Quote ">=" specifiers so the shell does not treat ">" as output redirection
# and silently drop the version constraint.
RUN pip uninstall -y pynvml nvidia-ml-py && \
pip install --no-cache-dir "nvidia-ml-py>=12.560.30" opencv-python-headless==4.8.0.74 fastapi==0.115.6 && \
pip install --no-cache-dir --upgrade "optree>=0.13.0"
# Reset pip config so the build-time mirror does not leak into the final image
RUN pip config unset global.index-url && \
pip config unset global.extra-index-url
# Start from the NVIDIA official image (ubuntu-22.04 + python-3.10)
# https://docs.nvidia.com/deeplearning/frameworks/pytorch-release-notes/rel-24-08.html
FROM nvcr.io/nvidia/pytorch:24.08-py3
# Define environments
# MAX_JOBS bounds parallel compile jobs for any from-source pip builds.
ENV MAX_JOBS=32
# vLLM worker processes are started with "spawn" instead of "fork".
ENV VLLM_WORKER_MULTIPROC_METHOD=spawn
# Suppress interactive apt prompts during image build.
ENV DEBIAN_FRONTEND=noninteractive
ENV NODE_OPTIONS=""
# Enable the hf_transfer download backend for Hugging Face Hub.
ENV HF_HUB_ENABLE_HF_TRANSFER="1"
# Define installation arguments (mirror URLs; override with --build-arg if needed)
ARG APT_SOURCE=https://mirrors.tuna.tsinghua.edu.cn/ubuntu/
ARG PIP_INDEX=https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple
# Set apt source: back up the stock sources.list, then point every jammy suite at the mirror
RUN cp /etc/apt/sources.list /etc/apt/sources.list.bak && \
{ \
echo "deb ${APT_SOURCE} jammy main restricted universe multiverse"; \
echo "deb ${APT_SOURCE} jammy-updates main restricted universe multiverse"; \
echo "deb ${APT_SOURCE} jammy-backports main restricted universe multiverse"; \
echo "deb ${APT_SOURCE} jammy-security main restricted universe multiverse"; \
} > /etc/apt/sources.list
# Install systemctl (via systemd; keep maintainer config files on conflict)
RUN apt-get update && \
apt-get install -y -o Dpkg::Options::="--force-confdef" systemd && \
apt-get clean
# Install tini (minimal init process for signal handling / zombie reaping)
RUN apt-get update && \
apt-get install -y tini && \
apt-get clean
# Change pip source to the mirror and upgrade pip itself
RUN pip config set global.index-url "${PIP_INDEX}" && \
pip config set global.extra-index-url "${PIP_INDEX}" && \
python -m pip install --upgrade pip
# Uninstall nv-pytorch fork: packages preinstalled by the NGC image that would
# conflict with the exact versions installed in later layers
RUN pip uninstall -y torch torchvision torchaudio \
pytorch-quantization pytorch-triton torch-tensorrt \
xgboost transformer_engine flash_attn apex megatron-core
# Install torch-2.6.0 + vllm-0.8.2 and the training/runtime dependency stack.
# NOTE: version specifiers containing ">=" must be quoted — otherwise the shell
# parses ">" as output redirection and the version constraint is silently lost
# (e.g. `transformers>=4.49.0` installs unpinned `transformers` and creates a
# file named "=4.49.0"). `ray[default]` is quoted to keep brackets out of glob
# expansion as well.
RUN pip install --no-cache-dir vllm==0.8.2 torch==2.6.0 torchvision==0.21.0 torchaudio==2.6.0 tensordict torchdata \
"transformers>=4.49.0" accelerate datasets peft hf-transfer \
"ray[default]" codetiming hydra-core pandas "pyarrow>=15.0.0" pylatexenc qwen-vl-utils wandb liger-kernel mathruler \
pytest yapf py-spy pyext pre-commit ruff
# Install flash_attn-2.7.4.post1 from the prebuilt cu12/torch2.6/cp310 wheel,
# then delete the wheel file so it does not remain in the image layer.
RUN wget -nv https://github.com/Dao-AILab/flash-attention/releases/download/v2.7.4.post1/flash_attn-2.7.4.post1+cu12torch2.6cxx11abiFALSE-cp310-cp310-linux_x86_64.whl && \
pip install --no-cache-dir flash_attn-2.7.4.post1+cu12torch2.6cxx11abiFALSE-cp310-cp310-linux_x86_64.whl && \
rm -f flash_attn-2.7.4.post1+cu12torch2.6cxx11abiFALSE-cp310-cp310-linux_x86_64.whl
# Fix cv2: replace the NGC image's pynvml/opencv variants with compatible ones.
# Quote ">=" specifiers so the shell does not treat ">" as output redirection
# and silently drop the version constraint.
RUN pip uninstall -y pynvml nvidia-ml-py && \
pip install --no-cache-dir "nvidia-ml-py>=12.560.30" opencv-python-headless==4.8.0.74 fastapi==0.115.6 && \
pip install --no-cache-dir --upgrade "optree>=0.13.0"
# Reset pip config so the build-time mirror does not leak into the final image
RUN pip config unset global.index-url && \
pip config unset global.extra-index-url
...@@ -27,7 +27,7 @@ EasyR1基于 **[HybirdEngine](https://arxiv.org/abs/2409.19256)** 和最新发 ...@@ -27,7 +27,7 @@ EasyR1基于 **[HybirdEngine](https://arxiv.org/abs/2409.19256)** 和最新发
- Python 3.10+ - Python 3.10+
- transformers>=4.49.0 - transformers>=4.49.0
- flash-attn==2.6.1+das.opt4.dtk2504 - flash-attn==2.6.1+das.opt4.dtk2504
- vllm>=0.8.5
### 硬件依赖 ### 硬件依赖
...@@ -52,10 +52,10 @@ EasyR1基于 **[HybirdEngine](https://arxiv.org/abs/2409.19256)** 和最新发 ...@@ -52,10 +52,10 @@ EasyR1基于 **[HybirdEngine](https://arxiv.org/abs/2409.19256)** 和最新发
#### Docker(方法一) #### Docker(方法一)
基于光源pytorch2.4.1+dtk25.04基础镜像环境:镜像下载地址:[https://sourcefind.cn/#/image/dcu/pytorch](https://sourcefind.cn/#/image/dcu/pytorch),根据pytorch2.4.1、python、dtk及系统下载对应的镜像版本。 基于光源pytorch2.4.1+dtk25.04.1基础镜像环境:镜像下载地址:[https://sourcefind.cn/#/image/dcu/pytorch](https://sourcefind.cn/#/image/dcu/pytorch),根据pytorch2.4.1、python、dtk及系统下载对应的镜像版本。
```bash ```bash
docker pull image.sourcefind.cn:5000/dcu/admin/base/pytorch:2.4.1-ubuntu22.04-dtk25.04-py3.10 docker pull image.sourcefind.cn:5000/dcu/admin/base/vllm:0.8.5-ubuntu22.04-dtk25.04.1-rc5-das1.6-py3.10-20250705
docker run -it --shm-size 200g --network=host --name docker_name --privileged --device=/dev/kfd --device=/dev/dri --device=/dev/mkfd --group-add video --cap-add=SYS_PTRACE --security-opt seccomp=unconfined -u root -v /path/your_code_data/:/path/your_code_data/ -v /opt/hyhal/:/opt/hyhal/:ro imageID bash docker run -it --shm-size 200g --network=host --name docker_name --privileged --device=/dev/kfd --device=/dev/dri --device=/dev/mkfd --group-add video --cap-add=SYS_PTRACE --security-opt seccomp=unconfined -u root -v /path/your_code_data/:/path/your_code_data/ -v /opt/hyhal/:/opt/hyhal/:ro imageID bash
...@@ -71,7 +71,7 @@ export LLAMA_NN=0 ...@@ -71,7 +71,7 @@ export LLAMA_NN=0
```bash ```bash
cd docker cd docker
docker build --no-cache -t easyR1:latest .
docker run -it --shm-size 200g --network=host --name docker_name --privileged --device=/dev/kfd --device=/dev/dri --device=/dev/mkfd --group-add video --cap-add=SYS_PTRACE --security-opt seccomp=unconfined -u root -v /path/your_code_data/:/path/your_code_data/ -v /opt/hyhal/:/opt/hyhal/:ro imageID bash docker run -it --shm-size 200g --network=host --name docker_name --privileged --device=/dev/kfd --device=/dev/dri --device=/dev/mkfd --group-add video --cap-add=SYS_PTRACE --security-opt seccomp=unconfined -u root -v /path/your_code_data/:/path/your_code_data/ -v /opt/hyhal/:/opt/hyhal/:ro imageID bash
## 安装所需环境包 ## 安装所需环境包
...@@ -86,12 +86,12 @@ export LLAMA_NN=0 ...@@ -86,12 +86,12 @@ export LLAMA_NN=0
#### Anaconda(方法三) #### Anaconda(方法三)
关于本项目DCU显卡所需的特殊深度学习库可从[光合](https://developer.hpccube.com/tool/)开发者社区下载安装。 关于本项目DCU显卡所需的特殊深度学习库可从[光合](https://developer.hpccube.com/tool/)开发者社区下载安装。
```bash ```bash
DTK驱动: dtk25.04 DTK驱动: dtk25.04.1
python: 3.10 python: 3.10
torch: 2.4.1 torch: 2.4.1+das.opt1.dtk25041
deepspeed: 0.14.2+das.opt2.dtk2504 deepspeed: 0.14.2+das.opt1.dtk25041
flash-attn: 2.6.1+das.opt4.dtk2504 flash-attn: 2.6.1+das.opt1.dtk25041
vllm: 0.7.2+das.opt1.c137085.dtk2504 vllm: 0.8.5.post1+das.opt2.dtk25041
``` ```
`Tips:以上dtk驱动、python、torch等DCU相关工具版本需要严格一一对应` `Tips:以上dtk驱动、python、torch等DCU相关工具版本需要严格一一对应`
...@@ -114,6 +114,7 @@ export LLAMA_NN=0 ...@@ -114,6 +114,7 @@ export LLAMA_NN=0
- Multi-image-text dataset: https://huggingface.co/datasets/hiyouga/journeybench-multi-image-vqa - Multi-image-text dataset: https://huggingface.co/datasets/hiyouga/journeybench-multi-image-vqa
### GRPO 训练 ### GRPO 训练
如果无法连接到Hugging Face,请先安装`pip install -U huggingface_hub hf_transfer`,再在启动前增加 `export HF_ENDPOINT=https://hf-mirror.com`命令 如果无法连接到Hugging Face,请先安装`pip install -U huggingface_hub hf_transfer`,再在启动前增加 `export HF_ENDPOINT=https://hf-mirror.com`命令
```bash ```bash
......
...@@ -42,7 +42,7 @@ We provide a [Dockerfile](./Dockerfile) to easily build environments. ...@@ -42,7 +42,7 @@ We provide a [Dockerfile](./Dockerfile) to easily build environments.
We recommend using the [pre-built docker image](https://hub.docker.com/r/hiyouga/verl) in EasyR1. We recommend using the [pre-built docker image](https://hub.docker.com/r/hiyouga/verl) in EasyR1.
```bash ```bash
docker pull hiyouga/verl:ngc-th2.6.0-cu126-vllm0.8.3-flashinfer0.2.2-cxx11abi0
``` ```
### Hardware Requirements ### Hardware Requirements
...@@ -136,10 +136,6 @@ We also reproduced the following two baselines of the [R1-V](https://github.com/ ...@@ -136,10 +136,6 @@ We also reproduced the following two baselines of the [R1-V](https://github.com/
- [CLEVR-70k-Counting](examples/baselines/qwen2_5_vl_3b_clevr.sh): Train the Qwen2.5-VL-3B-Instruct model on counting problem. - [CLEVR-70k-Counting](examples/baselines/qwen2_5_vl_3b_clevr.sh): Train the Qwen2.5-VL-3B-Instruct model on counting problem.
- [GeoQA-8k](examples/baselines/qwen2_5_vl_3b_geoqa8k.sh): Train the Qwen2.5-VL-3B-Instruct model on GeoQA problem. - [GeoQA-8k](examples/baselines/qwen2_5_vl_3b_geoqa8k.sh): Train the Qwen2.5-VL-3B-Instruct model on GeoQA problem.
## Performance Baselines
See [baselines.md](assets/baselines.md).
## Awesome Work using EasyR1 ## Awesome Work using EasyR1
- **MMR1**: Advancing the Frontiers of Multimodal Reasoning. [![[code]](https://img.shields.io/github/stars/LengSicong/MMR1)](https://github.com/LengSicong/MMR1) - **MMR1**: Advancing the Frontiers of Multimodal Reasoning. [![[code]](https://img.shields.io/github/stars/LengSicong/MMR1)](https://github.com/LengSicong/MMR1)
...@@ -147,8 +143,6 @@ See [baselines.md](assets/baselines.md). ...@@ -147,8 +143,6 @@ See [baselines.md](assets/baselines.md).
- **Seg-Zero**: Reasoning-Chain Guided Segmentation via Cognitive Reinforcement. [![[code]](https://img.shields.io/github/stars/dvlab-research/Seg-Zero)](https://github.com/dvlab-research/Seg-Zero) [![[arxiv]](https://img.shields.io/badge/arxiv-2503.06520-blue)](https://arxiv.org/abs/2503.06520) - **Seg-Zero**: Reasoning-Chain Guided Segmentation via Cognitive Reinforcement. [![[code]](https://img.shields.io/github/stars/dvlab-research/Seg-Zero)](https://github.com/dvlab-research/Seg-Zero) [![[arxiv]](https://img.shields.io/badge/arxiv-2503.06520-blue)](https://arxiv.org/abs/2503.06520)
- **MetaSpatial**: Reinforcing 3D Spatial Reasoning in VLMs for the Metaverse. [![[code]](https://img.shields.io/github/stars/PzySeere/MetaSpatial)](https://github.com/PzySeere/MetaSpatial) [![[arxiv]](https://img.shields.io/badge/arxiv-2503.18470-blue)](https://arxiv.org/abs/2503.18470) - **MetaSpatial**: Reinforcing 3D Spatial Reasoning in VLMs for the Metaverse. [![[code]](https://img.shields.io/github/stars/PzySeere/MetaSpatial)](https://github.com/PzySeere/MetaSpatial) [![[arxiv]](https://img.shields.io/badge/arxiv-2503.18470-blue)](https://arxiv.org/abs/2503.18470)
- **Temporal-R1**: Envolving Temporal Reasoning Capability into LMMs via Temporal Consistent Reward. [![[code]](https://img.shields.io/github/stars/appletea233/Temporal-R1)](https://github.com/appletea233/Temporal-R1) - **Temporal-R1**: Envolving Temporal Reasoning Capability into LMMs via Temporal Consistent Reward. [![[code]](https://img.shields.io/github/stars/appletea233/Temporal-R1)](https://github.com/appletea233/Temporal-R1)
- **NoisyRollout**: Reinforcing Visual Reasoning with Data Augmentation. [![[code]](https://img.shields.io/github/stars/John-AI-Lab/NoisyRollout)](https://github.com/John-AI-Lab/NoisyRollout) [![[arxiv]](https://img.shields.io/badge/arxiv-2504.13055-blue)](https://arxiv.org/pdf/2504.13055)
- **GUI-R1**: A Generalist R1-Style Vision-Language Action Model For GUI Agents. [![[code]](https://img.shields.io/github/stars/ritzz-ai/GUI-R1)](https://github.com/ritzz-ai/GUI-R1) [![[arxiv]](https://img.shields.io/badge/arxiv-2504.10458-blue)](https://arxiv.org/abs/2504.10458)
## TODO ## TODO
......
# Baselines
Environment: [hiyouga/verl:ngc-th2.6.0-cu126-vllm0.8.3-flashinfer0.2.2-cxx11abi0](https://hub.docker.com/layers/hiyouga/verl/ngc-th2.6.0-cu126-vllm0.8.3-flashinfer0.2.2-cxx11abi0/images/sha256-335ed6cd1fe73090e458409cfa4394d6abf4cd0503ca44dbafdc28ff72e5ed20)
EasyR1 version: [v0.3.0](https://github.com/hiyouga/EasyR1/tree/v0.3.0)
Welcome to contribute new data points!
## Algorithm Baselines
### [Qwen2.5-Instruct](https://huggingface.co/Qwen/Qwen2.5-7B-Instruct) on [Math12k](https://huggingface.co/datasets/hiyouga/math12k)
| Size | Algorithm | Bits | LR | KL | Test Score |
| ---- | ----------- | ---- | ---- | ---- | ---------- |
| 7B | GRPO | AMP | 1e-6 | 1e-2 | 0.73->0.79 |
### [Qwen2.5-VL-Instruct](https://huggingface.co/Qwen/Qwen2.5-VL-7B-Instruct) on [Geometry3k](https://huggingface.co/datasets/hiyouga/geometry3k)
| Size | Algorithm | Bits | LR | KL | Test Score |
| ---- | ----------- | ---- | ---- | ---- | ---------- |
| 7B | GRPO | AMP | 1e-6 | 1e-2 | 0.39->0.52 |
| 7B | GRPO | BF16 | 1e-6 | 1e-2 | 0.39->0.52 |
| 7B | GRPO | AMP | 1e-6 | 1e-3 | 0.39->0.52 |
| 7B | RLOO | AMP | 1e-6 | 1e-2 | 0.39->0.53 |
| 3B | GRPO | AMP | 1e-6 | 1e-2 | 0.27->0.44 |
| 32B | GRPO | BF16 | 1e-6 | 1e-2 | 0.46->0.61 |
> [!NOTE]
> The hyper-parameters not listed are all the same as the default values.
## Performance Baselines
### [Qwen2.5-VL-Instruct](https://huggingface.co/Qwen/Qwen2.5-VL-7B-Instruct) on [Geometry3k](https://huggingface.co/datasets/hiyouga/geometry3k)
| Size | GPU Type | Bits | Batch Size | vLLM Util | vLLM TP | Peak Mem | Peak VRAM | Throughput | Sec per step | Actor MFU |
| ---- | ------------- | ---- | ---------- | --------- | ------- | -------- | --------- | ---------- | ------------ | --------- |
| 3B | 8 * H100 80GB | AMP | 4 / 16 | 0.6 | 2 | 120GB | 35GB | 1200 | 180s | 6.3% |
| 7B | 8 * H100 80GB | AMP | 4 / 16 | 0.6 | 2 | 140GB | 60GB | 1200 | 180s | 13.6% |
| 7B | 8 * H100 80GB | AMP | 10 / 20 | 0.6 | 2 | 150GB | 75GB | 1400 | 170s | 19.2% |
| 7B | 8 * L20 48GB | AMP | 4 / 16 | 0.6 | 2 | 150GB | 44GB | 410 | 580s | 26.5% |
| 7B | 8 * H100 80GB | BF16 | 4 / 16 | 0.6 | 2 | 150GB | 50GB | 1280 | 190s | 13.9% |
| 32B | 8 * H100 80GB | BF16 | 1 / 8 | 0.6 | 8 | 240GB | 68GB | 360 | 860s | 11.2% |
- Batch Size: micro_batch_size_per_device_for_update / micro_batch_size_per_device_for_experience
- vLLM Util: rollout.gpu_memory_utilization
- vLLM TP: rollout.tensor_parallel_size
- Peak Mem: Peak CPU memory usage
- Peak VRAM: Peak GPU memory usage
- Throughput: Number of tokens per second per GPU by one training step
- Sec per step: Average time per step in seconds
> [!NOTE]
> The hyper-parameters not listed are all the same as the default values.
assets/wechat.jpg

113 KB | W: | H:

assets/wechat.jpg

111 KB | W: | H:

assets/wechat.jpg
assets/wechat.jpg
assets/wechat.jpg
assets/wechat.jpg
  • 2-up
  • Swipe
  • Onion skin
FROM image.sourcefind.cn:5000/dcu/admin/base/pytorch:2.4.1-ubuntu22.04-dtk25.04-py3.10 FROM image.sourcefind.cn:5000/dcu/admin/base/vllm:0.8.5-ubuntu22.04-dtk25.04.1-rc5-das1.6-py3.10-20250705
\ No newline at end of file \ No newline at end of file
#!/bin/bash
set -x set -x
export PYTHONUNBUFFERED=1
MODEL_PATH=Qwen/Qwen2.5-VL-3B-Instruct # replace it with your local file path MODEL_PATH=Qwen/Qwen2.5-VL-3B-Instruct # replace it with your local file path
python3 -m verl.trainer.main \ python3 -m verl.trainer.main \
...@@ -13,7 +9,6 @@ python3 -m verl.trainer.main \ ...@@ -13,7 +9,6 @@ python3 -m verl.trainer.main \
data.format_prompt=./examples/format_prompt/r1v_format.jinja \ data.format_prompt=./examples/format_prompt/r1v_format.jinja \
worker.actor.model.model_path=${MODEL_PATH} \ worker.actor.model.model_path=${MODEL_PATH} \
worker.rollout.tensor_parallel_size=1 \ worker.rollout.tensor_parallel_size=1 \
worker.reward.reward_type=sequential \ worker.reward.score_function=./examples/score_function/r1v.py:compute_score \
worker.reward.reward_function=./examples/reward_function/r1v.py:compute_score \
trainer.experiment_name=qwen2_5_vl_3b_clevr \ trainer.experiment_name=qwen2_5_vl_3b_clevr \
trainer.n_gpus_per_node=2 trainer.n_gpus_per_node=2
#!/bin/bash
set -x set -x
export PYTHONUNBUFFERED=1
MODEL_PATH=Qwen/Qwen2.5-VL-3B-Instruct # replace it with your local file path MODEL_PATH=Qwen/Qwen2.5-VL-3B-Instruct # replace it with your local file path
python3 -m verl.trainer.main \ python3 -m verl.trainer.main \
...@@ -13,7 +9,6 @@ python3 -m verl.trainer.main \ ...@@ -13,7 +9,6 @@ python3 -m verl.trainer.main \
data.format_prompt=./examples/format_prompt/r1v_format.jinja \ data.format_prompt=./examples/format_prompt/r1v_format.jinja \
worker.actor.model.model_path=${MODEL_PATH} \ worker.actor.model.model_path=${MODEL_PATH} \
worker.rollout.tensor_parallel_size=1 \ worker.rollout.tensor_parallel_size=1 \
worker.reward.reward_type=sequential \ worker.reward.score_function=./examples/score_function/r1v.py:compute_score \
worker.reward.reward_function=./examples/reward_function/r1v.py:compute_score \
trainer.experiment_name=qwen2_5_vl_3b_geoqa8k \ trainer.experiment_name=qwen2_5_vl_3b_geoqa8k \
trainer.n_gpus_per_node=8 trainer.n_gpus_per_node=8
...@@ -7,9 +7,8 @@ data: ...@@ -7,9 +7,8 @@ data:
max_prompt_length: 2048 max_prompt_length: 2048
max_response_length: 2048 max_response_length: 2048
rollout_batch_size: 512 rollout_batch_size: 512
val_batch_size: 1024 val_batch_size: -1
format_prompt: ./examples/format_prompt/math_format.jinja format_prompt: ./examples/format_prompt/math_format.jinja
override_chat_template: null
shuffle: true shuffle: true
seed: 1 seed: 1
max_pixels: 4194304 max_pixels: 4194304
...@@ -71,17 +70,16 @@ worker: ...@@ -71,17 +70,16 @@ worker:
offload_params: false offload_params: false
reward: reward:
reward_type: batch reward_type: function
reward_function: ./examples/reward_function/math.py:compute_score score_function: ./examples/score_function/math.py:compute_score
trainer: trainer:
total_epochs: 15 total_episodes: 15
max_steps: null logger: ["console", "wandb"]
project_name: easy_r1 project_name: easy_r1
experiment_name: qwen2_5_7b_math_grpo experiment_name: qwen2_5_7b_math_grpo
logger: ["console", "wandb"]
nnodes: 1
n_gpus_per_node: 8 n_gpus_per_node: 8
nnodes: 1
val_freq: 5 # -1 to disable val_freq: 5 # -1 to disable
val_before_train: true val_before_train: true
val_only: false val_only: false
......
#!/bin/bash
set -x set -x
export PYTHONUNBUFFERED=1
MODEL_PATH=Qwen/Qwen2.5-7B-Instruct # replace it with your local file path MODEL_PATH=Qwen/Qwen2.5-7B-Instruct # replace it with your local file path
python3 -m verl.trainer.main \ python3 -m verl.trainer.main \
config=examples/config.yaml \ config=examples/config.yaml \
worker.actor.model.model_path=${MODEL_PATH} data.train_files=hiyouga/math12k@train \
data.val_files=hiyouga/math12k@test \
worker.actor.model.model_path=${MODEL_PATH} \
trainer.experiment_name=qwen2_5_7b_math_grpo \
trainer.n_gpus_per_node=8
#!/bin/bash
set -x set -x
export PYTHONUNBUFFERED=1
MODEL_PATH=Qwen/Qwen2.5-VL-32B-Instruct # replace it with your local file path MODEL_PATH=Qwen/Qwen2.5-VL-32B-Instruct # replace it with your local file path
python3 -m verl.trainer.main \ python3 -m verl.trainer.main \
......
#!/bin/bash
set -x set -x
export PYTHONUNBUFFERED=1
MODEL_PATH=Qwen/Qwen2.5-VL-3B-Instruct # replace it with your local file path MODEL_PATH=Qwen/Qwen2.5-VL-3B-Instruct # replace it with your local file path
python3 -m verl.trainer.main \ python3 -m verl.trainer.main \
......
#!/bin/bash
set -x set -x
export PYTHONUNBUFFERED=1
MODEL_PATH=Qwen/Qwen2.5-VL-7B-Instruct # replace it with your local file path MODEL_PATH=Qwen/Qwen2.5-VL-7B-Instruct # replace it with your local file path
python3 -m verl.trainer.main \ python3 -m verl.trainer.main \
......
#!/bin/bash
set -x set -x
export PYTHONUNBUFFERED=1
MODEL_PATH=Qwen/Qwen2.5-VL-7B-Instruct # replace it with your local file path MODEL_PATH=Qwen/Qwen2.5-VL-7B-Instruct # replace it with your local file path
python3 -m verl.trainer.main \ python3 -m verl.trainer.main \
......
#!/bin/bash
set -x set -x
export PYTHONUNBUFFERED=1
MODEL_PATH=Qwen/Qwen2.5-VL-7B-Instruct # replace it with your local file path MODEL_PATH=Qwen/Qwen2.5-VL-7B-Instruct # replace it with your local file path
python3 -m verl.trainer.main \ python3 -m verl.trainer.main \
......
#!/bin/bash
# REMINDER: this script uses test data split and should ONLY be used for debugging. DO NOT use for training. # REMINDER: this script uses test data split and should ONLY be used for debugging. DO NOT use for training.
set -x set -x
export PYTHONUNBUFFERED=1
MODEL_PATH=Qwen/Qwen2.5-VL-7B-Instruct # replace it with your local file path MODEL_PATH=Qwen/Qwen2.5-VL-7B-Instruct # replace it with your local file path
python3 -m verl.trainer.main \ python3 -m verl.trainer.main \
......
#!/bin/bash
set -x
export PYTHONUNBUFFERED=1
MODEL_PATH=Qwen/Qwen3-4B # replace it with your local file path
python3 -m verl.trainer.main \
config=examples/config.yaml \
data.max_response_length=4096 \
worker.actor.model.model_path=${MODEL_PATH} \
trainer.experiment_name=qwen3_4b_math_grpo
working_dir: ./ working_dir: ./
excludes: ["/.git/"] excludes: ["/.git/"]
env_vars: env_vars:
TOKENIZERS_PARALLELISM: "true" TOKENIZERS_PARALLELISM: true
NCCL_DEBUG: "WARN" NCCL_DEBUG: "WARN"
VLLM_LOGGING_LEVEL: "WARN" VLLM_LOGGING_LEVEL: "INFO"
TORCH_NCCL_AVOID_RECORD_STREAMS: "1" TORCH_NCCL_AVOID_RECORD_STREAMS: "1"
PYTORCH_CUDA_ALLOC_CONF: "expandable_segments:False" PYTORCH_CUDA_ALLOC_CONF: "expandable_segments:False"
PYTHONUNBUFFERED: "1"
...@@ -13,34 +13,28 @@ ...@@ -13,34 +13,28 @@
# limitations under the License. # limitations under the License.
import re import re
from typing import Dict, List from typing import Dict
from mathruler.grader import extract_boxed_content, grade_answer from mathruler.grader import extract_boxed_content, grade_answer
def format_reward(predict: str) -> float: def format_reward(predict_str: str) -> float:
pattern = re.compile(r"<think>.*</think>.*\\boxed\{.*\}.*", re.DOTALL) pattern = re.compile(r"<think>.*</think>.*\\boxed\{.*\}.*", re.DOTALL)
format_match = re.fullmatch(pattern, predict) format_match = re.fullmatch(pattern, predict_str)
return 1.0 if format_match else 0.0 return 1.0 if format_match else 0.0
def accuracy_reward(predict: str, ground_truth: str) -> float: def accuracy_reward(predict_str: str, ground_truth: str) -> float:
answer = extract_boxed_content(predict) answer = extract_boxed_content(predict_str)
return 1.0 if grade_answer(answer, ground_truth) else 0.0 return 1.0 if grade_answer(answer, ground_truth) else 0.0
def compute_score(predicts: List[str], ground_truths: List[str], format_weight: float = 0.1) -> List[Dict[str, float]]: def compute_score(predict_str: str, ground_truth: str, format_weight: float = 0.1) -> Dict[str, float]:
scores = [] predict_str = re.sub(r"\s*(<|>|/)\s*", r"\1", predict_str) # handle qwen2.5vl-32b format
for predict, ground_truth in zip(predicts, ground_truths): format_score = format_reward(predict_str)
predict = re.sub(r"\s*(<|>|/)\s*", r"\1", predict) # handle qwen2.5vl-32b format accuracy_score = accuracy_reward(predict_str, ground_truth)
format_score = format_reward(predict) return {
accuracy_score = accuracy_reward(predict, ground_truth) "overall": (1 - format_weight) * accuracy_score + format_weight * format_score,
scores.append( "format": format_score,
{ "accuracy": accuracy_score,
"overall": (1 - format_weight) * accuracy_score + format_weight * format_score, }
"format": format_score,
"accuracy": accuracy_score,
}
)
return scores
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment