Commit c132cbcb authored by chenych

0402 update

parent f92481f0
repos:
  - repo: https://github.com/pre-commit/pre-commit-hooks
    rev: v5.0.0
    hooks:
      - id: check-ast
      - id: check-added-large-files
        args: ['--maxkb=25000']
      - id: check-merge-conflict
      - id: check-yaml
      - id: debug-statements
      - id: end-of-file-fixer
      - id: requirements-txt-fixer
      - id: trailing-whitespace
        args: [--markdown-linebreak-ext=md]
      - id: no-commit-to-branch
        args: ['--branch', 'main']
  - repo: https://github.com/asottile/pyupgrade
    rev: v3.17.0
    hooks:
      - id: pyupgrade
        args: [--py38-plus]
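The hook set above can be exercised locally before committing; a minimal sketch, assuming `pre-commit` is available (it is among the pip packages installed in the Dockerfiles below):

```bash
# Register the hooks with git, then run them once over the whole tree.
pre-commit install
pre-commit run --all-files
```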
# Start from the NVIDIA official image (ubuntu-22.04 + python-3.10)
# https://docs.nvidia.com/deeplearning/frameworks/pytorch-release-notes/rel-24-08.html
FROM nvcr.io/nvidia/pytorch:24.08-py3

# Define environments
ENV MAX_JOBS=32
ENV VLLM_WORKER_MULTIPROC_METHOD=spawn
ENV DEBIAN_FRONTEND=noninteractive
ENV NODE_OPTIONS=""
ENV HF_HUB_ENABLE_HF_TRANSFER="1"

# Define installation arguments
ARG APT_SOURCE=https://mirrors.tuna.tsinghua.edu.cn/ubuntu/
ARG PIP_INDEX=https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple
ARG VLLM_COMMIT=227578480d71fc94ef46ca77fb69496412158d68

# Set apt source
RUN cp /etc/apt/sources.list /etc/apt/sources.list.bak && \
    { \
    echo "deb ${APT_SOURCE} jammy main restricted universe multiverse"; \
    echo "deb ${APT_SOURCE} jammy-updates main restricted universe multiverse"; \
    echo "deb ${APT_SOURCE} jammy-backports main restricted universe multiverse"; \
    echo "deb ${APT_SOURCE} jammy-security main restricted universe multiverse"; \
    } > /etc/apt/sources.list

# Install systemctl
RUN apt-get update && \
    apt-get install -y -o Dpkg::Options::="--force-confdef" systemd && \
    apt-get clean

# Install tini
RUN apt-get update && \
    apt-get install -y tini && \
    apt-get clean

# Change pip source
RUN pip config set global.index-url "${PIP_INDEX}" && \
    pip config set global.extra-index-url "${PIP_INDEX}" && \
    python -m pip install --upgrade pip

# Uninstall nv-pytorch fork
RUN pip uninstall -y torch torchvision torchaudio \
    pytorch-quantization pytorch-triton torch-tensorrt \
    xgboost transformer_engine flash_attn apex megatron-core

# Install vllm-0.7.4-nightly
RUN pip install --no-cache-dir vllm --pre --extra-index-url "https://wheels.vllm.ai/${VLLM_COMMIT}" && \
    git clone -b verl_v1 https://github.com/hiyouga/vllm.git && \
    cp -r vllm/vllm/ /usr/local/lib/python3.10/dist-packages/

# Install torch-2.5.1
RUN pip install --no-cache-dir torch==2.5.1 torchvision==0.20.1 torchaudio==2.5.1 tensordict torchdata \
    "transformers>=4.49.0" accelerate datasets peft hf-transfer \
    "ray[default]" codetiming hydra-core pandas "pyarrow>=15.0.0" pylatexenc qwen-vl-utils wandb liger-kernel mathruler \
    pytest yapf py-spy pyext pre-commit ruff

# Install flash_attn-2.7.4.post1
RUN wget -nv https://github.com/Dao-AILab/flash-attention/releases/download/v2.7.4.post1/flash_attn-2.7.4.post1+cu12torch2.5cxx11abiFALSE-cp310-cp310-linux_x86_64.whl && \
    pip install --no-cache-dir flash_attn-2.7.4.post1+cu12torch2.5cxx11abiFALSE-cp310-cp310-linux_x86_64.whl

# Fix cv2
RUN pip uninstall -y pynvml nvidia-ml-py && \
    pip install --no-cache-dir "nvidia-ml-py>=12.560.30" opencv-python-headless==4.8.0.74 fastapi==0.115.6 && \
    pip install --no-cache-dir --upgrade "optree>=0.13.0"

# Reset pip config
RUN pip config unset global.index-url && \
    pip config unset global.extra-index-url
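To build and enter an image from the Dockerfile above, something like the following should work; the tag `easyr1:nightly` and the resource flags are illustrative, not defined by this commit:

```bash
# Build from the directory containing the Dockerfile, then start an
# interactive container with all GPUs and a large shared-memory segment.
docker build -t easyr1:nightly .
docker run -it --rm --gpus all --shm-size 64g easyr1:nightly bash
```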
# Start from the NVIDIA official image (ubuntu-22.04 + python-3.10)
# https://docs.nvidia.com/deeplearning/frameworks/pytorch-release-notes/rel-24-08.html
FROM nvcr.io/nvidia/pytorch:24.08-py3

# Define environments
ENV MAX_JOBS=32
ENV VLLM_WORKER_MULTIPROC_METHOD=spawn
ENV DEBIAN_FRONTEND=noninteractive
ENV NODE_OPTIONS=""
ENV HF_HUB_ENABLE_HF_TRANSFER="1"

# Define installation arguments
ARG APT_SOURCE=https://mirrors.tuna.tsinghua.edu.cn/ubuntu/
ARG PIP_INDEX=https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple

# Set apt source
RUN cp /etc/apt/sources.list /etc/apt/sources.list.bak && \
    { \
    echo "deb ${APT_SOURCE} jammy main restricted universe multiverse"; \
    echo "deb ${APT_SOURCE} jammy-updates main restricted universe multiverse"; \
    echo "deb ${APT_SOURCE} jammy-backports main restricted universe multiverse"; \
    echo "deb ${APT_SOURCE} jammy-security main restricted universe multiverse"; \
    } > /etc/apt/sources.list

# Install systemctl
RUN apt-get update && \
    apt-get install -y -o Dpkg::Options::="--force-confdef" systemd && \
    apt-get clean

# Install tini
RUN apt-get update && \
    apt-get install -y tini && \
    apt-get clean

# Change pip source
RUN pip config set global.index-url "${PIP_INDEX}" && \
    pip config set global.extra-index-url "${PIP_INDEX}" && \
    python -m pip install --upgrade pip

# Uninstall nv-pytorch fork
RUN pip uninstall -y torch torchvision torchaudio \
    pytorch-quantization pytorch-triton torch-tensorrt \
    xgboost transformer_engine flash_attn apex megatron-core

# Install torch-2.6.0 + vllm-0.8.2
RUN pip install --no-cache-dir vllm==0.8.2 torch==2.6.0 torchvision==0.21.0 torchaudio==2.6.0 tensordict torchdata \
    "transformers>=4.49.0" accelerate datasets peft hf-transfer \
    "ray[default]" codetiming hydra-core pandas "pyarrow>=15.0.0" pylatexenc qwen-vl-utils wandb liger-kernel mathruler \
    pytest yapf py-spy pyext pre-commit ruff

# Install flash_attn-2.7.4.post1
RUN wget -nv https://github.com/Dao-AILab/flash-attention/releases/download/v2.7.4.post1/flash_attn-2.7.4.post1+cu12torch2.6cxx11abiFALSE-cp310-cp310-linux_x86_64.whl && \
    pip install --no-cache-dir flash_attn-2.7.4.post1+cu12torch2.6cxx11abiFALSE-cp310-cp310-linux_x86_64.whl

# Fix cv2
RUN pip uninstall -y pynvml nvidia-ml-py && \
    pip install --no-cache-dir "nvidia-ml-py>=12.560.30" opencv-python-headless==4.8.0.74 fastapi==0.115.6 && \
    pip install --no-cache-dir --upgrade "optree>=0.13.0"

# Reset pip config
RUN pip config unset global.index-url && \
    pip config unset global.extra-index-url
@@ -13,7 +13,9 @@ EasyR1 builds on **[HybridEngine](https://arxiv.org/abs/2409.19256)** and the latest
- Supported algorithms
  - GRPO
  - other RL algorithms (coming soon)
  - Reinforce++
  - ReMax
  - RLOO
- Supported datasets
  - Any text, vision-text dataset in a [specific format](#custom-dataset).
@@ -22,40 +24,93 @@ EasyR1 builds on **[HybridEngine](https://arxiv.org/abs/2409.19256)** and the latest
### Software Requirements
- Python 3.9+
- Python 3.10+
- transformers>=4.49.0
- flash-attn>=2.4.3
- flash-attn==2.6.1+das.opt4.dtk2504
- vllm>=0.7.3
We provide a [Dockerfile](./Dockerfile) to easily build environments.
### Hardware Requirements
\* *estimated*
| Method | Bits | 1.5B | 3B | 7B |
| ------------------------ | ---- | ------ | ------ | ------ |
| GRPO Full Fine-Tuning | AMP | 2*24GB | 4*40GB | 8*40GB |
| Method | Bits | 1.5B | 3B | 7B | 32B |
| ------------------------ | ---- | ------ | ------ | ------ | ------- |
| GRPO Full Fine-Tuning | AMP | 2*24GB | 4*40GB | 8*40GB | 16*80GB |
| GRPO Full Fine-Tuning | BF16 | 1*24GB | 1*40GB | 4*40GB | 8*80GB |
> [!NOTE]
> We are working hard to reduce the VRAM usage in RL training; LoRA support will be integrated in the next update.
> Use `worker.actor.fsdp.torch_dtype=bf16` and `worker.actor.optim.strategy=adamw_bf16` to make sure training runs in bf16.
>
> We are working hard to reduce the VRAM usage in RL training; LoRA support will be integrated in the next update.
## Tutorial: Train Qwen2.5-VL with GRPO on the [Geometry3K](https://huggingface.co/datasets/hiyouga/geometry3k) dataset in just three steps
![image](assets/qwen2_5_vl_7b_geo.png)
### How to Use
### Environment Setup
Adjust the `-v` mount paths, `docker_name`, and `imageID` below to match your environment.
#### Docker (Method 1)
Based on the SourceFind (光源) pytorch2.4.1 + dtk25.04 base image. Download the image from [https://sourcefind.cn/#/image/dcu/pytorch](https://sourcefind.cn/#/image/dcu/pytorch) and pick the version that matches pytorch 2.4.1, your Python version, dtk, and operating system.
```bash
docker pull image.sourcefind.cn:5000/dcu/admin/base/pytorch:2.4.1-ubuntu22.04-dtk25.04-py3.10
docker run -it --shm-size 200g --network=host --name docker_name --privileged --device=/dev/kfd --device=/dev/dri --device=/dev/mkfd --group-add video --cap-add=SYS_PTRACE --security-opt seccomp=unconfined -u root -v /path/your_code_data/:/path/your_code_data/ -v /opt/hyhal/:/opt/hyhal/:ro imageID bash
## Install the required packages
cd EasyR1
pip install vllm-0.8.2+das.opt1.fe6d3b0.dtk2504-cp310-cp310-linux_x86_64.whl
pip install -r requirements.txt --no-deps
## Comment out accelerate, liger-kernel and tensordict, then run the following step
pip install -r requirements.txt
# Build and install
pip install -e .
```
#### Dockerfile (Method 2)
```bash
cd docker
docker build --no-cache -t easyr1:latest .
docker run -it --shm-size 200g --network=host --name docker_name --privileged --device=/dev/kfd --device=/dev/dri --device=/dev/mkfd --group-add video --cap-add=SYS_PTRACE --security-opt seccomp=unconfined -u root -v /path/your_code_data/:/path/your_code_data/ -v /opt/hyhal/:/opt/hyhal/:ro imageID bash
## Install the required packages
cd EasyR1
pip install vllm-0.8.2+das.opt1.fe6d3b0.dtk2504-cp310-cp310-linux_x86_64.whl
pip install -r requirements.txt --no-deps
## Comment out accelerate, liger-kernel and tensordict, then run the following step
pip install -r requirements.txt
# Build and install
pip install -e .
```
#### Anaconda (Method 3)
The special deep learning libraries required by this project for DCU GPUs can be downloaded from the [光合](https://developer.hpccube.com/tool/) developer community.
```bash
DTK driver: dtk25.04
python: 3.10
torch: 2.4.1
deepspeed: 0.14.2+das.opt2.dtk2504
flash-attn: 2.6.1+das.opt4.dtk2504
```
`Tip: the DTK driver, python, torch and other DCU-related tool versions above must correspond exactly, one to one.`
```bash
git clone https://github.com/hiyouga/EasyR1.git
cd EasyR1
pip install vllm-0.8.2+das.opt1.fe6d3b0.dtk2504-cp310-cp310-linux_x86_64.whl
pip install -r requirements.txt --no-deps
## Comment out accelerate, liger-kernel and tensordict, then run the following step
pip install -r requirements.txt
# Build and install
pip install -e .
```
### GRPO Training
```bash
bash examples/run_qwen2_5_vl_7b_geo.sh
bash examples/qwen2_5_7b_math_grpo.sh
```
### Merge Checkpoint in Hugging Face Format
@@ -65,9 +120,8 @@ python3 scripts/model_merger.py --local_dir path_to_your_last_actor_checkpoint
```
> [!NOTE]
> If you encounter issues connecting to Hugging Face, consider using `export HF_ENDPOINT=https://hf-mirror.com`.
>
> If you want to use the SwanLab logger, consider using `bash examples/run_qwen2_5_vl_7b_geo_swanlab.sh`.
> If you want to use the SwanLab logger, consider using `bash examples/qwen2_5_vl_7b_geo3k_swanlab.sh`.
## Custom Dataset
@@ -88,5 +142,3 @@ python3 scripts/model_merger.py --local_dir path_to_your_last_actor_checkpoint
These features are temporarily disabled; we plan to fix them one by one in future updates.
- Vision language models are not yet compatible with padding-free training or DeepSpeed Ulysses parallelism.
- Vision language models are not compatible with `enable_chunked_prefill` until [vLLM v1](https://blog.vllm.ai/2025/01/27/v1-alpha-release.html) is supported.
# EasyR1: An Efficient, Scalable, Multi-Modality RL Training Framework
[![GitHub Repo stars](https://img.shields.io/github/stars/hiyouga/EasyR1)](https://github.com/hiyouga/EasyR1/stargazers)
[![Twitter](https://img.shields.io/twitter/follow/llamafactory_ai)](https://twitter.com/llamafactory_ai)
This project is a clean fork of the original [veRL](https://github.com/volcengine/verl) project to support vision language models. We thank all the authors for providing such a high-performance RL training framework.
EasyR1 is efficient and scalable due to the design of **[HybridEngine](https://arxiv.org/abs/2409.19256)** and the latest release of **[vLLM](https://github.com/vllm-project/vllm)**'s SPMD mode.
@@ -7,16 +10,23 @@ EasyR1 is efficient and scalable due to the design of **[HybridEngine](https://a
## Features
- Supported models
  - Qwen2/Qwen2.5 language models
  - Llama3/Qwen2/Qwen2.5 language models
  - Qwen2/Qwen2.5-VL vision language models
  - DeepSeek-R1 distill models
- Supported algorithms
  - GRPO
  - other RL algorithms (coming soon)
  - Reinforce++
  - ReMax
  - RLOO
- Supported datasets
  - Any text, vision-text dataset in a [specific format](#custom-dataset).
  - Any text, vision-text dataset in a [specific format](#custom-dataset)
- Supported tricks
  - Padding-free training
  - Resuming from checkpoint
  - Wandb & SwanLab & Mlflow & Tensorboard tracking
## Requirements
@@ -29,15 +39,27 @@ EasyR1 is efficient and scalable due to the design of **[HybridEngine](https://a
We provide a [Dockerfile](./Dockerfile) to easily build environments.
We recommend using the [pre-built docker image](https://hub.docker.com/r/hiyouga/verl) for EasyR1.
```bash
# stable
docker pull hiyouga/verl:ngc-th2.5.1-cu120-vllm0.7.4-hotfix
# nightly
docker pull hiyouga/verl:ngc-th2.6.0-cu120-vllm0.8.2
```
### Hardware Requirements
\* *estimated*
| Method | Bits | 1.5B | 3B | 7B |
| ------------------------ | ---- | ------ | ------ | ------ |
| GRPO Full Fine-Tuning | AMP | 2*24GB | 4*40GB | 8*40GB |
| Method | Bits | 1.5B | 3B | 7B | 32B |
| ------------------------ | ---- | ------ | ------ | ------ | ------- |
| GRPO Full Fine-Tuning | AMP | 2*24GB | 4*40GB | 8*40GB | 16*80GB |
| GRPO Full Fine-Tuning | BF16 | 1*24GB | 1*40GB | 4*40GB | 8*80GB |
> [!NOTE]
> Use `worker.actor.fsdp.torch_dtype=bf16` and `worker.actor.optim.strategy=adamw_bf16` to enable bf16 training.
>
> We are working hard to reduce the VRAM usage in RL training; LoRA support will be integrated in upcoming updates.
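For example, the 32B geometry script in this commit enables bf16 through command-line overrides; a minimal sketch (dataset and model-path options omitted, so the defaults from `examples/config.yaml` apply):

```bash
# Keep actor weights and optimizer states in bf16 instead of AMP,
# matching the lower-memory BF16 row of the table above.
python3 -m verl.trainer.main \
    config=examples/config.yaml \
    worker.actor.fsdp.torch_dtype=bf16 \
    worker.actor.optim.strategy=adamw_bf16
```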
## Tutorial: Run Qwen2.5-VL GRPO on [Geometry3K](https://huggingface.co/datasets/hiyouga/geometry3k) Dataset in Just 3 Steps
@@ -55,47 +77,68 @@ pip install -e .
### GRPO Training
```bash
bash examples/run_qwen2_5_vl_7b_geo.sh
bash examples/qwen2_5_vl_7b_geo3k_grpo.sh
```
### Merge Checkpoint in Hugging Face Format
```bash
python3 scripts/model_merger.py --local_dir path_to_your_last_actor_checkpoint
python3 scripts/model_merger.py --local_dir checkpoints/easy_r1/exp_name/global_step_1/actor
```
> [!NOTE]
> [!TIP]
> If you encounter issues with connecting to Hugging Face, consider using `export HF_ENDPOINT=https://hf-mirror.com`.
>
> If you want to use SwanLab logger, consider using `bash examples/run_qwen2_5_vl_7b_geo_swanlab.sh`.
> If you want to use SwanLab logger, consider using `bash examples/qwen2_5_vl_7b_geo3k_swanlab.sh`.
## Custom Dataset
The dataset should strictly follow the example data format.
Please refer to the example datasets to prepare your own dataset.
- Text dataset: https://huggingface.co/datasets/hiyouga/math12k
  - Required columns: problem, answer
- Vision-text dataset: https://huggingface.co/datasets/hiyouga/geometry3k
  - Required columns: images, problem, answer
> [!TIP]
> EasyR1 already supports multi-image datasets.
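To sanity-check that a custom dataset exposes the required columns before training, a minimal sketch using the `datasets` library from the environment above (split names follow the `@train`/`@test` convention of the example configs):

```bash
# Print the schema of the example vision-text dataset; a custom dataset
# should expose the same required columns.
python3 - <<'EOF'
from datasets import load_dataset

ds = load_dataset("hiyouga/geometry3k", split="train")
print(ds.column_names)  # expected to include: images, problem, answer
print(ds[0]["problem"])
EOF
```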
## How to Understand GRPO in EasyR1
![image](assets/easyr1_grpo.png)
- To learn about the GRPO algorithm, you can refer to [Hugging Face's blog](https://huggingface.co/docs/trl/v0.15.2/en/grpo_trainer).
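In short, GRPO drops PPO's learned value baseline: for each prompt, a group of n responses is sampled (cf. `worker.rollout.n: 5` in the config below) and each response's advantage is its reward normalized within that group. A sketch of the standard group-relative estimator:

```latex
A_i = \frac{r_i - \operatorname{mean}(r_1, \dots, r_n)}{\operatorname{std}(r_1, \dots, r_n)}
```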
## How to Run a 70B+ Model in a Multi-node Environment
Please see **[veRL's official doc](https://verl.readthedocs.io/en/latest/start/multinode.html)** for multi-node training and the Ray debugger.
## Other Baselines
- [CLEVR-70k-Counting](examples/run_qwen2_5_vl_2b_clevr.sh): Train the Qwen2.5-VL-3B-Instruct model on the counting problem.
We also reproduced the following two baselines of the [R1-V](https://github.com/deep-agent/R1-V) project.
- [CLEVR-70k-Counting](examples/baselines/qwen2_5_vl_3b_clevr.sh): Train the Qwen2.5-VL-3B-Instruct model on the counting problem.
- [GeoQA-8k](examples/baselines/qwen2_5_vl_3b_geoqa8k.sh): Train the Qwen2.5-VL-3B-Instruct model on the GeoQA problem.
## Awesome Work using EasyR1
- **MMR1**: Advancing the Frontiers of Multimodal Reasoning. [![[code]](https://img.shields.io/github/stars/LengSicong/MMR1)](https://github.com/LengSicong/MMR1)
- **Vision-R1**: Incentivizing Reasoning Capability in Multimodal Large Language Models. [![[code]](https://img.shields.io/github/stars/Osilly/Vision-R1)](https://github.com/Osilly/Vision-R1) [![[arxiv]](https://img.shields.io/badge/arxiv-2503.06749-blue)](https://arxiv.org/abs/2503.06749)
- **Seg-Zero**: Reasoning-Chain Guided Segmentation via Cognitive Reinforcement. [![[code]](https://img.shields.io/github/stars/dvlab-research/Seg-Zero)](https://github.com/dvlab-research/Seg-Zero) [![[arxiv]](https://img.shields.io/badge/arxiv-2503.06520-blue)](https://arxiv.org/abs/2503.06520)
- **MetaSpatial**: Reinforcing 3D Spatial Reasoning in VLMs for the Metaverse. [![[code]](https://img.shields.io/github/stars/PzySeere/MetaSpatial)](https://github.com/PzySeere/MetaSpatial) [![[arxiv]](https://img.shields.io/badge/arxiv-2503.18470-blue)](https://arxiv.org/abs/2503.18470)
- **Temporal-R1**: Evolving Temporal Reasoning Capability into LMMs via Temporal Consistent Reward. [![[code]](https://img.shields.io/github/stars/appletea233/Temporal-R1)](https://github.com/appletea233/Temporal-R1)
## TODO
- Support PPO, Reinforce++ and RLOO for VLMs.
- Support padding-free training for VLMs.
- Support ulysses parallelism for VLMs.
- Support LoRA (high priority).
- Support ulysses parallelism for VLMs (middle priority).
- Support more VLM architectures.
> [!NOTE]
> We will not provide scripts for supervised fine-tuning and inference in this project. If you have such requirements, we recommend using [LLaMA-Factory](https://github.com/hiyouga/LLaMA-Factory).
### Known bugs
These features are temporarily disabled for now; we plan to fix them one by one in future updates.
- Vision language models are not compatible with padding-free training and ulysses parallelism yet.
- Vision language models are not compatible with `enable_chunked_prefill` until [vLLM v1](https://blog.vllm.ai/2025/01/27/v1-alpha-release.html) is supported.
- Vision language models are not compatible with ulysses parallelism yet.
## Discussion Group
assets/qwen2_5_vl_7b_geo.png: updated (71.9 KB → 81 KB)
assets/wechat.jpg: updated (162 KB → 157 KB)
# Start from the NVIDIA official image (ubuntu-22.04 + python-3.10)
# https://docs.nvidia.com/deeplearning/frameworks/pytorch-release-notes/rel-24-08.html
FROM nvcr.io/nvidia/pytorch:24.08-py3

# Define environments
ENV MAX_JOBS=32
ENV VLLM_WORKER_MULTIPROC_METHOD=spawn
ENV DEBIAN_FRONTEND=noninteractive
ENV NODE_OPTIONS=""

# Define installation arguments
ARG APT_SOURCE=https://mirrors.tuna.tsinghua.edu.cn/ubuntu/
ARG PIP_INDEX=https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple

# Set apt source
RUN cp /etc/apt/sources.list /etc/apt/sources.list.bak && \
    { \
    echo "deb ${APT_SOURCE} jammy main restricted universe multiverse"; \
    echo "deb ${APT_SOURCE} jammy-updates main restricted universe multiverse"; \
    echo "deb ${APT_SOURCE} jammy-backports main restricted universe multiverse"; \
    echo "deb ${APT_SOURCE} jammy-security main restricted universe multiverse"; \
    } > /etc/apt/sources.list

# Install systemctl
RUN apt-get update && \
    apt-get install -y -o Dpkg::Options::="--force-confdef" systemd && \
    apt-get clean

# Install tini
RUN apt-get update && \
    apt-get install -y tini && \
    apt-get clean

# Change pip source
RUN pip config set global.index-url "${PIP_INDEX}" && \
    pip config set global.extra-index-url "${PIP_INDEX}" && \
    python -m pip install --upgrade pip

# Install torch-2.5.1 + vllm-0.7.3
RUN pip install --no-cache-dir vllm==0.7.3 torch==2.5.1 torchvision==0.20.1 torchaudio==2.5.1 tensordict \
    "transformers>=4.49.0" accelerate datasets peft \
    ray codetiming hydra-core pandas "pyarrow>=15.0.0" pylatexenc qwen-vl-utils

# Install flash_attn-2.7.4.post1
RUN pip uninstall -y transformer-engine flash-attn && \
    wget -nv https://github.com/Dao-AILab/flash-attention/releases/download/v2.7.4.post1/flash_attn-2.7.4.post1+cu12torch2.5cxx11abiFALSE-cp310-cp310-linux_x86_64.whl && \
    pip install --no-cache-dir flash_attn-2.7.4.post1+cu12torch2.5cxx11abiFALSE-cp310-cp310-linux_x86_64.whl
FROM image.sourcefind.cn:5000/dcu/admin/base/pytorch:2.4.1-ubuntu22.04-dtk25.04-py3.10
set -x
MODEL_PATH=Qwen/Qwen2.5-VL-3B-Instruct # replace it with your local file path
SYSTEM_PROMPT="""A conversation between User and Assistant. The user asks a question, and the Assistant solves it. The assistant
first thinks about the reasoning process in the mind and then provides the user with the answer. The reasoning
process and answer are enclosed within <think> </think> and <answer> </answer> tags, respectively, i.e.,
<think> reasoning process here </think><answer> answer here </answer>"""
python3 -m verl.trainer.main \
    config=examples/config.yaml \
    data.train_files=BUAADreamer/clevr_count_70k@train \
    data.val_files=BUAADreamer/clevr_count_70k@test \
    data.system_prompt="${SYSTEM_PROMPT}" \
    worker.actor.model.model_path=${MODEL_PATH} \
    worker.rollout.tensor_parallel_size=1 \
    worker.rollout.enable_chunked_prefill=false \
    worker.reward.compute_score=r1v \
    trainer.experiment_name=qwen2_5_vl_3b_clevr \
    trainer.n_gpus_per_node=2
set -x
MODEL_PATH=Qwen/Qwen2.5-VL-3B-Instruct # replace it with your local file path
SYSTEM_PROMPT="""A conversation between User and Assistant. The user asks a question, and the Assistant solves it. The assistant
first thinks about the reasoning process in the mind and then provides the user with the answer. The reasoning
process and answer are enclosed within <think> </think> and <answer> </answer> tags, respectively, i.e.,
<think> reasoning process here </think><answer> answer here </answer>"""
python3 -m verl.trainer.main \
    config=examples/config.yaml \
    data.train_files=leonardPKU/GEOQA_8K_R1V@train \
    data.val_files=leonardPKU/GEOQA_8K_R1V@test \
    data.system_prompt="${SYSTEM_PROMPT}" \
    worker.actor.model.model_path=${MODEL_PATH} \
    worker.rollout.tensor_parallel_size=1 \
    worker.rollout.enable_chunked_prefill=false \
    worker.reward.compute_score=r1v \
    trainer.experiment_name=qwen2_5_vl_3b_geoqa8k \
    trainer.n_gpus_per_node=8
data:
  train_files: hiyouga/math12k@train
  val_files: hiyouga/math12k@test
  prompt_key: problem
  answer_key: answer
  image_key: images
  max_prompt_length: 2048
  max_response_length: 2048
  rollout_batch_size: 512
  val_batch_size: -1
  shuffle: true
  seed: 1
  max_pixels: 4194304
  min_pixels: 262144

algorithm:
  adv_estimator: grpo
  disable_kl: false
  use_kl_loss: true
  kl_penalty: low_var_kl
  kl_coef: 1.0e-2

worker:
  actor:
    global_batch_size: 128
    micro_batch_size_per_device_for_update: 4
    micro_batch_size_per_device_for_experience: 16
    max_grad_norm: 1.0
    padding_free: true
    ulysses_sequence_parallel_size: 1
    model:
      model_path: Qwen/Qwen2.5-7B-Instruct
      enable_gradient_checkpointing: true
      trust_remote_code: false
      freeze_vision_tower: false
    optim:
      lr: 1.0e-6
      weight_decay: 1.0e-2
      strategy: adamw  # {adamw, adamw_bf16}
      lr_warmup_ratio: 0.0
    fsdp:
      enable_full_shard: true
      enable_cpu_offload: false
      enable_rank0_init: true
    offload:
      offload_params: true  # true: more CPU memory; false: more GPU memory
      offload_optimizer: true  # true: more CPU memory; false: more GPU memory

  rollout:
    temperature: 1.0
    n: 5
    gpu_memory_utilization: 0.6
    enforce_eager: false
    enable_chunked_prefill: false
    tensor_parallel_size: 2
    limit_images: 0
    val_override_config:
      temperature: 0.5
      n: 1

  ref:
    fsdp:
      enable_full_shard: true
      enable_cpu_offload: true  # true: more CPU memory; false: more GPU memory
      enable_rank0_init: true
    offload:
      offload_params: false

  reward:
    reward_type: function
    compute_score: math

trainer:
  total_episodes: 15
  logger: ["console", "wandb"]
  project_name: easy_r1
  experiment_name: qwen2_5_7b_math_grpo
  n_gpus_per_node: 8
  nnodes: 1
  val_freq: 5  # -1 to disable
  val_before_train: true
  val_only: false
  val_generations_to_log: 1
  save_freq: 5  # -1 to disable
  save_limit: 3  # -1 to disable
  save_checkpoint_path: null
  load_checkpoint_path: null
set -x
MODEL_PATH=Qwen/Qwen2.5-7B-Instruct # replace it with your local file path
SYSTEM_PROMPT="""You FIRST think about the reasoning process as an internal monologue and then provide the final answer.
The reasoning process MUST BE enclosed within <think> </think> tags. The final answer MUST BE put in \boxed{}."""
python3 -m verl.trainer.main \
    config=examples/config.yaml \
    data.train_files=hiyouga/math12k@train \
    data.val_files=hiyouga/math12k@test \
    data.system_prompt="${SYSTEM_PROMPT}" \
    worker.actor.model.model_path=${MODEL_PATH} \
    trainer.experiment_name=qwen2_5_7b_math_grpo \
    trainer.n_gpus_per_node=8
set -x
MODEL_PATH=Qwen/Qwen2.5-VL-32B-Instruct # replace it with your local file path
SYSTEM_PROMPT="""You FIRST think about the reasoning process as an internal monologue and then provide the final answer.
The reasoning process MUST BE enclosed within <think> </think> tags. The final answer MUST BE put in \boxed{}."""
python3 -m verl.trainer.main \
    config=examples/config.yaml \
    data.train_files=hiyouga/geometry3k@train \
    data.val_files=hiyouga/geometry3k@test \
    data.system_prompt="${SYSTEM_PROMPT}" \
    worker.actor.model.model_path=${MODEL_PATH} \
    worker.actor.micro_batch_size_per_device_for_update=1 \
    worker.actor.micro_batch_size_per_device_for_experience=8 \
    worker.actor.fsdp.torch_dtype=bf16 \
    worker.actor.optim.strategy=adamw_bf16 \
    worker.rollout.tensor_parallel_size=8 \
    worker.rollout.enable_chunked_prefill=false \
    trainer.experiment_name=qwen2_5_vl_32b_geo_grpo \
    trainer.n_gpus_per_node=8
set -x
MODEL_PATH=Qwen/Qwen2.5-VL-3B-Instruct # replace it with your local file path
SYSTEM_PROMPT="""You FIRST think about the reasoning process as an internal monologue and then provide the final answer.
The reasoning process MUST BE enclosed within <think> </think> tags. The final answer MUST BE put in \boxed{}."""
python3 -m verl.trainer.main \
    config=examples/config.yaml \
    data.train_files=hiyouga/geometry3k@train \
    data.val_files=hiyouga/geometry3k@test \
    data.system_prompt="${SYSTEM_PROMPT}" \
    worker.actor.model.model_path=${MODEL_PATH} \
    worker.rollout.tensor_parallel_size=1 \
    worker.rollout.enable_chunked_prefill=false \
    trainer.experiment_name=qwen2_5_vl_3b_geo_grpo \
    trainer.n_gpus_per_node=2
set -x
MODEL_PATH=Qwen/Qwen2.5-VL-7B-Instruct # replace it with your local file path
SYSTEM_PROMPT="""You FIRST think about the reasoning process as an internal monologue and then provide the final answer.
The reasoning process MUST BE enclosed within <think> </think> tags. The final answer MUST BE put in \boxed{}."""
python3 -m verl.trainer.main \
    config=examples/config.yaml \
    data.train_files=hiyouga/geometry3k@train \
    data.val_files=hiyouga/geometry3k@test \
    data.system_prompt="${SYSTEM_PROMPT}" \
    worker.actor.model.model_path=${MODEL_PATH} \
    worker.rollout.enable_chunked_prefill=false \
    trainer.experiment_name=qwen2_5_vl_7b_geo_grpo \
    trainer.n_gpus_per_node=8
set -x
MODEL_PATH=Qwen/Qwen2.5-VL-7B-Instruct # replace it with your local file path
SYSTEM_PROMPT="""You FIRST think about the reasoning process as an internal monologue and then provide the final answer.
The reasoning process MUST BE enclosed within <think> </think> tags. The final answer MUST BE put in \boxed{}."""
python3 -m verl.trainer.main \
    config=examples/config.yaml \
    data.train_files=hiyouga/geometry3k@train \
    data.val_files=hiyouga/geometry3k@test \
    data.system_prompt="${SYSTEM_PROMPT}" \
    worker.actor.model.model_path=${MODEL_PATH} \
    worker.rollout.enable_chunked_prefill=false \
    algorithm.adv_estimator=reinforce_plus_plus \
    trainer.experiment_name=qwen2_5_vl_7b_geo_reinforce_pp \
    trainer.n_gpus_per_node=8
set -x
MODEL_PATH=Qwen/Qwen2.5-VL-7B-Instruct # replace it with your local file path
SYSTEM_PROMPT="""You FIRST think about the reasoning process as an internal monologue and then provide the final answer.
The reasoning process MUST BE enclosed within <think> </think> tags. The final answer MUST BE put in \boxed{}."""
python3 -m verl.trainer.main \
    config=examples/config.yaml \
    data.train_files=hiyouga/geometry3k@train \
    data.val_files=hiyouga/geometry3k@test \
    data.system_prompt="${SYSTEM_PROMPT}" \
    worker.actor.model.model_path=${MODEL_PATH} \
    worker.rollout.enable_chunked_prefill=false \
    trainer.experiment_name=qwen2_5_vl_7b_geo_grpo \
    trainer.logger=['console','swanlab'] \
    trainer.n_gpus_per_node=8
@@ -2,4 +2,3 @@ working_dir: ./
excludes: ["/.git/"]
env_vars:
  TORCH_NCCL_AVOID_RECORD_STREAMS: "1"
  VLLM_ATTENTION_BACKEND: "XFORMERS"
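This file is a Ray runtime-environment spec; for multi-node runs it would typically be attached when submitting the trainer as a Ray job. A hedged sketch — the file path and entrypoint here are assumptions, see veRL's multi-node doc referenced in the README:

```bash
# Submit the trainer as a Ray job so workers inherit working_dir,
# excludes, and the env vars defined above.
ray job submit --runtime-env runtime_env.yaml \
    -- python3 -m verl.trainer.main config=examples/config.yaml
```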
@@ -4,17 +4,25 @@ build-backend = "setuptools.build_meta"
[project]
name = "verl"
dynamic = ["version", "dependencies", "optional-dependencies", "readme", "license"]
requires-python = ">=3.8"
dynamic = [
    "version",
    "dependencies",
    "optional-dependencies",
    "requires-python",
    "authors",
    "description",
    "readme",
    "license"
]
[tool.ruff]
target-version = "py38"
target-version = "py39"
line-length = 119
indent-width = 4
[tool.ruff.lint]
ignore = ["C901", "E501", "E741", "W605", "C408"]
select = ["C", "E", "F", "I", "W"]
select = ["C", "E", "F", "I", "W", "RUF022"]
[tool.ruff.lint.per-file-ignores]
"__init__.py" = ["E402", "F401", "F403", "F811"]