Commit c132cbcb authored by chenych

0402 update

parent f92481f0
repos:
  - repo: https://github.com/pre-commit/pre-commit-hooks
    rev: v5.0.0
    hooks:
      - id: check-ast
      - id: check-added-large-files
        args: ['--maxkb=25000']
      - id: check-merge-conflict
      - id: check-yaml
      - id: debug-statements
      - id: end-of-file-fixer
      - id: requirements-txt-fixer
      - id: trailing-whitespace
        args: [--markdown-linebreak-ext=md]
      - id: no-commit-to-branch
        args: ['--branch', 'main']
  - repo: https://github.com/asottile/pyupgrade
    rev: v3.17.0
    hooks:
      - id: pyupgrade
        args: [--py38-plus]
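The hook set above can be exercised locally before committing; a minimal sketch, assuming `pre-commit` is available (it is among the pip packages installed in the Dockerfiles below):

```bash
# Register the hooks with git, then run them once over the whole tree.
pre-commit install
pre-commit run --all-files
```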
# Start from the NVIDIA official image (ubuntu-22.04 + python-3.10)
# https://docs.nvidia.com/deeplearning/frameworks/pytorch-release-notes/rel-24-08.html
FROM nvcr.io/nvidia/pytorch:24.08-py3

# Define environments
ENV MAX_JOBS=32
ENV VLLM_WORKER_MULTIPROC_METHOD=spawn
ENV DEBIAN_FRONTEND=noninteractive
ENV NODE_OPTIONS=""
ENV HF_HUB_ENABLE_HF_TRANSFER="1"

# Define installation arguments
ARG APT_SOURCE=https://mirrors.tuna.tsinghua.edu.cn/ubuntu/
ARG PIP_INDEX=https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple
ARG VLLM_COMMIT=227578480d71fc94ef46ca77fb69496412158d68

# Set apt source
RUN cp /etc/apt/sources.list /etc/apt/sources.list.bak && \
    { \
    echo "deb ${APT_SOURCE} jammy main restricted universe multiverse"; \
    echo "deb ${APT_SOURCE} jammy-updates main restricted universe multiverse"; \
    echo "deb ${APT_SOURCE} jammy-backports main restricted universe multiverse"; \
    echo "deb ${APT_SOURCE} jammy-security main restricted universe multiverse"; \
    } > /etc/apt/sources.list

# Install systemctl
RUN apt-get update && \
    apt-get install -y -o Dpkg::Options::="--force-confdef" systemd && \
    apt-get clean

# Install tini
RUN apt-get update && \
    apt-get install -y tini && \
    apt-get clean

# Change pip source
RUN pip config set global.index-url "${PIP_INDEX}" && \
    pip config set global.extra-index-url "${PIP_INDEX}" && \
    python -m pip install --upgrade pip

# Uninstall nv-pytorch fork
RUN pip uninstall -y torch torchvision torchaudio \
    pytorch-quantization pytorch-triton torch-tensorrt \
    xgboost transformer_engine flash_attn apex megatron-core

# Install vllm-0.7.4-nightly
RUN pip install --no-cache-dir vllm --pre --extra-index-url "https://wheels.vllm.ai/${VLLM_COMMIT}" && \
    git clone -b verl_v1 https://github.com/hiyouga/vllm.git && \
    cp -r vllm/vllm/ /usr/local/lib/python3.10/dist-packages/

# Install torch-2.5.1
RUN pip install --no-cache-dir torch==2.5.1 torchvision==0.20.1 torchaudio==2.5.1 tensordict torchdata \
    "transformers>=4.49.0" accelerate datasets peft hf-transfer \
    "ray[default]" codetiming hydra-core pandas "pyarrow>=15.0.0" pylatexenc qwen-vl-utils wandb liger-kernel mathruler \
    pytest yapf py-spy pyext pre-commit ruff

# Install flash_attn-2.7.4.post1
RUN wget -nv https://github.com/Dao-AILab/flash-attention/releases/download/v2.7.4.post1/flash_attn-2.7.4.post1+cu12torch2.5cxx11abiFALSE-cp310-cp310-linux_x86_64.whl && \
    pip install --no-cache-dir flash_attn-2.7.4.post1+cu12torch2.5cxx11abiFALSE-cp310-cp310-linux_x86_64.whl

# Fix cv2
RUN pip uninstall -y pynvml nvidia-ml-py && \
    pip install --no-cache-dir "nvidia-ml-py>=12.560.30" opencv-python-headless==4.8.0.74 fastapi==0.115.6 && \
    pip install --no-cache-dir --upgrade "optree>=0.13.0"

# Reset pip config
RUN pip config unset global.index-url && \
    pip config unset global.extra-index-url
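To build and enter an image from the Dockerfile above, something like the following should work; the tag `easyr1:nightly` and the resource flags are illustrative, not defined by this commit:

```bash
# Build from the directory containing the Dockerfile, then start an
# interactive container with all GPUs and a large shared-memory segment.
docker build -t easyr1:nightly .
docker run -it --rm --gpus all --shm-size 64g easyr1:nightly bash
```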
# Start from the NVIDIA official image (ubuntu-22.04 + python-3.10)
# https://docs.nvidia.com/deeplearning/frameworks/pytorch-release-notes/rel-24-08.html
FROM nvcr.io/nvidia/pytorch:24.08-py3

# Define environments
ENV MAX_JOBS=32
ENV VLLM_WORKER_MULTIPROC_METHOD=spawn
ENV DEBIAN_FRONTEND=noninteractive
ENV NODE_OPTIONS=""
ENV HF_HUB_ENABLE_HF_TRANSFER="1"

# Define installation arguments
ARG APT_SOURCE=https://mirrors.tuna.tsinghua.edu.cn/ubuntu/
ARG PIP_INDEX=https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple

# Set apt source
RUN cp /etc/apt/sources.list /etc/apt/sources.list.bak && \
    { \
    echo "deb ${APT_SOURCE} jammy main restricted universe multiverse"; \
    echo "deb ${APT_SOURCE} jammy-updates main restricted universe multiverse"; \
    echo "deb ${APT_SOURCE} jammy-backports main restricted universe multiverse"; \
    echo "deb ${APT_SOURCE} jammy-security main restricted universe multiverse"; \
    } > /etc/apt/sources.list

# Install systemctl
RUN apt-get update && \
    apt-get install -y -o Dpkg::Options::="--force-confdef" systemd && \
    apt-get clean

# Install tini
RUN apt-get update && \
    apt-get install -y tini && \
    apt-get clean

# Change pip source
RUN pip config set global.index-url "${PIP_INDEX}" && \
    pip config set global.extra-index-url "${PIP_INDEX}" && \
    python -m pip install --upgrade pip

# Uninstall nv-pytorch fork
RUN pip uninstall -y torch torchvision torchaudio \
    pytorch-quantization pytorch-triton torch-tensorrt \
    xgboost transformer_engine flash_attn apex megatron-core

# Install torch-2.6.0 + vllm-0.8.2
RUN pip install --no-cache-dir vllm==0.8.2 torch==2.6.0 torchvision==0.21.0 torchaudio==2.6.0 tensordict torchdata \
    "transformers>=4.49.0" accelerate datasets peft hf-transfer \
    "ray[default]" codetiming hydra-core pandas "pyarrow>=15.0.0" pylatexenc qwen-vl-utils wandb liger-kernel mathruler \
    pytest yapf py-spy pyext pre-commit ruff

# Install flash_attn-2.7.4.post1
RUN wget -nv https://github.com/Dao-AILab/flash-attention/releases/download/v2.7.4.post1/flash_attn-2.7.4.post1+cu12torch2.6cxx11abiFALSE-cp310-cp310-linux_x86_64.whl && \
    pip install --no-cache-dir flash_attn-2.7.4.post1+cu12torch2.6cxx11abiFALSE-cp310-cp310-linux_x86_64.whl

# Fix cv2
RUN pip uninstall -y pynvml nvidia-ml-py && \
    pip install --no-cache-dir "nvidia-ml-py>=12.560.30" opencv-python-headless==4.8.0.74 fastapi==0.115.6 && \
    pip install --no-cache-dir --upgrade "optree>=0.13.0"

# Reset pip config
RUN pip config unset global.index-url && \
    pip config unset global.extra-index-url
@@ -13,7 +13,9 @@ EasyR1 builds on **[HybridEngine](https://arxiv.org/abs/2409.19256)** and the latest
- Supported algorithms
  - GRPO
  - other RL algorithms (coming soon)
  - Reinforce++
  - ReMax
  - RLOO
- Supported datasets
  - Any text, vision-text dataset in a [specific format](#custom-dataset).
@@ -22,40 +24,93 @@ EasyR1 builds on **[HybridEngine](https://arxiv.org/abs/2409.19256)** and the latest
### Software Requirements
- Python 3.9+
- Python 3.10+
- transformers>=4.49.0
- flash-attn>=2.4.3
- flash-attn==2.6.1+das.opt4.dtk2504
- vllm>=0.7.3
We provide a [Dockerfile](./Dockerfile) to easily build environments.
### Hardware Requirements
\* *estimated*
| Method | Bits | 1.5B | 3B | 7B |
| ------------------------ | ---- | ------ | ------ | ------ |
| GRPO Full Fine-Tuning | AMP | 2*24GB | 4*40GB | 8*40GB |
| Method | Bits | 1.5B | 3B | 7B | 32B |
| ------------------------ | ---- | ------ | ------ | ------ | ------- |
| GRPO Full Fine-Tuning | AMP | 2*24GB | 4*40GB | 8*40GB | 16*80GB |
| GRPO Full Fine-Tuning | BF16 | 1*24GB | 1*40GB | 4*40GB | 8*80GB |
> [!NOTE]
> We are working hard to reduce the VRAM usage in RL training; LoRA support will be integrated in the next update.
> Use `worker.actor.fsdp.torch_dtype=bf16` and `worker.actor.optim.strategy=adamw_bf16` to make sure training runs in bf16.
>
> We are working hard to reduce the VRAM usage in RL training; LoRA support will be integrated in the next update.
## Tutorial: Train Qwen2.5-VL with GRPO on the [Geometry3K](https://huggingface.co/datasets/hiyouga/geometry3k) dataset in just three steps
![image](assets/qwen2_5_vl_7b_geo.png)
### How to Use
### Environment Setup
Adjust the `-v` mount paths, `docker_name`, and `imageID` below to match your environment.
#### Docker (Method 1)
Based on the SourceFind (光源) pytorch2.4.1 + dtk25.04 base image. Download the image from [https://sourcefind.cn/#/image/dcu/pytorch](https://sourcefind.cn/#/image/dcu/pytorch) and pick the version that matches pytorch 2.4.1, your Python version, dtk, and operating system.
```bash
docker pull image.sourcefind.cn:5000/dcu/admin/base/pytorch:2.4.1-ubuntu22.04-dtk25.04-py3.10
docker run -it --shm-size 200g --network=host --name docker_name --privileged --device=/dev/kfd --device=/dev/dri --device=/dev/mkfd --group-add video --cap-add=SYS_PTRACE --security-opt seccomp=unconfined -u root -v /path/your_code_data/:/path/your_code_data/ -v /opt/hyhal/:/opt/hyhal/:ro imageID bash
## Install the required packages
cd EasyR1
pip install vllm-0.8.2+das.opt1.fe6d3b0.dtk2504-cp310-cp310-linux_x86_64.whl
pip install -r requirements.txt --no-deps
## Comment out accelerate, liger-kernel and tensordict, then run the following step
pip install -r requirements.txt
# Build and install
pip install -e .
```
#### Dockerfile (Method 2)
```bash
cd docker
docker build --no-cache -t easyr1:latest .
docker run -it --shm-size 200g --network=host --name docker_name --privileged --device=/dev/kfd --device=/dev/dri --device=/dev/mkfd --group-add video --cap-add=SYS_PTRACE --security-opt seccomp=unconfined -u root -v /path/your_code_data/:/path/your_code_data/ -v /opt/hyhal/:/opt/hyhal/:ro imageID bash
## Install the required packages
cd EasyR1
pip install vllm-0.8.2+das.opt1.fe6d3b0.dtk2504-cp310-cp310-linux_x86_64.whl
pip install -r requirements.txt --no-deps
## Comment out accelerate, liger-kernel and tensordict, then run the following step
pip install -r requirements.txt
# Build and install
pip install -e .
```
#### Anaconda (Method 3)
The special deep learning libraries required by this project for DCU GPUs can be downloaded from the [光合](https://developer.hpccube.com/tool/) developer community.
```bash
DTK driver: dtk25.04
python: 3.10
torch: 2.4.1
deepspeed: 0.14.2+das.opt2.dtk2504
flash-attn: 2.6.1+das.opt4.dtk2504
```
`Tip: the DTK driver, python, torch and other DCU-related tool versions above must correspond exactly, one to one.`
```bash
git clone https://github.com/hiyouga/EasyR1.git
cd EasyR1
pip install vllm-0.8.2+das.opt1.fe6d3b0.dtk2504-cp310-cp310-linux_x86_64.whl
pip install -r requirements.txt --no-deps
## Comment out accelerate, liger-kernel and tensordict, then run the following step
pip install -r requirements.txt
# Build and install
pip install -e .
```
### GRPO Training
```bash
bash examples/run_qwen2_5_vl_7b_geo.sh
bash examples/qwen2_5_7b_math_grpo.sh
```
### Merge Checkpoint in Hugging Face Format
@@ -65,9 +120,8 @@ python3 scripts/model_merger.py --local_dir path_to_your_last_actor_checkpoint
```
> [!NOTE]
> If you encounter issues connecting to Hugging Face, consider using `export HF_ENDPOINT=https://hf-mirror.com`.
>
> If you want to use the SwanLab logger, consider using `bash examples/run_qwen2_5_vl_7b_geo_swanlab.sh`.
> If you want to use the SwanLab logger, consider using `bash examples/qwen2_5_vl_7b_geo3k_swanlab.sh`.
## Custom Dataset
@@ -88,5 +142,3 @@ python3 scripts/model_merger.py --local_dir path_to_your_last_actor_checkpoint
These features are temporarily disabled; we plan to fix them one by one in future updates.
- Vision language models are not yet compatible with padding-free training or DeepSpeed Ulysses parallelism.
- Vision language models are not compatible with `enable_chunked_prefill` until [vLLM v1](https://blog.vllm.ai/2025/01/27/v1-alpha-release.html) is supported.
# EasyR1: An Efficient, Scalable, Multi-Modality RL Training Framework
[![GitHub Repo stars](https://img.shields.io/github/stars/hiyouga/EasyR1)](https://github.com/hiyouga/EasyR1/stargazers)
[![Twitter](https://img.shields.io/twitter/follow/llamafactory_ai)](https://twitter.com/llamafactory_ai)
This project is a clean fork of the original [veRL](https://github.com/volcengine/verl) project to support vision language models. We thank all the authors for providing such a high-performance RL training framework.
EasyR1 is efficient and scalable due to the design of **[HybridEngine](https://arxiv.org/abs/2409.19256)** and the latest release of **[vLLM](https://github.com/vllm-project/vllm)**'s SPMD mode.
@@ -7,16 +10,23 @@ EasyR1 is efficient and scalable due to the design of **[HybridEngine](https://a
## Features
- Supported models
  - Qwen2/Qwen2.5 language models
  - Llama3/Qwen2/Qwen2.5 language models
  - Qwen2/Qwen2.5-VL vision language models
  - DeepSeek-R1 distill models
- Supported algorithms
  - GRPO
  - other RL algorithms (coming soon)
  - Reinforce++
  - ReMax
  - RLOO
- Supported datasets
  - Any text, vision-text dataset in a [specific format](#custom-dataset).
  - Any text, vision-text dataset in a [specific format](#custom-dataset)
- Supported tricks
  - Padding-free training
  - Resuming from checkpoint
  - Wandb & SwanLab & Mlflow & Tensorboard tracking
## Requirements
@@ -29,15 +39,27 @@ EasyR1 is efficient and scalable due to the design of **[HybridEngine](https://a
We provide a [Dockerfile](./Dockerfile) to easily build environments.
We recommend using the [pre-built docker image](https://hub.docker.com/r/hiyouga/verl) for EasyR1.
```bash
# stable
docker pull hiyouga/verl:ngc-th2.5.1-cu120-vllm0.7.4-hotfix
# nightly
docker pull hiyouga/verl:ngc-th2.6.0-cu120-vllm0.8.2
```
### Hardware Requirements
\* *estimated*
| Method | Bits | 1.5B | 3B | 7B |
| ------------------------ | ---- | ------ | ------ | ------ |
| GRPO Full Fine-Tuning | AMP | 2*24GB | 4*40GB | 8*40GB |
| Method | Bits | 1.5B | 3B | 7B | 32B |
| ------------------------ | ---- | ------ | ------ | ------ | ------- |
| GRPO Full Fine-Tuning | AMP | 2*24GB | 4*40GB | 8*40GB | 16*80GB |
| GRPO Full Fine-Tuning | BF16 | 1*24GB | 1*40GB | 4*40GB | 8*80GB |
> [!NOTE]
> Use `worker.actor.fsdp.torch_dtype=bf16` and `worker.actor.optim.strategy=adamw_bf16` to enable bf16 training.
>
> We are working hard to reduce the VRAM usage in RL training; LoRA support will be integrated in upcoming updates.
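For example, the 32B geometry script in this commit enables bf16 through command-line overrides; a minimal sketch (dataset and model-path options omitted, so the defaults from `examples/config.yaml` apply):

```bash
# Keep actor weights and optimizer states in bf16 instead of AMP,
# matching the lower-memory BF16 row of the table above.
python3 -m verl.trainer.main \
    config=examples/config.yaml \
    worker.actor.fsdp.torch_dtype=bf16 \
    worker.actor.optim.strategy=adamw_bf16
```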
## Tutorial: Run Qwen2.5-VL GRPO on [Geometry3K](https://huggingface.co/datasets/hiyouga/geometry3k) Dataset in Just 3 Steps
@@ -55,47 +77,68 @@ pip install -e .
### GRPO Training
```bash
bash examples/run_qwen2_5_vl_7b_geo.sh
bash examples/qwen2_5_vl_7b_geo3k_grpo.sh
```
### Merge Checkpoint in Hugging Face Format
```bash
python3 scripts/model_merger.py --local_dir path_to_your_last_actor_checkpoint
python3 scripts/model_merger.py --local_dir checkpoints/easy_r1/exp_name/global_step_1/actor
```
> [!NOTE]
> [!TIP]
> If you encounter issues with connecting to Hugging Face, consider using `export HF_ENDPOINT=https://hf-mirror.com`.
>
> If you want to use SwanLab logger, consider using `bash examples/run_qwen2_5_vl_7b_geo_swanlab.sh`.
> If you want to use SwanLab logger, consider using `bash examples/qwen2_5_vl_7b_geo3k_swanlab.sh`.
## Custom Dataset
The dataset should strictly follow the example data format.
Please refer to the example datasets to prepare your own dataset.
- Text dataset: https://huggingface.co/datasets/hiyouga/math12k
  - Required columns: problem, answer
- Vision-text dataset: https://huggingface.co/datasets/hiyouga/geometry3k
  - Required columns: images, problem, answer
> [!TIP]
> EasyR1 already supports multi-image datasets.
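To sanity-check that a custom dataset exposes the required columns before training, a minimal sketch using the `datasets` library from the environment above (split names follow the `@train`/`@test` convention of the example configs):

```bash
# Print the schema of the example vision-text dataset; a custom dataset
# should expose the same required columns.
python3 - <<'EOF'
from datasets import load_dataset

ds = load_dataset("hiyouga/geometry3k", split="train")
print(ds.column_names)  # expected to include: images, problem, answer
print(ds[0]["problem"])
EOF
```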
## How to Understand GRPO in EasyR1
![image](assets/easyr1_grpo.png)
- To learn about the GRPO algorithm, you can refer to [Hugging Face's blog](https://huggingface.co/docs/trl/v0.15.2/en/grpo_trainer).
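In short, GRPO drops PPO's learned value baseline: for each prompt, a group of n responses is sampled (cf. `worker.rollout.n: 5` in the config below) and each response's advantage is its reward normalized within that group. A sketch of the standard group-relative estimator:

```latex
A_i = \frac{r_i - \operatorname{mean}(r_1, \dots, r_n)}{\operatorname{std}(r_1, \dots, r_n)}
```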
## How to Run a 70B+ Model in a Multi-node Environment
Please see **[veRL's official doc](https://verl.readthedocs.io/en/latest/start/multinode.html)** for multi-node training and the Ray debugger.
## Other Baselines
- [CLEVR-70k-Counting](examples/run_qwen2_5_vl_2b_clevr.sh): Train the Qwen2.5-VL-3B-Instruct model on the counting problem.
We also reproduced the following two baselines of the [R1-V](https://github.com/deep-agent/R1-V) project.
- [CLEVR-70k-Counting](examples/baselines/qwen2_5_vl_3b_clevr.sh): Train the Qwen2.5-VL-3B-Instruct model on the counting problem.
- [GeoQA-8k](examples/baselines/qwen2_5_vl_3b_geoqa8k.sh): Train the Qwen2.5-VL-3B-Instruct model on the GeoQA problem.
## Awesome Work using EasyR1
- **MMR1**: Advancing the Frontiers of Multimodal Reasoning. [![[code]](https://img.shields.io/github/stars/LengSicong/MMR1)](https://github.com/LengSicong/MMR1)
- **Vision-R1**: Incentivizing Reasoning Capability in Multimodal Large Language Models. [![[code]](https://img.shields.io/github/stars/Osilly/Vision-R1)](https://github.com/Osilly/Vision-R1) [![[arxiv]](https://img.shields.io/badge/arxiv-2503.06749-blue)](https://arxiv.org/abs/2503.06749)
- **Seg-Zero**: Reasoning-Chain Guided Segmentation via Cognitive Reinforcement. [![[code]](https://img.shields.io/github/stars/dvlab-research/Seg-Zero)](https://github.com/dvlab-research/Seg-Zero) [![[arxiv]](https://img.shields.io/badge/arxiv-2503.06520-blue)](https://arxiv.org/abs/2503.06520)
- **MetaSpatial**: Reinforcing 3D Spatial Reasoning in VLMs for the Metaverse. [![[code]](https://img.shields.io/github/stars/PzySeere/MetaSpatial)](https://github.com/PzySeere/MetaSpatial) [![[arxiv]](https://img.shields.io/badge/arxiv-2503.18470-blue)](https://arxiv.org/abs/2503.18470)
- **Temporal-R1**: Evolving Temporal Reasoning Capability into LMMs via Temporal Consistent Reward. [![[code]](https://img.shields.io/github/stars/appletea233/Temporal-R1)](https://github.com/appletea233/Temporal-R1)
## TODO
- Support PPO, Reinforce++ and RLOO for VLMs.
- Support padding-free training for VLMs.
- Support ulysses parallelism for VLMs.
- Support LoRA (high priority).
- Support ulysses parallelism for VLMs (middle priority).
- Support more VLM architectures.
> [!NOTE]
> We will not provide scripts for supervised fine-tuning and inference in this project. If you have such requirements, we recommend using [LLaMA-Factory](https://github.com/hiyouga/LLaMA-Factory).
### Known bugs
These features are temporarily disabled for now; we plan to fix them one by one in future updates.
- Vision language models are not compatible with padding-free training and ulysses parallelism yet.
- Vision language models are not compatible with `enable_chunked_prefill` until [vLLM v1](https://blog.vllm.ai/2025/01/27/v1-alpha-release.html) is supported.
- Vision language models are not compatible with ulysses parallelism yet.
## Discussion Group
assets/qwen2_5_vl_7b_geo.png: updated (71.9 KB → 81 KB)
assets/wechat.jpg: updated (162 KB → 157 KB)
# Start from the NVIDIA official image (ubuntu-22.04 + python-3.10)
# https://docs.nvidia.com/deeplearning/frameworks/pytorch-release-notes/rel-24-08.html
FROM nvcr.io/nvidia/pytorch:24.08-py3

# Define environments
ENV MAX_JOBS=32
ENV VLLM_WORKER_MULTIPROC_METHOD=spawn
ENV DEBIAN_FRONTEND=noninteractive
ENV NODE_OPTIONS=""

# Define installation arguments
ARG APT_SOURCE=https://mirrors.tuna.tsinghua.edu.cn/ubuntu/
ARG PIP_INDEX=https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple

# Set apt source
RUN cp /etc/apt/sources.list /etc/apt/sources.list.bak && \
    { \
    echo "deb ${APT_SOURCE} jammy main restricted universe multiverse"; \
    echo "deb ${APT_SOURCE} jammy-updates main restricted universe multiverse"; \
    echo "deb ${APT_SOURCE} jammy-backports main restricted universe multiverse"; \
    echo "deb ${APT_SOURCE} jammy-security main restricted universe multiverse"; \
    } > /etc/apt/sources.list

# Install systemctl
RUN apt-get update && \
    apt-get install -y -o Dpkg::Options::="--force-confdef" systemd && \
    apt-get clean

# Install tini
RUN apt-get update && \
    apt-get install -y tini && \
    apt-get clean

# Change pip source
RUN pip config set global.index-url "${PIP_INDEX}" && \
    pip config set global.extra-index-url "${PIP_INDEX}" && \
    python -m pip install --upgrade pip

# Install torch-2.5.1 + vllm-0.7.3
RUN pip install --no-cache-dir vllm==0.7.3 torch==2.5.1 torchvision==0.20.1 torchaudio==2.5.1 tensordict \
    "transformers>=4.49.0" accelerate datasets peft \
    ray codetiming hydra-core pandas "pyarrow>=15.0.0" pylatexenc qwen-vl-utils

# Install flash_attn-2.7.4.post1
RUN pip uninstall -y transformer-engine flash-attn && \
    wget -nv https://github.com/Dao-AILab/flash-attention/releases/download/v2.7.4.post1/flash_attn-2.7.4.post1+cu12torch2.5cxx11abiFALSE-cp310-cp310-linux_x86_64.whl && \
    pip install --no-cache-dir flash_attn-2.7.4.post1+cu12torch2.5cxx11abiFALSE-cp310-cp310-linux_x86_64.whl
FROM image.sourcefind.cn:5000/dcu/admin/base/pytorch:2.4.1-ubuntu22.04-dtk25.04-py3.10
set -x
MODEL_PATH=Qwen/Qwen2.5-VL-3B-Instruct # replace it with your local file path
SYSTEM_PROMPT="""A conversation between User and Assistant. The user asks a question, and the Assistant solves it. The assistant
first thinks about the reasoning process in the mind and then provides the user with the answer. The reasoning
process and answer are enclosed within <think> </think> and <answer> </answer> tags, respectively, i.e.,
<think> reasoning process here </think><answer> answer here </answer>"""
python3 -m verl.trainer.main \
    config=examples/config.yaml \
    data.train_files=BUAADreamer/clevr_count_70k@train \
    data.val_files=BUAADreamer/clevr_count_70k@test \
    data.system_prompt="${SYSTEM_PROMPT}" \
    worker.actor.model.model_path=${MODEL_PATH} \
    worker.rollout.tensor_parallel_size=1 \
    worker.rollout.enable_chunked_prefill=false \
    worker.reward.compute_score=r1v \
    trainer.experiment_name=qwen2_5_vl_3b_clevr \
    trainer.n_gpus_per_node=2
set -x
MODEL_PATH=Qwen/Qwen2.5-VL-3B-Instruct # replace it with your local file path
SYSTEM_PROMPT="""A conversation between User and Assistant. The user asks a question, and the Assistant solves it. The assistant
first thinks about the reasoning process in the mind and then provides the user with the answer. The reasoning
process and answer are enclosed within <think> </think> and <answer> </answer> tags, respectively, i.e.,
<think> reasoning process here </think><answer> answer here </answer>"""
python3 -m verl.trainer.main \
    config=examples/config.yaml \
    data.train_files=leonardPKU/GEOQA_8K_R1V@train \
    data.val_files=leonardPKU/GEOQA_8K_R1V@test \
    data.system_prompt="${SYSTEM_PROMPT}" \
    worker.actor.model.model_path=${MODEL_PATH} \
    worker.rollout.tensor_parallel_size=1 \
    worker.rollout.enable_chunked_prefill=false \
    worker.reward.compute_score=r1v \
    trainer.experiment_name=qwen2_5_vl_3b_geoqa8k \
    trainer.n_gpus_per_node=8
data:
  train_files: hiyouga/math12k@train
  val_files: hiyouga/math12k@test
  prompt_key: problem
  answer_key: answer
  image_key: images
  max_prompt_length: 2048
  max_response_length: 2048
  rollout_batch_size: 512
  val_batch_size: -1
  shuffle: true
  seed: 1
  max_pixels: 4194304
  min_pixels: 262144

algorithm:
  adv_estimator: grpo
  disable_kl: false
  use_kl_loss: true
  kl_penalty: low_var_kl
  kl_coef: 1.0e-2

worker:
  actor:
    global_batch_size: 128
    micro_batch_size_per_device_for_update: 4
    micro_batch_size_per_device_for_experience: 16
    max_grad_norm: 1.0
    padding_free: true
    ulysses_sequence_parallel_size: 1
    model:
      model_path: Qwen/Qwen2.5-7B-Instruct
      enable_gradient_checkpointing: true
      trust_remote_code: false
      freeze_vision_tower: false
    optim:
      lr: 1.0e-6
      weight_decay: 1.0e-2
      strategy: adamw  # {adamw, adamw_bf16}
      lr_warmup_ratio: 0.0
    fsdp:
      enable_full_shard: true
      enable_cpu_offload: false
      enable_rank0_init: true
    offload:
      offload_params: true  # true: more CPU memory; false: more GPU memory
      offload_optimizer: true  # true: more CPU memory; false: more GPU memory

  rollout:
    temperature: 1.0
    n: 5
    gpu_memory_utilization: 0.6
    enforce_eager: false
    enable_chunked_prefill: false
    tensor_parallel_size: 2
    limit_images: 0
    val_override_config:
      temperature: 0.5
      n: 1

  ref:
    fsdp:
      enable_full_shard: true
      enable_cpu_offload: true  # true: more CPU memory; false: more GPU memory
      enable_rank0_init: true
    offload:
      offload_params: false

  reward:
    reward_type: function
    compute_score: math

trainer:
  total_episodes: 15
  logger: ["console", "wandb"]
  project_name: easy_r1
  experiment_name: qwen2_5_7b_math_grpo
  n_gpus_per_node: 8
  nnodes: 1
  val_freq: 5  # -1 to disable
  val_before_train: true
  val_only: false
  val_generations_to_log: 1
  save_freq: 5  # -1 to disable
  save_limit: 3  # -1 to disable
  save_checkpoint_path: null
  load_checkpoint_path: null
set -x
MODEL_PATH=Qwen/Qwen2.5-7B-Instruct # replace it with your local file path
SYSTEM_PROMPT="""You FIRST think about the reasoning process as an internal monologue and then provide the final answer.
The reasoning process MUST BE enclosed within <think> </think> tags. The final answer MUST BE put in \boxed{}."""
python3 -m verl.trainer.main \
    config=examples/config.yaml \
    data.train_files=hiyouga/math12k@train \
    data.val_files=hiyouga/math12k@test \
    data.system_prompt="${SYSTEM_PROMPT}" \
    worker.actor.model.model_path=${MODEL_PATH} \
    trainer.experiment_name=qwen2_5_7b_math_grpo \
    trainer.n_gpus_per_node=8
set -x
MODEL_PATH=Qwen/Qwen2.5-VL-32B-Instruct # replace it with your local file path
SYSTEM_PROMPT="""You FIRST think about the reasoning process as an internal monologue and then provide the final answer.
The reasoning process MUST BE enclosed within <think> </think> tags. The final answer MUST BE put in \boxed{}."""
python3 -m verl.trainer.main \
    config=examples/config.yaml \
    data.train_files=hiyouga/geometry3k@train \
    data.val_files=hiyouga/geometry3k@test \
    data.system_prompt="${SYSTEM_PROMPT}" \
    worker.actor.model.model_path=${MODEL_PATH} \
    worker.actor.micro_batch_size_per_device_for_update=1 \
    worker.actor.micro_batch_size_per_device_for_experience=8 \
    worker.actor.fsdp.torch_dtype=bf16 \
    worker.actor.optim.strategy=adamw_bf16 \
    worker.rollout.tensor_parallel_size=8 \
    worker.rollout.enable_chunked_prefill=false \
    trainer.experiment_name=qwen2_5_vl_32b_geo_grpo \
    trainer.n_gpus_per_node=8
set -x
MODEL_PATH=Qwen/Qwen2.5-VL-3B-Instruct # replace it with your local file path
SYSTEM_PROMPT="""You FIRST think about the reasoning process as an internal monologue and then provide the final answer.
The reasoning process MUST BE enclosed within <think> </think> tags. The final answer MUST BE put in \boxed{}."""
python3 -m verl.trainer.main \
    config=examples/config.yaml \
    data.train_files=hiyouga/geometry3k@train \
    data.val_files=hiyouga/geometry3k@test \
    data.system_prompt="${SYSTEM_PROMPT}" \
    worker.actor.model.model_path=${MODEL_PATH} \
    worker.rollout.tensor_parallel_size=1 \
    worker.rollout.enable_chunked_prefill=false \
    trainer.experiment_name=qwen2_5_vl_3b_geo_grpo \
    trainer.n_gpus_per_node=2
set -x
MODEL_PATH=Qwen/Qwen2.5-VL-7B-Instruct # replace it with your local file path
SYSTEM_PROMPT="""You FIRST think about the reasoning process as an internal monologue and then provide the final answer.
The reasoning process MUST BE enclosed within <think> </think> tags. The final answer MUST BE put in \boxed{}."""
python3 -m verl.trainer.main \
    config=examples/config.yaml \
    data.train_files=hiyouga/geometry3k@train \
    data.val_files=hiyouga/geometry3k@test \
    data.system_prompt="${SYSTEM_PROMPT}" \
    worker.actor.model.model_path=${MODEL_PATH} \
    worker.rollout.enable_chunked_prefill=false \
    trainer.experiment_name=qwen2_5_vl_7b_geo_grpo \
    trainer.n_gpus_per_node=8
set -x
MODEL_PATH=Qwen/Qwen2.5-VL-7B-Instruct # replace it with your local file path
SYSTEM_PROMPT="""You FIRST think about the reasoning process as an internal monologue and then provide the final answer.
The reasoning process MUST BE enclosed within <think> </think> tags. The final answer MUST BE put in \boxed{}."""
python3 -m verl.trainer.main \
    config=examples/config.yaml \
    data.train_files=hiyouga/geometry3k@train \
    data.val_files=hiyouga/geometry3k@test \
    data.system_prompt="${SYSTEM_PROMPT}" \
    worker.actor.model.model_path=${MODEL_PATH} \
    worker.rollout.enable_chunked_prefill=false \
    algorithm.adv_estimator=reinforce_plus_plus \
    trainer.experiment_name=qwen2_5_vl_7b_geo_reinforce_pp \
    trainer.n_gpus_per_node=8
set -x
MODEL_PATH=Qwen/Qwen2.5-VL-7B-Instruct # replace it with your local file path
SYSTEM_PROMPT="""You FIRST think about the reasoning process as an internal monologue and then provide the final answer.
The reasoning process MUST BE enclosed within <think> </think> tags. The final answer MUST BE put in \boxed{}."""
python3 -m verl.trainer.main \
    config=examples/config.yaml \
    data.train_files=hiyouga/geometry3k@train \
    data.val_files=hiyouga/geometry3k@test \
    data.system_prompt="${SYSTEM_PROMPT}" \
    worker.actor.model.model_path=${MODEL_PATH} \
    worker.rollout.enable_chunked_prefill=false \
    trainer.experiment_name=qwen2_5_vl_7b_geo_grpo \
    trainer.logger=['console','swanlab'] \
    trainer.n_gpus_per_node=8
@@ -2,4 +2,3 @@ working_dir: ./
excludes: ["/.git/"]
env_vars:
  TORCH_NCCL_AVOID_RECORD_STREAMS: "1"
  VLLM_ATTENTION_BACKEND: "XFORMERS"
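This file is a Ray runtime-environment spec; for multi-node runs it would typically be attached when submitting the trainer as a Ray job. A hedged sketch — the file path and entrypoint here are assumptions, see veRL's multi-node doc referenced in the README:

```bash
# Submit the trainer as a Ray job so workers inherit working_dir,
# excludes, and the env vars defined above.
ray job submit --runtime-env runtime_env.yaml \
    -- python3 -m verl.trainer.main config=examples/config.yaml
```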
@@ -4,17 +4,25 @@ build-backend = "setuptools.build_meta"
[project]
name = "verl"
dynamic = ["version", "dependencies", "optional-dependencies", "readme", "license"]
requires-python = ">=3.8"
dynamic = [
    "version",
    "dependencies",
    "optional-dependencies",
    "requires-python",
    "authors",
    "description",
    "readme",
    "license"
]
[tool.ruff]
target-version = "py38"
target-version = "py39"
line-length = 119
indent-width = 4
[tool.ruff.lint]
ignore = ["C901", "E501", "E741", "W605", "C408"]
select = ["C", "E", "F", "I", "W"]
select = ["C", "E", "F", "I", "W", "RUF022"]
[tool.ruff.lint.per-file-ignores]
"__init__.py" = ["E402", "F401", "F403", "F811"]