Commit f92481f0 authored by chenych

First commit.

parent 7121d0b0
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
.pybuilder/
target/
# Jupyter Notebook
.ipynb_checkpoints
# IPython
profile_default/
ipython_config.py
# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version
# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock
# UV
# Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
#uv.lock
# poetry
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
#poetry.lock
# pdm
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
#pdm.lock
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
# in version control.
# https://pdm.fming.dev/latest/usage/project/#working-with-version-control
.pdm.toml
.pdm-python
.pdm-build/
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
__pypackages__/
# Celery stuff
celerybeat-schedule
celerybeat.pid
# SageMath parsed files
*.sage.py
# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
.dmypy.json
dmypy.json
# Pyre type checker
.pyre/
# pytype static type analyzer
.pytype/
# Cython debug symbols
cython_debug/
# PyCharm
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/
# PyPI configuration file
.pypirc
# outputs
outputs/
checkpoints/
wandb/
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright [yyyy] [name of copyright owner]
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
.PHONY: build commit quality style

check_dirs := scripts verl setup.py

build:
	python3 setup.py sdist bdist_wheel

commit:
	pre-commit install
	pre-commit run --all-files

quality:
	ruff check $(check_dirs)
	ruff format --check $(check_dirs)

style:
	ruff check $(check_dirs) --fix
	ruff format $(check_dirs)
# EasyR1: An Efficient, Scalable, Multi-Modality RL Training Framework
This project is a clean fork of the original [veRL](https://github.com/volcengine/verl) project to support vision language models.
Built on **[HybridEngine](https://arxiv.org/abs/2409.19256)** and the SPMD mode of the latest **[vLLM](https://github.com/vllm-project/vllm)** release, EasyR1 is efficient and scalable.
## Features
- Supported models
  - Qwen2/Qwen2.5 language models
  - Qwen2/Qwen2.5-VL vision language models
  - DeepSeek-R1 distill models
- Supported algorithms
  - GRPO
  - other RL algorithms (coming soon)
- Supported datasets
  - Any text, vision-text dataset in a [specific format](#custom-dataset).
## Requirements
### Software Requirements
- Python 3.9+
- transformers>=4.49.0
- flash-attn>=2.4.3
- vllm>=0.7.3
We provide a [Dockerfile](./Dockerfile) to easily build environments.
### Hardware Requirements
\* *estimated*
| Method | Bits | 1.5B | 3B | 7B |
| ------------------------ | ---- | ------ | ------ | ------ |
| GRPO Full Fine-Tuning | AMP | 2*24GB | 4*40GB | 8*40GB |
> [!NOTE]
> We are working hard to reduce VRAM usage in RL training; LoRA support will be integrated in the next update.
## Tutorial: Train Qwen2.5-VL with GRPO on the [Geometry3K](https://huggingface.co/datasets/hiyouga/geometry3k) Dataset in Just Three Steps
![image](assets/qwen2_5_vl_7b_geo.png)
### Installation
```bash
git clone https://github.com/hiyouga/EasyR1.git
cd EasyR1
pip install -e .
```
### GRPO Training
```bash
bash examples/run_qwen2_5_vl_7b_geo.sh
```
### Merge Checkpoint in Hugging Face Format
```bash
python3 scripts/model_merger.py --local_dir path_to_your_last_actor_checkpoint
```
> [!NOTE]
> If you encounter issues connecting to Hugging Face, consider using `export HF_ENDPOINT=https://hf-mirror.com`.
>
> If you want to use the SwanLab logger, consider using `bash examples/run_qwen2_5_vl_7b_geo_swanlab.sh`.
## Custom Dataset
Custom datasets should strictly follow the example data format.
- Text dataset: https://huggingface.co/datasets/hiyouga/math12k
  - Required columns: problem, answer
- Vision-text dataset: https://huggingface.co/datasets/hiyouga/geometry3k
  - Required columns: images, problem, answer
## Other Baselines
- [CLEVR-70k-Counting](examples/run_qwen2_5_vl_2b_clevr.sh): Train the Qwen2.5-VL-3B-Instruct model on the counting problem.
### Known Issues
These features are temporarily disabled; we plan to fix them one by one in future updates.
- Vision language models are not yet compatible with padding-free training or the DeepSpeed Ulysses parallelism method.
- Vision language models are not compatible with `enable_chunked_prefill` until [vLLM v1](https://blog.vllm.ai/2025/01/27/v1-alpha-release.html) is supported.
An efficient and easy-to-use multi-modality RL training framework
# EasyR1: An Efficient, Scalable, Multi-Modality RL Training Framework
This project is a clean fork of the original [veRL](https://github.com/volcengine/verl) project to support vision language models. We thank all the authors for providing such a high-performance RL training framework.
EasyR1 is efficient and scalable thanks to the design of **[HybridEngine](https://arxiv.org/abs/2409.19256)** and the SPMD mode of the latest **[vLLM](https://github.com/vllm-project/vllm)** release.
## Features
- Supported models
  - Qwen2/Qwen2.5 language models
  - Qwen2/Qwen2.5-VL vision language models
  - DeepSeek-R1 distill models
- Supported algorithms
  - GRPO
  - other RL algorithms (coming soon)
- Supported datasets
  - Any text, vision-text dataset in a [specific format](#custom-dataset).
## Requirements
### Software Requirements
- Python 3.9+
- transformers>=4.49.0
- flash-attn>=2.4.3
- vllm>=0.7.3
We provide a [Dockerfile](./Dockerfile) to easily build environments.
### Hardware Requirements
\* *estimated*
| Method | Bits | 1.5B | 3B | 7B |
| ------------------------ | ---- | ------ | ------ | ------ |
| GRPO Full Fine-Tuning | AMP | 2*24GB | 4*40GB | 8*40GB |
> [!NOTE]
> We are working hard to reduce VRAM usage in RL training; LoRA support will be integrated in an upcoming update.
## Tutorial: Run Qwen2.5-VL GRPO on [Geometry3K](https://huggingface.co/datasets/hiyouga/geometry3k) Dataset in Just 3 Steps
![image](assets/qwen2_5_vl_7b_geo.png)
### Installation
```bash
git clone https://github.com/hiyouga/EasyR1.git
cd EasyR1
pip install -e .
```
### GRPO Training
```bash
bash examples/run_qwen2_5_vl_7b_geo.sh
```
### Merge Checkpoint in Hugging Face Format
```bash
python3 scripts/model_merger.py --local_dir path_to_your_last_actor_checkpoint
```
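The script writes the merged weights in Hugging Face format to a `huggingface/` subfolder of `--local_dir`, and it also accepts an optional `--hf_upload_path` argument to push that folder to the Hugging Face Hub (see `scripts/model_merger.py` below). As a rough sketch (the path is a placeholder, and it assumes the tokenizer/processor files are present in that folder), the merged checkpoint should load with the standard `transformers` APIs:
```python
# Minimal loading sketch; the checkpoint path below is a placeholder, not a real output path.
import torch
from transformers import AutoModelForVision2Seq, AutoProcessor

merged_path = "path_to_your_last_actor_checkpoint/huggingface"  # hypothetical path

# Use AutoModelForCausalLM instead for text-only checkpoints.
model = AutoModelForVision2Seq.from_pretrained(merged_path, torch_dtype=torch.bfloat16)
processor = AutoProcessor.from_pretrained(merged_path)
print(model.config.architectures)
```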
> [!NOTE]
> If you encounter issues with connecting to Hugging Face, consider using `export HF_ENDPOINT=https://hf-mirror.com`.
>
> If you want to use SwanLab logger, consider using `bash examples/run_qwen2_5_vl_7b_geo_swanlab.sh`.
## Custom Dataset
The dataset should strictly follow the example data format; a minimal column check is sketched after the list below.
- Text dataset: https://huggingface.co/datasets/hiyouga/math12k
  - Required columns: problem, answer
- Vision-text dataset: https://huggingface.co/datasets/hiyouga/geometry3k
  - Required columns: images, problem, answer
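As a quick sanity check for the required columns (illustrative only, not part of the EasyR1 code base; it assumes the `datasets` library is installed):
```python
# Illustrative column check for the two example datasets referenced above.
from datasets import load_dataset

text_ds = load_dataset("hiyouga/math12k", split="train")
assert {"problem", "answer"} <= set(text_ds.column_names)

vision_ds = load_dataset("hiyouga/geometry3k", split="train")
assert {"images", "problem", "answer"} <= set(vision_ds.column_names)
```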
## Other Baselines
- [CLEVR-70k-Counting](examples/run_qwen2_5_vl_2b_clevr.sh): Train the Qwen2.5-VL-3B-Instruct model on the counting problem.
## TODO
- Support PPO, Reinforce++ and RLOO for VLMs.
- Support padding-free training for VLMs.
- Support ulysses parallelism for VLMs.
- Support more VLM architectures.
### Known bugs
These features are temporarily disabled; we plan to fix them one by one in future updates.
- Vision language models are not yet compatible with padding-free training and Ulysses parallelism.
- Vision language models are not compatible with `enable_chunked_prefill` until [vLLM v1](https://blog.vllm.ai/2025/01/27/v1-alpha-release.html) is supported.
## Discussion Group
👋 Join our [WeChat group](assets/wechat.jpg).
## Citation
Core contributors: [Yaowei Zheng](https://github.com/hiyouga), [Junting Lu](https://github.com/AL-377), [Shenzhi Wang](https://github.com/Shenzhi-Wang), [Zhangchi Feng](https://github.com/BUAADreamer), [Dongdong Kuang](https://github.com/Kuangdd01) and Yuwen Xiong
We also thank Guangming Sheng and Chi Zhang for helpful discussions.
```bibtex
@misc{zheng2025easyr1,
title = {EasyR1: An Efficient, Scalable, Multi-Modality RL Training Framework},
author = {Yaowei Zheng and Junting Lu and Shenzhi Wang and Zhangchi Feng and Dongdong Kuang and Yuwen Xiong},
howpublished = {\url{https://github.com/hiyouga/EasyR1}},
year = {2025}
}
```
We also recommend citing the original work.
```bibtex
@article{sheng2024hybridflow,
title = {HybridFlow: A Flexible and Efficient RLHF Framework},
author = {Guangming Sheng and Chi Zhang and Zilingfeng Ye and Xibin Wu and Wang Zhang and Ru Zhang and Yanghua Peng and Haibin Lin and Chuan Wu},
year = {2024},
journal = {arXiv preprint arXiv: 2409.19256}
}
```
# Start from the NVIDIA official image (ubuntu-22.04 + python-3.10)
# https://docs.nvidia.com/deeplearning/frameworks/pytorch-release-notes/rel-24-08.html
FROM nvcr.io/nvidia/pytorch:24.08-py3
# Define environments
ENV MAX_JOBS=32
ENV VLLM_WORKER_MULTIPROC_METHOD=spawn
ENV DEBIAN_FRONTEND=noninteractive
ENV NODE_OPTIONS=""
# Define installation arguments
ARG APT_SOURCE=https://mirrors.tuna.tsinghua.edu.cn/ubuntu/
ARG PIP_INDEX=https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple
# Set apt source
RUN cp /etc/apt/sources.list /etc/apt/sources.list.bak && \
    { \
        echo "deb ${APT_SOURCE} jammy main restricted universe multiverse"; \
        echo "deb ${APT_SOURCE} jammy-updates main restricted universe multiverse"; \
        echo "deb ${APT_SOURCE} jammy-backports main restricted universe multiverse"; \
        echo "deb ${APT_SOURCE} jammy-security main restricted universe multiverse"; \
    } > /etc/apt/sources.list

# Install systemctl
RUN apt-get update && \
    apt-get install -y -o Dpkg::Options::="--force-confdef" systemd && \
    apt-get clean

# Install tini
RUN apt-get update && \
    apt-get install -y tini && \
    apt-get clean

# Change pip source
RUN pip config set global.index-url "${PIP_INDEX}" && \
    pip config set global.extra-index-url "${PIP_INDEX}" && \
    python -m pip install --upgrade pip

# Install torch-2.5.1 + vllm-0.7.3
RUN pip install --no-cache-dir vllm==0.7.3 torch==2.5.1 torchvision==0.20.1 torchaudio==2.5.1 tensordict \
    "transformers>=4.49.0" accelerate datasets peft \
    ray codetiming hydra-core pandas "pyarrow>=15.0.0" pylatexenc qwen-vl-utils

# Install flash_attn-2.7.4.post1
RUN pip uninstall -y transformer-engine flash-attn && \
    wget -nv https://github.com/Dao-AILab/flash-attention/releases/download/v2.7.4.post1/flash_attn-2.7.4.post1+cu12torch2.5cxx11abiFALSE-cp310-cp310-linux_x86_64.whl && \
    pip install --no-cache-dir flash_attn-2.7.4.post1+cu12torch2.5cxx11abiFALSE-cp310-cp310-linux_x86_64.whl
data:
  train_files: hiyouga/math12k@train
  val_files: hiyouga/math12k@test
  prompt_key: problem
  max_prompt_length: 1024
  max_response_length: 1024
  rollout_batch_size: 512
  shuffle: true
  seed: 1
  max_pixels: 4194304
  min_pixels: 262144

algorithm:
  adv_estimator: grpo
  kl_coef: 0.0

worker:
  actor:
    global_batch_size: 128
    micro_batch_size_per_device_for_update: 1
    micro_batch_size_per_device_for_experience: 2
    max_grad_norm: 1.0
    use_kl_loss: true
    kl_loss_coef: 1.0e-3
    kl_loss_type: low_var_kl
    model:
      model_path: Qwen/Qwen2.5-7B-Instruct
      enable_gradient_checkpointing: true
    optim:
      lr: 1.0e-6
      weight_decay: 1.0e-2
    fsdp:
      param_offload: false
      optimizer_offload: false
      torch_dtype: null
    offload:
      param_offload: true
      optimizer_offload: true

  rollout:
    temperature: 1.0
    tensor_parallel_size: 2
    gpu_memory_utilization: 0.6
    n: 5
    enable_chunked_prefill: true

  ref:
    offload:
      param_offload: true

  reward:
    reward_type: function
    compute_score: math

trainer:
  total_episodes: 15
  logger: ["console", "wandb"]
  project_name: easy_r1
  experiment_name: qwen2_5_7b_math
  n_gpus_per_node: 8
  nnodes: 1
  save_freq: 5
  test_freq: 5
  val_before_train: true
  val_only: false
  save_checkpoint_path: null
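For orientation only: `algorithm.adv_estimator: grpo` together with `worker.rollout.n: 5` samples a group of responses per prompt and normalizes each response's reward within its group. The snippet below is a conceptual sketch of that idea, not verl's actual implementation:
```python
# Conceptual sketch of group-relative (GRPO-style) advantages; not verl's code.
import torch


def group_relative_advantages(rewards: torch.Tensor, eps: float = 1e-6) -> torch.Tensor:
    """rewards: (num_prompts, n) scalar rewards, one per sampled response."""
    mean = rewards.mean(dim=-1, keepdim=True)
    std = rewards.std(dim=-1, keepdim=True)
    return (rewards - mean) / (std + eps)


print(group_relative_advantages(torch.tensor([[1.0, 0.0, 1.0, 0.0, 1.0]])))  # one prompt, n = 5
```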
data:
  train_files: hiyouga/math12k@train
  val_files: hiyouga/math12k@test
  prompt_key: problem
  max_prompt_length: 1024
  max_response_length: 1024
  rollout_batch_size: 512
  shuffle: true
  seed: 1
  max_pixels: 4194304
  min_pixels: 262144

algorithm:
  adv_estimator: remax
  kl_coef: 0.0

worker:
  actor:
    global_batch_size: 128
    micro_batch_size_per_device_for_update: 1
    micro_batch_size_per_device_for_experience: 2
    max_grad_norm: 1.0
    use_kl_loss: true
    kl_loss_coef: 1.0e-3
    kl_loss_type: low_var_kl
    model:
      model_path: Qwen/Qwen2.5-7B-Instruct
      enable_gradient_checkpointing: true
    optim:
      lr: 1.0e-6
      weight_decay: 1.0e-2
    fsdp:
      param_offload: false
      optimizer_offload: false
      torch_dtype: null
    offload:
      param_offload: true
      optimizer_offload: true

  rollout:
    temperature: 1.0
    tensor_parallel_size: 2
    gpu_memory_utilization: 0.6
    n: 5
    enable_chunked_prefill: true

  ref:
    offload:
      param_offload: true

  reward:
    reward_type: function
    compute_score: math

trainer:
  total_episodes: 15
  logger: ["console", "wandb"]
  project_name: easy_r1
  experiment_name: qwen2_5_7b_remax_math
  n_gpus_per_node: 8
  nnodes: 1
  save_freq: 5
  test_freq: 5
  val_before_train: true
  val_only: false
  save_checkpoint_path: null
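For orientation only: `algorithm.adv_estimator: remax` refers to a ReMax-style estimator, which subtracts the reward of a greedy rollout for the same prompt as a baseline rather than a group statistic. A conceptual sketch, not verl's actual implementation:
```python
# Conceptual sketch of a ReMax-style baseline; not verl's code.
import torch


def remax_advantages(sample_rewards: torch.Tensor, greedy_reward: torch.Tensor) -> torch.Tensor:
    """sample_rewards: (n,) rewards of sampled responses; greedy_reward: reward of the greedy response."""
    return sample_rewards - greedy_reward


print(remax_advantages(torch.tensor([1.0, 0.0, 1.0]), torch.tensor(1.0)))
```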
set -x
export VLLM_ATTENTION_BACKEND=XFORMERS
MODEL_PATH=Qwen/Qwen2.5-7B-Instruct # replace it with your local file path
python3 -m verl.trainer.main \
    config=examples/grpo_example.yaml \
    worker.actor.model.model_path=${MODEL_PATH} \
    trainer.n_gpus_per_node=4
set -x
export VLLM_ATTENTION_BACKEND=XFORMERS
MODEL_PATH=Qwen/Qwen2.5-7B-Instruct # replace it with your local file path
python3 -m verl.trainer.main \
    config=examples/grpo_example.yaml \
    worker.actor.model.model_path=${MODEL_PATH} \
    trainer.logger=['console','swanlab'] \
    trainer.n_gpus_per_node=4
set -x
export VLLM_ATTENTION_BACKEND=XFORMERS
MODEL_PATH=Qwen/Qwen2.5-VL-3B-Instruct # replace it with your local file path
SYSTEM_PROMPT="""A conversation between User and Assistant. The user asks a question, and the Assistant solves it. The assistant
first thinks about the reasoning process in the mind and then provides the user with the answer. The reasoning
process and answer are enclosed within <think> </think> and <answer> </answer> tags, respectively, i.e.,
<think> reasoning process here </think><answer> answer here </answer>"""
python3 -m verl.trainer.main \
    config=examples/grpo_example.yaml \
    data.train_files=BUAADreamer/clevr_count_70k@train \
    data.val_files=BUAADreamer/clevr_count_70k@test \
    data.system_prompt="${SYSTEM_PROMPT}" \
    worker.actor.model.model_path=${MODEL_PATH} \
    worker.rollout.tensor_parallel_size=1 \
    worker.rollout.enable_chunked_prefill=false \
    worker.reward.compute_score=r1v \
    trainer.experiment_name=qwen2_5_vl_3b_clevr \
    trainer.n_gpus_per_node=2
set -x
export VLLM_ATTENTION_BACKEND=XFORMERS
MODEL_PATH=Qwen/Qwen2.5-VL-3B-Instruct # replace it with your local file path
python3 -m verl.trainer.main \
    config=examples/grpo_example.yaml \
    data.train_files=hiyouga/geometry3k@train \
    data.val_files=hiyouga/geometry3k@test \
    worker.actor.model.model_path=${MODEL_PATH} \
    worker.rollout.tensor_parallel_size=1 \
    worker.rollout.enable_chunked_prefill=false \
    trainer.experiment_name=qwen2_5_vl_3b_geo \
    trainer.n_gpus_per_node=2
set -x
export VLLM_ATTENTION_BACKEND=XFORMERS
MODEL_PATH=Qwen/Qwen2.5-VL-7B-Instruct # replace it with your local file path
python3 -m verl.trainer.main \
    config=examples/grpo_example.yaml \
    data.train_files=hiyouga/geometry3k@train \
    data.val_files=hiyouga/geometry3k@test \
    worker.actor.model.model_path=${MODEL_PATH} \
    worker.rollout.enable_chunked_prefill=false \
    trainer.experiment_name=qwen2_5_vl_7b_geo \
    trainer.n_gpus_per_node=4
set -x
export VLLM_ATTENTION_BACKEND=XFORMERS
MODEL_PATH=Qwen/Qwen2.5-VL-7B-Instruct # replace it with your local file path
python3 -m verl.trainer.main \
    config=examples/grpo_example.yaml \
    data.train_files=hiyouga/geometry3k@train \
    data.val_files=hiyouga/geometry3k@test \
    worker.actor.model.model_path=${MODEL_PATH} \
    worker.rollout.enable_chunked_prefill=false \
    trainer.experiment_name=qwen2_5_vl_7b_geo \
    trainer.logger=['console','swanlab'] \
    trainer.n_gpus_per_node=4
working_dir: ./
excludes: ["/.git/"]
env_vars:
  TORCH_NCCL_AVOID_RECORD_STREAMS: "1"
  VLLM_ATTENTION_BACKEND: "XFORMERS"
[build-system]
requires = ["setuptools>=61.0"]
build-backend = "setuptools.build_meta"
[project]
name = "verl"
dynamic = ["version", "dependencies", "optional-dependencies", "readme", "license"]
requires-python = ">=3.8"
[tool.ruff]
target-version = "py38"
line-length = 119
indent-width = 4
[tool.ruff.lint]
ignore = ["C901", "E501", "E741", "W605", "C408"]
select = ["C", "E", "F", "I", "W"]
[tool.ruff.lint.per-file-ignores]
"__init__.py" = ["E402", "F401", "F403", "F811"]
[tool.ruff.lint.isort]
lines-after-imports = 2
known-first-party = ["verl"]
known-third-party = ["torch", "transformers", "wandb"]
[tool.ruff.format]
quote-style = "double"
indent-style = "space"
skip-magic-trailing-comma = false
line-ending = "auto"
# Copyright 2024 Bytedance Ltd. and/or its affiliates
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
import os
import re
from concurrent.futures import ThreadPoolExecutor
from typing import Dict, List, Tuple
import torch
from torch.distributed._tensor import DTensor, Placement, Shard
from transformers import AutoConfig, AutoModelForCausalLM, AutoModelForTokenClassification, AutoModelForVision2Seq
def merge_by_placement(tensors: List[torch.Tensor], placement: Placement):
    if placement.is_replicate():
        return tensors[0]
    elif placement.is_partial():
        raise NotImplementedError("Partial placement is not supported yet")
    elif placement.is_shard():
        return torch.cat(tensors, dim=placement.dim).contiguous()
    else:
        raise ValueError(f"Unsupported placement: {placement}")
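# Illustrative usage (comment only, not executed by this script): merging two dim-0
# shards of a parameter back into a single tensor under a Shard(0) placement, e.g.
#   merge_by_placement([torch.zeros(2, 4), torch.ones(2, 4)], Shard(0))  # -> shape (4, 4)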
if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--local_dir", required=True, type=str, help="The path for your saved model")
    parser.add_argument("--hf_upload_path", default=False, type=str, help="The path of the huggingface repo to upload")
    args = parser.parse_args()
    assert not args.local_dir.endswith("huggingface"), "The local_dir should not end with huggingface"
    local_dir = args.local_dir

    # copy rank zero to find the shape of (dp, fsdp)
    rank = 0
    world_size = 0
    for filename in os.listdir(local_dir):
        match = re.match(r"model_world_size_(\d+)_rank_0\.pt", filename)
        if match:
            world_size = match.group(1)
            break

    assert world_size, "No model file with the proper format"

    state_dict = torch.load(
        os.path.join(local_dir, f"model_world_size_{world_size}_rank_{rank}.pt"), map_location="cpu"
    )
    pivot_key = sorted(state_dict.keys())[0]
    weight = state_dict[pivot_key]
    assert isinstance(weight, torch.distributed._tensor.DTensor)
    # get sharding info
    device_mesh = weight.device_mesh
    mesh = device_mesh.mesh
    mesh_dim_names = device_mesh.mesh_dim_names
    print(f"Got device mesh {mesh}, mesh_dim_names {mesh_dim_names}")

    assert mesh_dim_names in (("fsdp",),), f"Unsupported mesh_dim_names {mesh_dim_names}"

    if "tp" in mesh_dim_names:
        # fsdp * tp
        total_shards = mesh.shape[-1] * mesh.shape[-2]
        mesh_shape = (mesh.shape[-2], mesh.shape[-1])
    else:
        # fsdp
        total_shards = mesh.shape[-1]
        mesh_shape = (mesh.shape[-1],)

    print(f"Processing model shards with {total_shards} {mesh_shape} in total")

    model_state_dict_lst = []
    model_state_dict_lst.append(state_dict)
    model_state_dict_lst.extend([""] * (total_shards - 1))

    def process_one_shard(rank):
        model_path = os.path.join(local_dir, f"model_world_size_{world_size}_rank_{rank}.pt")
        state_dict = torch.load(model_path, map_location="cpu", weights_only=False)
        model_state_dict_lst[rank] = state_dict
        return state_dict

    with ThreadPoolExecutor(max_workers=min(32, os.cpu_count())) as executor:
        for rank in range(1, total_shards):
            executor.submit(process_one_shard, rank)

    state_dict = {}
    param_placements: Dict[str, List[Placement]] = {}
    keys = set(model_state_dict_lst[0].keys())
    for key in keys:
        state_dict[key] = []
        for model_state_dict in model_state_dict_lst:
            try:
                tensor = model_state_dict.pop(key)
            except Exception:
                print("-" * 30)
                print(model_state_dict)
            if isinstance(tensor, DTensor):
                state_dict[key].append(tensor._local_tensor.bfloat16())
                placements = tuple(tensor.placements)
                # replicated placement at dp dimension can be discarded
                if mesh_dim_names[0] == "dp":
                    placements = placements[1:]

                if key not in param_placements:
                    param_placements[key] = placements
                else:
                    assert param_placements[key] == placements
            else:
                state_dict[key] = tensor.bfloat16()

    del model_state_dict_lst

    for key in sorted(state_dict):
        if not isinstance(state_dict[key], list):
            print(f"No need to merge key {key}")
            continue
        # merge shards
        placements: Tuple[Shard] = param_placements[key]
        if len(mesh_shape) == 1:
            # 1-D list, FSDP without TP
            assert len(placements) == 1
            shards = state_dict[key]
            state_dict[key] = merge_by_placement(shards, placements[0])
        else:
            # 2-D list, FSDP + TP
            raise NotImplementedError("FSDP + TP is not supported yet")

    print("Writing to local disk")
    hf_path = os.path.join(local_dir, "huggingface")
    config = AutoConfig.from_pretrained(hf_path)

    if "ForTokenClassification" in config.architectures[0]:
        auto_model = AutoModelForTokenClassification
    elif "ForCausalLM" in config.architectures[0]:
        auto_model = AutoModelForCausalLM
    elif "ForConditionalGeneration" in config.architectures[0]:
        auto_model = AutoModelForVision2Seq
    else:
        raise NotImplementedError(f"Unknown architecture {config.architectures}")

    with torch.device("meta"):
        model = auto_model.from_config(config, torch_dtype=torch.bfloat16)
    model.to_empty(device="cpu")

    print(f"Saving model to {hf_path}")
    model.save_pretrained(hf_path, state_dict=state_dict)
    del state_dict
    del model

    if args.hf_upload_path:
        # Push to hugging face
        from huggingface_hub import HfApi

        api = HfApi()
        api.create_repo(repo_id=args.hf_upload_path, private=False, exist_ok=True)
        api.upload_folder(folder_path=hf_path, repo_id=args.hf_upload_path, repo_type="model")