Commit f92481f0 authored by chenych

First commit.

parent 7121d0b0
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
.pybuilder/
target/
# Jupyter Notebook
.ipynb_checkpoints
# IPython
profile_default/
ipython_config.py
# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version
# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock
# UV
# Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
#uv.lock
# poetry
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
#poetry.lock
# pdm
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
#pdm.lock
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
# in version control.
# https://pdm.fming.dev/latest/usage/project/#working-with-version-control
.pdm.toml
.pdm-python
.pdm-build/
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
__pypackages__/
# Celery stuff
celerybeat-schedule
celerybeat.pid
# SageMath parsed files
*.sage.py
# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
.dmypy.json
dmypy.json
# Pyre type checker
.pyre/
# pytype static type analyzer
.pytype/
# Cython debug symbols
cython_debug/
# PyCharm
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/
# PyPI configuration file
.pypirc
# outputs
outputs/
checkpoints/
wandb/
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright [yyyy] [name of copyright owner]
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
.PHONY: build commit quality style

check_dirs := scripts verl setup.py

build:
	python3 setup.py sdist bdist_wheel

commit:
	pre-commit install
	pre-commit run --all-files

quality:
	ruff check $(check_dirs)
	ruff format --check $(check_dirs)

style:
	ruff check $(check_dirs) --fix
	ruff format $(check_dirs)
# EasyR1: An Efficient, Scalable, Multi-Modality RL Training Framework
This project is a clean fork of the original [veRL](https://github.com/volcengine/verl) project to support vision language models.
Built on **[HybridEngine](https://arxiv.org/abs/2409.19256)** and the SPMD mode of the latest **[vLLM](https://github.com/vllm-project/vllm)** release, EasyR1 is efficient and scalable.
## Features
- Supported models
  - Qwen2/Qwen2.5 language models
  - Qwen2/Qwen2.5-VL vision language models
  - DeepSeek-R1 distill models
- Supported algorithms
  - GRPO
  - other RL algorithms (coming soon)
- Supported datasets
  - Any text, vision-text dataset in a [specific format](#custom-dataset).
## Requirements
### Software Requirements
- Python 3.9+
- transformers>=4.49.0
- flash-attn>=2.4.3
- vllm>=0.7.3
We provide a [Dockerfile](./Dockerfile) to easily build environments.
### Hardware Requirements
\* *estimated*
| Method | Bits | 1.5B | 3B | 7B |
| ------------------------ | ---- | ------ | ------ | ------ |
| GRPO Full Fine-Tuning | AMP | 2*24GB | 4*40GB | 8*40GB |
> [!NOTE]
> We are working hard to reduce VRAM usage in RL training; LoRA support will be integrated in the next update.
## Tutorial: Train Qwen2.5-VL with GRPO on the [Geometry3K](https://huggingface.co/datasets/hiyouga/geometry3k) Dataset in Just Three Steps
![image](assets/qwen2_5_vl_7b_geo.png)
### Installation
```bash
git clone https://github.com/hiyouga/EasyR1.git
cd EasyR1
pip install -e .
```
### GRPO Training
```bash
bash examples/run_qwen2_5_vl_7b_geo.sh
```
### Merge Checkpoint in Hugging Face Format
```bash
python3 scripts/model_merger.py --local_dir path_to_your_last_actor_checkpoint
```
> [!NOTE]
> If you encounter issues connecting to Hugging Face, consider using `export HF_ENDPOINT=https://hf-mirror.com`.
>
> If you want to use the SwanLab logger, consider using `bash examples/run_qwen2_5_vl_7b_geo_swanlab.sh`.
## Custom Dataset
Custom datasets should strictly follow the example data format.
- Text dataset: https://huggingface.co/datasets/hiyouga/math12k
  - Required columns: problem, answer
- Vision-text dataset: https://huggingface.co/datasets/hiyouga/geometry3k
  - Required columns: images, problem, answer
## Other Baselines
- [CLEVR-70k-Counting](examples/run_qwen2_5_vl_2b_clevr.sh): Train the Qwen2.5-VL-3B-Instruct model on the counting problem.
### Known Issues
These features are temporarily disabled; we plan to fix them one by one in future updates.
- Vision language models are not yet compatible with padding-free training or the DeepSpeed Ulysses parallelism method.
- Vision language models are not compatible with `enable_chunked_prefill` until [vLLM v1](https://blog.vllm.ai/2025/01/27/v1-alpha-release.html) is supported.
An efficient and easy-to-use multi-modality RL training framework
# EasyR1: An Efficient, Scalable, Multi-Modality RL Training Framework
This project is a clean fork of the original [veRL](https://github.com/volcengine/verl) project to support vision language models. We thank all the authors for providing such a high-performance RL training framework.
EasyR1 is efficient and scalable thanks to the design of **[HybridEngine](https://arxiv.org/abs/2409.19256)** and the SPMD mode of the latest **[vLLM](https://github.com/vllm-project/vllm)** release.
## Features
- Supported models
  - Qwen2/Qwen2.5 language models
  - Qwen2/Qwen2.5-VL vision language models
  - DeepSeek-R1 distill models
- Supported algorithms
  - GRPO
  - other RL algorithms (coming soon)
- Supported datasets
  - Any text, vision-text dataset in a [specific format](#custom-dataset).
## Requirements
### Software Requirements
- Python 3.9+
- transformers>=4.49.0
- flash-attn>=2.4.3
- vllm>=0.7.3
We provide a [Dockerfile](./Dockerfile) to easily build environments.
### Hardware Requirements
\* *estimated*
| Method | Bits | 1.5B | 3B | 7B |
| ------------------------ | ---- | ------ | ------ | ------ |
| GRPO Full Fine-Tuning | AMP | 2*24GB | 4*40GB | 8*40GB |
> [!NOTE]
> We are working hard to reduce VRAM usage in RL training; LoRA support will be integrated in an upcoming update.
## Tutorial: Run Qwen2.5-VL GRPO on [Geometry3K](https://huggingface.co/datasets/hiyouga/geometry3k) Dataset in Just 3 Steps
![image](assets/qwen2_5_vl_7b_geo.png)
### Installation
```bash
git clone https://github.com/hiyouga/EasyR1.git
cd EasyR1
pip install -e .
```
### GRPO Training
```bash
bash examples/run_qwen2_5_vl_7b_geo.sh
```
### Merge Checkpoint in Hugging Face Format
```bash
python3 scripts/model_merger.py --local_dir path_to_your_last_actor_checkpoint
```
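The script writes the merged weights in Hugging Face format to a `huggingface/` subfolder of `--local_dir`, and it also accepts an optional `--hf_upload_path` argument to push that folder to the Hugging Face Hub (see `scripts/model_merger.py` below). As a rough sketch (the path is a placeholder, and it assumes the tokenizer/processor files are present in that folder), the merged checkpoint should load with the standard `transformers` APIs:
```python
# Minimal loading sketch; the checkpoint path below is a placeholder, not a real output path.
import torch
from transformers import AutoModelForVision2Seq, AutoProcessor

merged_path = "path_to_your_last_actor_checkpoint/huggingface"  # hypothetical path

# Use AutoModelForCausalLM instead for text-only checkpoints.
model = AutoModelForVision2Seq.from_pretrained(merged_path, torch_dtype=torch.bfloat16)
processor = AutoProcessor.from_pretrained(merged_path)
print(model.config.architectures)
```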
> [!NOTE]
> If you encounter issues with connecting to Hugging Face, consider using `export HF_ENDPOINT=https://hf-mirror.com`.
>
> If you want to use SwanLab logger, consider using `bash examples/run_qwen2_5_vl_7b_geo_swanlab.sh`.
## Custom Dataset
The dataset should strictly follow the example data format; a minimal column check is sketched after the list below.
- Text dataset: https://huggingface.co/datasets/hiyouga/math12k
  - Required columns: problem, answer
- Vision-text dataset: https://huggingface.co/datasets/hiyouga/geometry3k
  - Required columns: images, problem, answer
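As a quick sanity check for the required columns (illustrative only, not part of the EasyR1 code base; it assumes the `datasets` library is installed):
```python
# Illustrative column check for the two example datasets referenced above.
from datasets import load_dataset

text_ds = load_dataset("hiyouga/math12k", split="train")
assert {"problem", "answer"} <= set(text_ds.column_names)

vision_ds = load_dataset("hiyouga/geometry3k", split="train")
assert {"images", "problem", "answer"} <= set(vision_ds.column_names)
```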
## Other Baselines
- [CLEVR-70k-Counting](examples/run_qwen2_5_vl_2b_clevr.sh): Train the Qwen2.5-VL-3B-Instruct model on the counting problem.
## TODO
- Support PPO, Reinforce++ and RLOO for VLMs.
- Support padding-free training for VLMs.
- Support ulysses parallelism for VLMs.
- Support more VLM architectures.
### Known bugs
These features are temporarily disabled; we plan to fix them one by one in future updates.
- Vision language models are not yet compatible with padding-free training and Ulysses parallelism.
- Vision language models are not compatible with `enable_chunked_prefill` until [vLLM v1](https://blog.vllm.ai/2025/01/27/v1-alpha-release.html) is supported.
## Discussion Group
👋 Join our [WeChat group](assets/wechat.jpg).
## Citation
Core contributors: [Yaowei Zheng](https://github.com/hiyouga), [Junting Lu](https://github.com/AL-377), [Shenzhi Wang](https://github.com/Shenzhi-Wang), [Zhangchi Feng](https://github.com/BUAADreamer), [Dongdong Kuang](https://github.com/Kuangdd01) and Yuwen Xiong
We also thank Guangming Sheng and Chi Zhang for helpful discussions.
```bibtex
@misc{zheng2025easyr1,
title = {EasyR1: An Efficient, Scalable, Multi-Modality RL Training Framework},
author = {Yaowei Zheng and Junting Lu and Shenzhi Wang and Zhangchi Feng and Dongdong Kuang and Yuwen Xiong},
howpublished = {\url{https://github.com/hiyouga/EasyR1}},
year = {2025}
}
```
We also recommend citing the original work.
```bibtex
@article{sheng2024hybridflow,
title = {HybridFlow: A Flexible and Efficient RLHF Framework},
author = {Guangming Sheng and Chi Zhang and Zilingfeng Ye and Xibin Wu and Wang Zhang and Ru Zhang and Yanghua Peng and Haibin Lin and Chuan Wu},
year = {2024},
journal = {arXiv preprint arXiv: 2409.19256}
}
```
# Start from the NVIDIA official image (ubuntu-22.04 + python-3.10)
# https://docs.nvidia.com/deeplearning/frameworks/pytorch-release-notes/rel-24-08.html
FROM nvcr.io/nvidia/pytorch:24.08-py3
# Define environments
ENV MAX_JOBS=32
ENV VLLM_WORKER_MULTIPROC_METHOD=spawn
ENV DEBIAN_FRONTEND=noninteractive
ENV NODE_OPTIONS=""
# Define installation arguments
ARG APT_SOURCE=https://mirrors.tuna.tsinghua.edu.cn/ubuntu/
ARG PIP_INDEX=https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple
# Set apt source
RUN cp /etc/apt/sources.list /etc/apt/sources.list.bak && \
    { \
        echo "deb ${APT_SOURCE} jammy main restricted universe multiverse"; \
        echo "deb ${APT_SOURCE} jammy-updates main restricted universe multiverse"; \
        echo "deb ${APT_SOURCE} jammy-backports main restricted universe multiverse"; \
        echo "deb ${APT_SOURCE} jammy-security main restricted universe multiverse"; \
    } > /etc/apt/sources.list

# Install systemctl
RUN apt-get update && \
    apt-get install -y -o Dpkg::Options::="--force-confdef" systemd && \
    apt-get clean

# Install tini
RUN apt-get update && \
    apt-get install -y tini && \
    apt-get clean

# Change pip source
RUN pip config set global.index-url "${PIP_INDEX}" && \
    pip config set global.extra-index-url "${PIP_INDEX}" && \
    python -m pip install --upgrade pip

# Install torch-2.5.1 + vllm-0.7.3
RUN pip install --no-cache-dir vllm==0.7.3 torch==2.5.1 torchvision==0.20.1 torchaudio==2.5.1 tensordict \
    "transformers>=4.49.0" accelerate datasets peft \
    ray codetiming hydra-core pandas "pyarrow>=15.0.0" pylatexenc qwen-vl-utils

# Install flash_attn-2.7.4.post1
RUN pip uninstall -y transformer-engine flash-attn && \
    wget -nv https://github.com/Dao-AILab/flash-attention/releases/download/v2.7.4.post1/flash_attn-2.7.4.post1+cu12torch2.5cxx11abiFALSE-cp310-cp310-linux_x86_64.whl && \
    pip install --no-cache-dir flash_attn-2.7.4.post1+cu12torch2.5cxx11abiFALSE-cp310-cp310-linux_x86_64.whl
data:
  train_files: hiyouga/math12k@train
  val_files: hiyouga/math12k@test
  prompt_key: problem
  max_prompt_length: 1024
  max_response_length: 1024
  rollout_batch_size: 512
  shuffle: true
  seed: 1
  max_pixels: 4194304
  min_pixels: 262144

algorithm:
  adv_estimator: grpo
  kl_coef: 0.0

worker:
  actor:
    global_batch_size: 128
    micro_batch_size_per_device_for_update: 1
    micro_batch_size_per_device_for_experience: 2
    max_grad_norm: 1.0
    use_kl_loss: true
    kl_loss_coef: 1.0e-3
    kl_loss_type: low_var_kl
    model:
      model_path: Qwen/Qwen2.5-7B-Instruct
      enable_gradient_checkpointing: true
    optim:
      lr: 1.0e-6
      weight_decay: 1.0e-2
    fsdp:
      param_offload: false
      optimizer_offload: false
      torch_dtype: null
    offload:
      param_offload: true
      optimizer_offload: true

  rollout:
    temperature: 1.0
    tensor_parallel_size: 2
    gpu_memory_utilization: 0.6
    n: 5
    enable_chunked_prefill: true

  ref:
    offload:
      param_offload: true

  reward:
    reward_type: function
    compute_score: math

trainer:
  total_episodes: 15
  logger: ["console", "wandb"]
  project_name: easy_r1
  experiment_name: qwen2_5_7b_math
  n_gpus_per_node: 8
  nnodes: 1
  save_freq: 5
  test_freq: 5
  val_before_train: true
  val_only: false
  save_checkpoint_path: null
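For orientation only: `algorithm.adv_estimator: grpo` together with `worker.rollout.n: 5` samples a group of responses per prompt and normalizes each response's reward within its group. The snippet below is a conceptual sketch of that idea, not verl's actual implementation:
```python
# Conceptual sketch of group-relative (GRPO-style) advantages; not verl's code.
import torch


def group_relative_advantages(rewards: torch.Tensor, eps: float = 1e-6) -> torch.Tensor:
    """rewards: (num_prompts, n) scalar rewards, one per sampled response."""
    mean = rewards.mean(dim=-1, keepdim=True)
    std = rewards.std(dim=-1, keepdim=True)
    return (rewards - mean) / (std + eps)


print(group_relative_advantages(torch.tensor([[1.0, 0.0, 1.0, 0.0, 1.0]])))  # one prompt, n = 5
```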
data:
  train_files: hiyouga/math12k@train
  val_files: hiyouga/math12k@test
  prompt_key: problem
  max_prompt_length: 1024
  max_response_length: 1024
  rollout_batch_size: 512
  shuffle: true
  seed: 1
  max_pixels: 4194304
  min_pixels: 262144

algorithm:
  adv_estimator: remax
  kl_coef: 0.0

worker:
  actor:
    global_batch_size: 128
    micro_batch_size_per_device_for_update: 1
    micro_batch_size_per_device_for_experience: 2
    max_grad_norm: 1.0
    use_kl_loss: true
    kl_loss_coef: 1.0e-3
    kl_loss_type: low_var_kl
    model:
      model_path: Qwen/Qwen2.5-7B-Instruct
      enable_gradient_checkpointing: true
    optim:
      lr: 1.0e-6
      weight_decay: 1.0e-2
    fsdp:
      param_offload: false
      optimizer_offload: false
      torch_dtype: null
    offload:
      param_offload: true
      optimizer_offload: true

  rollout:
    temperature: 1.0
    tensor_parallel_size: 2
    gpu_memory_utilization: 0.6
    n: 5
    enable_chunked_prefill: true

  ref:
    offload:
      param_offload: true

  reward:
    reward_type: function
    compute_score: math

trainer:
  total_episodes: 15
  logger: ["console", "wandb"]
  project_name: easy_r1
  experiment_name: qwen2_5_7b_remax_math
  n_gpus_per_node: 8
  nnodes: 1
  save_freq: 5
  test_freq: 5
  val_before_train: true
  val_only: false
  save_checkpoint_path: null
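For orientation only: `algorithm.adv_estimator: remax` refers to a ReMax-style estimator, which subtracts the reward of a greedy rollout for the same prompt as a baseline rather than a group statistic. A conceptual sketch, not verl's actual implementation:
```python
# Conceptual sketch of a ReMax-style baseline; not verl's code.
import torch


def remax_advantages(sample_rewards: torch.Tensor, greedy_reward: torch.Tensor) -> torch.Tensor:
    """sample_rewards: (n,) rewards of sampled responses; greedy_reward: reward of the greedy response."""
    return sample_rewards - greedy_reward


print(remax_advantages(torch.tensor([1.0, 0.0, 1.0]), torch.tensor(1.0)))
```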
set -x
export VLLM_ATTENTION_BACKEND=XFORMERS
MODEL_PATH=Qwen/Qwen2.5-7B-Instruct # replace it with your local file path
python3 -m verl.trainer.main \
    config=examples/grpo_example.yaml \
    worker.actor.model.model_path=${MODEL_PATH} \
    trainer.n_gpus_per_node=4
set -x
export VLLM_ATTENTION_BACKEND=XFORMERS
MODEL_PATH=Qwen/Qwen2.5-7B-Instruct # replace it with your local file path
python3 -m verl.trainer.main \
    config=examples/grpo_example.yaml \
    worker.actor.model.model_path=${MODEL_PATH} \
    trainer.logger=['console','swanlab'] \
    trainer.n_gpus_per_node=4
set -x
export VLLM_ATTENTION_BACKEND=XFORMERS
MODEL_PATH=Qwen/Qwen2.5-VL-3B-Instruct # replace it with your local file path
SYSTEM_PROMPT="""A conversation between User and Assistant. The user asks a question, and the Assistant solves it. The assistant
first thinks about the reasoning process in the mind and then provides the user with the answer. The reasoning
process and answer are enclosed within <think> </think> and <answer> </answer> tags, respectively, i.e.,
<think> reasoning process here </think><answer> answer here </answer>"""
python3 -m verl.trainer.main \
    config=examples/grpo_example.yaml \
    data.train_files=BUAADreamer/clevr_count_70k@train \
    data.val_files=BUAADreamer/clevr_count_70k@test \
    data.system_prompt="${SYSTEM_PROMPT}" \
    worker.actor.model.model_path=${MODEL_PATH} \
    worker.rollout.tensor_parallel_size=1 \
    worker.rollout.enable_chunked_prefill=false \
    worker.reward.compute_score=r1v \
    trainer.experiment_name=qwen2_5_vl_3b_clevr \
    trainer.n_gpus_per_node=2
set -x
export VLLM_ATTENTION_BACKEND=XFORMERS
MODEL_PATH=Qwen/Qwen2.5-VL-3B-Instruct # replace it with your local file path
python3 -m verl.trainer.main \
    config=examples/grpo_example.yaml \
    data.train_files=hiyouga/geometry3k@train \
    data.val_files=hiyouga/geometry3k@test \
    worker.actor.model.model_path=${MODEL_PATH} \
    worker.rollout.tensor_parallel_size=1 \
    worker.rollout.enable_chunked_prefill=false \
    trainer.experiment_name=qwen2_5_vl_3b_geo \
    trainer.n_gpus_per_node=2
set -x
export VLLM_ATTENTION_BACKEND=XFORMERS
MODEL_PATH=Qwen/Qwen2.5-VL-7B-Instruct # replace it with your local file path
python3 -m verl.trainer.main \
    config=examples/grpo_example.yaml \
    data.train_files=hiyouga/geometry3k@train \
    data.val_files=hiyouga/geometry3k@test \
    worker.actor.model.model_path=${MODEL_PATH} \
    worker.rollout.enable_chunked_prefill=false \
    trainer.experiment_name=qwen2_5_vl_7b_geo \
    trainer.n_gpus_per_node=4
set -x
export VLLM_ATTENTION_BACKEND=XFORMERS
MODEL_PATH=Qwen/Qwen2.5-VL-7B-Instruct # replace it with your local file path
python3 -m verl.trainer.main \
    config=examples/grpo_example.yaml \
    data.train_files=hiyouga/geometry3k@train \
    data.val_files=hiyouga/geometry3k@test \
    worker.actor.model.model_path=${MODEL_PATH} \
    worker.rollout.enable_chunked_prefill=false \
    trainer.experiment_name=qwen2_5_vl_7b_geo \
    trainer.logger=['console','swanlab'] \
    trainer.n_gpus_per_node=4
working_dir: ./
excludes: ["/.git/"]
env_vars:
  TORCH_NCCL_AVOID_RECORD_STREAMS: "1"
  VLLM_ATTENTION_BACKEND: "XFORMERS"
[build-system]
requires = ["setuptools>=61.0"]
build-backend = "setuptools.build_meta"
[project]
name = "verl"
dynamic = ["version", "dependencies", "optional-dependencies", "readme", "license"]
requires-python = ">=3.8"
[tool.ruff]
target-version = "py38"
line-length = 119
indent-width = 4
[tool.ruff.lint]
ignore = ["C901", "E501", "E741", "W605", "C408"]
select = ["C", "E", "F", "I", "W"]
[tool.ruff.lint.per-file-ignores]
"__init__.py" = ["E402", "F401", "F403", "F811"]
[tool.ruff.lint.isort]
lines-after-imports = 2
known-first-party = ["verl"]
known-third-party = ["torch", "transformers", "wandb"]
[tool.ruff.format]
quote-style = "double"
indent-style = "space"
skip-magic-trailing-comma = false
line-ending = "auto"
# Copyright 2024 Bytedance Ltd. and/or its affiliates
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
import os
import re
from concurrent.futures import ThreadPoolExecutor
from typing import Dict, List, Tuple
import torch
from torch.distributed._tensor import DTensor, Placement, Shard
from transformers import AutoConfig, AutoModelForCausalLM, AutoModelForTokenClassification, AutoModelForVision2Seq
def merge_by_placement(tensors: List[torch.Tensor], placement: Placement):
    if placement.is_replicate():
        return tensors[0]
    elif placement.is_partial():
        raise NotImplementedError("Partial placement is not supported yet")
    elif placement.is_shard():
        return torch.cat(tensors, dim=placement.dim).contiguous()
    else:
        raise ValueError(f"Unsupported placement: {placement}")
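# Illustrative usage (comment only, not executed by this script): merging two dim-0
# shards of a parameter back into a single tensor under a Shard(0) placement, e.g.
#   merge_by_placement([torch.zeros(2, 4), torch.ones(2, 4)], Shard(0))  # -> shape (4, 4)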
if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--local_dir", required=True, type=str, help="The path for your saved model")
    parser.add_argument("--hf_upload_path", default=False, type=str, help="The path of the huggingface repo to upload")
    args = parser.parse_args()
    assert not args.local_dir.endswith("huggingface"), "The local_dir should not end with huggingface"
    local_dir = args.local_dir

    # copy rank zero to find the shape of (dp, fsdp)
    rank = 0
    world_size = 0
    for filename in os.listdir(local_dir):
        match = re.match(r"model_world_size_(\d+)_rank_0\.pt", filename)
        if match:
            world_size = match.group(1)
            break

    assert world_size, "No model file with the proper format"

    state_dict = torch.load(
        os.path.join(local_dir, f"model_world_size_{world_size}_rank_{rank}.pt"), map_location="cpu"
    )
    pivot_key = sorted(state_dict.keys())[0]
    weight = state_dict[pivot_key]
    assert isinstance(weight, torch.distributed._tensor.DTensor)
    # get sharding info
    device_mesh = weight.device_mesh
    mesh = device_mesh.mesh
    mesh_dim_names = device_mesh.mesh_dim_names
    print(f"Got device mesh {mesh}, mesh_dim_names {mesh_dim_names}")

    assert mesh_dim_names in (("fsdp",),), f"Unsupported mesh_dim_names {mesh_dim_names}"

    if "tp" in mesh_dim_names:
        # fsdp * tp
        total_shards = mesh.shape[-1] * mesh.shape[-2]
        mesh_shape = (mesh.shape[-2], mesh.shape[-1])
    else:
        # fsdp
        total_shards = mesh.shape[-1]
        mesh_shape = (mesh.shape[-1],)

    print(f"Processing model shards with {total_shards} {mesh_shape} in total")

    model_state_dict_lst = []
    model_state_dict_lst.append(state_dict)
    model_state_dict_lst.extend([""] * (total_shards - 1))

    def process_one_shard(rank):
        model_path = os.path.join(local_dir, f"model_world_size_{world_size}_rank_{rank}.pt")
        state_dict = torch.load(model_path, map_location="cpu", weights_only=False)
        model_state_dict_lst[rank] = state_dict
        return state_dict

    with ThreadPoolExecutor(max_workers=min(32, os.cpu_count())) as executor:
        for rank in range(1, total_shards):
            executor.submit(process_one_shard, rank)

    state_dict = {}
    param_placements: Dict[str, List[Placement]] = {}
    keys = set(model_state_dict_lst[0].keys())
    for key in keys:
        state_dict[key] = []
        for model_state_dict in model_state_dict_lst:
            try:
                tensor = model_state_dict.pop(key)
            except Exception:
                print("-" * 30)
                print(model_state_dict)
            if isinstance(tensor, DTensor):
                state_dict[key].append(tensor._local_tensor.bfloat16())
                placements = tuple(tensor.placements)
                # replicated placement at dp dimension can be discarded
                if mesh_dim_names[0] == "dp":
                    placements = placements[1:]

                if key not in param_placements:
                    param_placements[key] = placements
                else:
                    assert param_placements[key] == placements
            else:
                state_dict[key] = tensor.bfloat16()

    del model_state_dict_lst

    for key in sorted(state_dict):
        if not isinstance(state_dict[key], list):
            print(f"No need to merge key {key}")
            continue
        # merge shards
        placements: Tuple[Shard] = param_placements[key]
        if len(mesh_shape) == 1:
            # 1-D list, FSDP without TP
            assert len(placements) == 1
            shards = state_dict[key]
            state_dict[key] = merge_by_placement(shards, placements[0])
        else:
            # 2-D list, FSDP + TP
            raise NotImplementedError("FSDP + TP is not supported yet")

    print("Writing to local disk")
    hf_path = os.path.join(local_dir, "huggingface")
    config = AutoConfig.from_pretrained(hf_path)

    if "ForTokenClassification" in config.architectures[0]:
        auto_model = AutoModelForTokenClassification
    elif "ForCausalLM" in config.architectures[0]:
        auto_model = AutoModelForCausalLM
    elif "ForConditionalGeneration" in config.architectures[0]:
        auto_model = AutoModelForVision2Seq
    else:
        raise NotImplementedError(f"Unknown architecture {config.architectures}")

    with torch.device("meta"):
        model = auto_model.from_config(config, torch_dtype=torch.bfloat16)
    model.to_empty(device="cpu")

    print(f"Saving model to {hf_path}")
    model.save_pretrained(hf_path, state_dict=state_dict)
    del state_dict
    del model

    if args.hf_upload_path:
        # Push to hugging face
        from huggingface_hub import HfApi

        api = HfApi()
        api.create_repo(repo_id=args.hf_upload_path, private=False, exist_ok=True)
        api.upload_folder(folder_path=hf_path, repo_id=args.hf_upload_path, repo_type="model")