Commit ca23112b authored by zk

Adapted to 400x800 input; removed junk files.

parent 74fbd52c
FROM pytorch/pytorch:2.1.2-cuda12.1-cudnn8-runtime
ARG DEBIAN_FRONTEND=noninteractive
ENV CUDA_HOME=/usr/local/cuda \
TORCH_CUDA_ARCH_LIST="6.0 6.1 7.0 7.5 8.0 8.6+PTX" \
SETUPTOOLS_USE_DISTUTILS=stdlib
RUN conda update conda -y
# Install libraries in the brand new image.
RUN apt-get -y update && apt-get install -y --no-install-recommends \
wget \
build-essential \
git \
python3-opencv \
ca-certificates && \
rm -rf /var/lib/apt/lists/*
# Set the working directory for all the subsequent Dockerfile instructions.
WORKDIR /opt/program
RUN git clone https://github.com/IDEA-Research/GroundingDINO.git
# Download the pretrained Swin-T checkpoint; chain with && so a failed download fails the build.
RUN mkdir weights && cd weights && wget -q https://github.com/IDEA-Research/GroundingDINO/releases/download/v0.1.0-alpha/groundingdino_swint_ogc.pth && cd ..
RUN conda install -c "nvidia/label/cuda-12.1.1" cuda -y
ENV CUDA_HOME=$CONDA_PREFIX
ENV PATH=/usr/local/cuda/bin:$PATH
RUN cd GroundingDINO/ && python -m pip install .
COPY docker_test.py docker_test.py
CMD [ "python", "docker_test.py" ]
<div align="center">
<img src="./.asset/grounding_dino_logo.png" width="30%">
</div>
# :sauropod: Grounding DINO
[![PWC](https://img.shields.io/endpoint.svg?url=https://paperswithcode.com/badge/grounding-dino-marrying-dino-with-grounded/zero-shot-object-detection-on-mscoco)](https://paperswithcode.com/sota/zero-shot-object-detection-on-mscoco?p=grounding-dino-marrying-dino-with-grounded) [![PWC](https://img.shields.io/endpoint.svg?url=https://paperswithcode.com/badge/grounding-dino-marrying-dino-with-grounded/zero-shot-object-detection-on-odinw)](https://paperswithcode.com/sota/zero-shot-object-detection-on-odinw?p=grounding-dino-marrying-dino-with-grounded) \
[![PWC](https://img.shields.io/endpoint.svg?url=https://paperswithcode.com/badge/grounding-dino-marrying-dino-with-grounded/object-detection-on-coco-minival)](https://paperswithcode.com/sota/object-detection-on-coco-minival?p=grounding-dino-marrying-dino-with-grounded) [![PWC](https://img.shields.io/endpoint.svg?url=https://paperswithcode.com/badge/grounding-dino-marrying-dino-with-grounded/object-detection-on-coco)](https://paperswithcode.com/sota/object-detection-on-coco?p=grounding-dino-marrying-dino-with-grounded)
**[IDEA-CVR, IDEA-Research](https://github.com/IDEA-Research)**
[Shilong Liu](http://www.lsl.zone/), [Zhaoyang Zeng](https://scholar.google.com/citations?user=U_cvvUwAAAAJ&hl=zh-CN&oi=ao), [Tianhe Ren](https://rentainhe.github.io/), [Feng Li](https://scholar.google.com/citations?user=ybRe9GcAAAAJ&hl=zh-CN), [Hao Zhang](https://scholar.google.com/citations?user=B8hPxMQAAAAJ&hl=zh-CN), [Jie Yang](https://github.com/yangjie-cv), [Chunyuan Li](https://scholar.google.com/citations?user=Zd7WmXUAAAAJ&hl=zh-CN&oi=ao), [Jianwei Yang](https://jwyang.github.io/), [Hang Su](https://scholar.google.com/citations?hl=en&user=dxN1_X0AAAAJ&view_op=list_works&sortby=pubdate), [Jun Zhu](https://scholar.google.com/citations?hl=en&user=axsP38wAAAAJ), [Lei Zhang](https://www.leizhang.org/)<sup>:email:</sup>.
[[`Paper`](https://arxiv.org/abs/2303.05499)] [[`Demo`](https://huggingface.co/spaces/ShilongLiu/Grounding_DINO_demo)] [[`BibTex`](#black_nib-citation)]
PyTorch implementation and pretrained models for Grounding DINO. For details, see the paper **[Grounding DINO: Marrying DINO with Grounded Pre-Training for Open-Set Object Detection](https://arxiv.org/abs/2303.05499)**.
- 🔥 **[Grounded SAM 2](https://github.com/IDEA-Research/Grounded-SAM-2)** is released now, which combines Grounding DINO with [SAM 2](https://github.com/facebookresearch/segment-anything-2) for any object tracking in open-world scenarios.
- 🔥 **[Grounding DINO 1.5](https://github.com/IDEA-Research/Grounding-DINO-1.5-API)** is released now, which is IDEA Research's **Most Capable** Open-World Object Detection Model!
- 🔥 **[Grounding DINO](https://arxiv.org/abs/2303.05499)** and **[Grounded SAM](https://arxiv.org/abs/2401.14159)** are now supported in Hugging Face. For more convenient use, you can refer to [this documentation](https://huggingface.co/docs/transformers/model_doc/grounding-dino).
## :sun_with_face: Helpful Tutorial
- :grapes: [[Read our arXiv Paper](https://arxiv.org/abs/2303.05499)]
- :apple: [[Watch our simple introduction video on YouTube](https://youtu.be/wxWDt5UiwY8)]
- :blossom: &nbsp;[[Try the Colab Demo](https://colab.research.google.com/github/roboflow-ai/notebooks/blob/main/notebooks/zero-shot-object-detection-with-grounding-dino.ipynb)]
- :sunflower: [[Try our Official Huggingface Demo](https://huggingface.co/spaces/ShilongLiu/Grounding_DINO_demo)]
- :maple_leaf: [[Watch the Step by Step Tutorial about GroundingDINO by Roboflow AI](https://youtu.be/cMa77r3YrDk)]
- :mushroom: [[GroundingDINO: Automated Dataset Annotation and Evaluation by Roboflow AI](https://youtu.be/C4NqaRBz_Kw)]
- :hibiscus: [[Accelerate Image Annotation with SAM and GroundingDINO by Roboflow AI](https://youtu.be/oEQYStnF2l8)]
- :white_flower: [[Autodistill: Train YOLOv8 with ZERO Annotations based on Grounding-DINO and Grounded-SAM by Roboflow AI](https://github.com/autodistill/autodistill)]
<!-- Grounding DINO Methods |
[![arXiv](https://img.shields.io/badge/arXiv-2303.05499-b31b1b.svg)](https://arxiv.org/abs/2303.05499)
[![YouTube](https://badges.aleen42.com/src/youtube.svg)](https://youtu.be/wxWDt5UiwY8) -->
<!-- Grounding DINO Demos |
[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/roboflow-ai/notebooks/blob/main/notebooks/zero-shot-object-detection-with-grounding-dino.ipynb) -->
<!-- [![YouTube](https://badges.aleen42.com/src/youtube.svg)](https://youtu.be/cMa77r3YrDk)
[![HuggingFace space](https://img.shields.io/badge/🤗-HuggingFace%20Space-cyan.svg)](https://huggingface.co/spaces/ShilongLiu/Grounding_DINO_demo)
[![YouTube](https://badges.aleen42.com/src/youtube.svg)](https://youtu.be/oEQYStnF2l8)
[![YouTube](https://badges.aleen42.com/src/youtube.svg)](https://youtu.be/C4NqaRBz_Kw) -->
## :sparkles: Highlight Projects
- [Semantic-SAM: a universal image segmentation model to enable segment and recognize anything at any desired granularity](https://github.com/UX-Decoder/Semantic-SAM)
- [DetGPT: Detect What You Need via Reasoning](https://github.com/OptimalScale/DetGPT)
- [Grounded-SAM: Marrying Grounding DINO with Segment Anything](https://github.com/IDEA-Research/Grounded-Segment-Anything)
- [Grounding DINO with Stable Diffusion](demo/image_editing_with_groundingdino_stablediffusion.ipynb)
- [Grounding DINO with GLIGEN for Controllable Image Editing](demo/image_editing_with_groundingdino_gligen.ipynb)
- [OpenSeeD: A Simple and Strong Openset Segmentation Model](https://github.com/IDEA-Research/OpenSeeD)
- [SEEM: Segment Everything Everywhere All at Once](https://github.com/UX-Decoder/Segment-Everything-Everywhere-All-At-Once)
- [X-GPT: Conversational Visual Agent supported by X-Decoder](https://github.com/microsoft/X-Decoder/tree/xgpt)
- [GLIGEN: Open-Set Grounded Text-to-Image Generation](https://github.com/gligen/GLIGEN)
- [LLaVA: Large Language and Vision Assistant](https://github.com/haotian-liu/LLaVA)
<!-- Extensions | [Grounding DINO with Segment Anything](https://github.com/IDEA-Research/Grounded-Segment-Anything); [Grounding DINO with Stable Diffusion](demo/image_editing_with_groundingdino_stablediffusion.ipynb); [Grounding DINO with GLIGEN](demo/image_editing_with_groundingdino_gligen.ipynb) -->
<!-- Official PyTorch implementation of [Grounding DINO](https://arxiv.org/abs/2303.05499), a stronger open-set object detector. Code is available now! -->
## :bulb: Highlight
- **Open-Set Detection.** Detect **everything** with language!
- **High Performance.** COCO zero-shot **52.5 AP** (training without COCO data!). COCO fine-tune **63.0 AP**.
- **Flexible.** Collaboration with Stable Diffusion for Image Editing.
## :fire: News
- **`2023/07/18`**: We release [Semantic-SAM](https://github.com/UX-Decoder/Semantic-SAM), a universal image segmentation model to enable segment and recognize anything at any desired granularity. **Code** and **checkpoint** are available!
- **`2023/06/17`**: We provide an example of evaluating Grounding DINO's zero-shot performance on COCO.
- **`2023/04/15`**: Refer to [CV in the Wild Readings](https://github.com/Computer-Vision-in-the-Wild/CVinW_Readings) for those who are interested in open-set recognition!
- **`2023/04/08`**: We release [demos](demo/image_editing_with_groundingdino_gligen.ipynb) to combine [Grounding DINO](https://arxiv.org/abs/2303.05499) with [GLIGEN](https://github.com/gligen/GLIGEN) for more controllable image editing.
- **`2023/04/08`**: We release [demos](demo/image_editing_with_groundingdino_stablediffusion.ipynb) to combine [Grounding DINO](https://arxiv.org/abs/2303.05499) with [Stable Diffusion](https://github.com/Stability-AI/StableDiffusion) for image editing.
- **`2023/04/06`**: We build a new demo by marrying GroundingDINO with [Segment-Anything](https://github.com/facebookresearch/segment-anything), named **[Grounded-Segment-Anything](https://github.com/IDEA-Research/Grounded-Segment-Anything)**, which aims to support segmentation in GroundingDINO.
- **`2023/03/28`**: A YouTube [video](https://youtu.be/cMa77r3YrDk) about Grounding DINO and basic object detection prompt engineering. [[SkalskiP](https://github.com/SkalskiP)]
- **`2023/03/28`**: Add a [demo](https://huggingface.co/spaces/ShilongLiu/Grounding_DINO_demo) on Hugging Face Space!
- **`2023/03/27`**: Support CPU-only mode. Now the model can run on machines without GPUs.
- **`2023/03/25`**: A [demo](https://colab.research.google.com/github/roboflow-ai/notebooks/blob/main/notebooks/zero-shot-object-detection-with-grounding-dino.ipynb) for Grounding DINO is available on Colab. [[SkalskiP](https://github.com/SkalskiP)]
- **`2023/03/22`**: Code is available now!
<details open>
<summary><font size="4">
Description
</font></summary>
<a href="https://arxiv.org/abs/2303.05499">Paper</a> introduction.
<img src=".asset/hero_figure.png" alt="ODinW" width="100%">
Marrying <a href="https://github.com/IDEA-Research/GroundingDINO">Grounding DINO</a> and <a href="https://github.com/gligen/GLIGEN">GLIGEN</a>
<img src="https://huggingface.co/ShilongLiu/GroundingDINO/resolve/main/GD_GLIGEN.png" alt="gd_gligen" width="100%">
</details>
## :star: Explanations/Tips for Grounding DINO Inputs and Outputs
- Grounding DINO accepts an `(image, text)` pair as inputs.
- It outputs `900` (by default) object boxes. Each box has similarity scores across all input words, as shown in the figures below.
- By default, we choose the boxes whose highest similarity exceeds a `box_threshold`.
- We extract the words whose similarities are higher than the `text_threshold` as predicted labels.
- If you want to obtain objects for specific phrases, like the `dogs` in the sentence `two dogs with a stick.`, you can select the boxes with the highest text similarities to `dogs` as the final outputs.
- Note that each word can be split into **more than one** token by different tokenizers, so the number of words in a sentence may not equal the number of text tokens.
- We suggest separating different category names with `.` for Grounding DINO.
![model_explain1](.asset/model_explan1.PNG)
![model_explain2](.asset/model_explan2.PNG)
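A minimal sketch of this selection logic (an illustration only, assuming sigmoid-activated `logits` of shape `(900, num_tokens)` and a `tokens` list for the caption; the function name and thresholds are placeholders, not the repository's exact post-processing):

```python
import torch

def select_outputs(logits, boxes, tokens, box_threshold, text_threshold):
    # logits: (num_queries, num_tokens) word-similarity scores per box (after sigmoid)
    # boxes:  (num_queries, 4) predicted boxes
    scores = logits.max(dim=1).values        # best word similarity for each box
    keep = scores > box_threshold            # keep boxes above box_threshold
    phrases = []
    for row in logits[keep]:
        # tokens scoring above text_threshold become the predicted label
        words = [tok for tok, s in zip(tokens, row.tolist()) if s > text_threshold]
        phrases.append(" ".join(words))
    return boxes[keep], scores[keep], phrases

# toy usage: random scores for 900 queries over a 4-token caption
logits = torch.rand(900, 4)
boxes = torch.rand(900, 4)
b, s, p = select_outputs(logits, boxes, ["two", "dogs", "with", "stick"], 0.9, 0.8)
print(len(b), p[:3])
```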
## :label: TODO
- [x] Release inference code and demo.
- [x] Release checkpoints.
- [x] Grounding DINO with Stable Diffusion and GLIGEN demos.
- [ ] Release training code.
## :hammer_and_wrench: Install
**Note:**

0. If you have a CUDA environment, please make sure the environment variable `CUDA_HOME` is set. The package will be compiled in CPU-only mode if no CUDA is available.

Please make sure you follow the installation steps strictly; otherwise the program may produce:

```bash
NameError: name '_C' is not defined
```

If this happens, please reinstall GroundingDINO by re-cloning the repository and repeating all the installation steps.
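A quick way to check whether the compiled extension imported correctly (a small diagnostic sketch; `_C` is the CUDA extension module built during installation):

```bash
python -c "from groundingdino import _C; print('custom ops OK')"
```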
#### How to check CUDA:
```bash
echo $CUDA_HOME
```
If it prints nothing, the path has not been set. Run the following to set the environment variable in the current shell:
```bash
export CUDA_HOME=/path/to/cuda-11.3
```
Note that the CUDA version should match your CUDA runtime, since multiple CUDA toolkits may coexist on the same machine.
If you want to set `CUDA_HOME` permanently, store it using:
```bash
echo 'export CUDA_HOME=/path/to/cuda' >> ~/.bashrc
```
After that, source the bashrc file and check `CUDA_HOME`:
```bash
source ~/.bashrc
echo $CUDA_HOME
```
In this example, `/path/to/cuda-11.3` should be replaced with the path where your CUDA toolkit is installed. You can find it by typing `which nvcc` in your terminal. For instance, if the output is `/usr/local/cuda/bin/nvcc`, then:
```bash
export CUDA_HOME=/usr/local/cuda
```
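Alternatively, `CUDA_HOME` can be derived directly from the `nvcc` location (a small convenience, assuming `nvcc` is on your `PATH`):

```bash
export CUDA_HOME=$(dirname "$(dirname "$(which nvcc)")")
```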
**Installation:**

1. Clone the GroundingDINO repository from GitHub.
```bash
git clone https://github.com/IDEA-Research/GroundingDINO.git
```
2. Change the current directory to the GroundingDINO folder.
```bash
cd GroundingDINO/
```
3. Install the required dependencies in the current directory.

```bash
pip install -e .
```

4. Download pre-trained model weights.

```bash
mkdir weights
cd weights
wget -q https://github.com/IDEA-Research/GroundingDINO/releases/download/v0.1.0-alpha/groundingdino_swint_ogc.pth
cd ..
```

## :arrow_forward: Demo

Check your GPU ID (only if you're using a GPU)

```bash
nvidia-smi
```

Evaluate Grounding DINO's zero-shot performance on COCO:

```bash
python demo/test_ap_on_coco.py \
 --anno_path /path/to/annotations/instances_val2017.json \
 --image_dir /path/to/val2017
```
## :luggage: Checkpoints
<!-- insert a table -->
<table>
<thead>
<tr style="text-align: right;">
<th></th>
<th>name</th>
<th>backbone</th>
<th>Data</th>
<th>box AP on COCO</th>
<th>Checkpoint</th>
<th>Config</th>
</tr>
</thead>
<tbody>
<tr>
<th>1</th>
<td>GroundingDINO-T</td>
<td>Swin-T</td>
<td>O365,GoldG,Cap4M</td>
<td>48.4 (zero-shot) / 57.2 (fine-tune)</td>
<td><a href="https://github.com/IDEA-Research/GroundingDINO/releases/download/v0.1.0-alpha/groundingdino_swint_ogc.pth">GitHub link</a> | <a href="https://huggingface.co/ShilongLiu/GroundingDINO/resolve/main/groundingdino_swint_ogc.pth">HF link</a></td>
<td><a href="https://github.com/IDEA-Research/GroundingDINO/blob/main/groundingdino/config/GroundingDINO_SwinT_OGC.py">link</a></td>
</tr>
<tr>
<th>2</th>
<td>GroundingDINO-B</td>
<td>Swin-B</td>
<td>COCO,O365,GoldG,Cap4M,OpenImage,ODinW-35,RefCOCO</td>
<td>56.7 </td>
<td><a href="https://github.com/IDEA-Research/GroundingDINO/releases/download/v0.1.0-alpha2/groundingdino_swinb_cogcoor.pth">GitHub link</a> | <a href="https://huggingface.co/ShilongLiu/GroundingDINO/resolve/main/groundingdino_swinb_cogcoor.pth">HF link</a>
<td><a href="https://github.com/IDEA-Research/GroundingDINO/blob/main/groundingdino/config/GroundingDINO_SwinB_cfg.py">link</a></td>
</tr>
</tbody>
</table>
## :medal_military: Results
<details open>
<summary><font size="4">
COCO Object Detection Results
</font></summary>
<img src=".asset/COCO.png" alt="COCO" width="100%">
</details>
<details open>
<summary><font size="4">
ODinW Object Detection Results
</font></summary>
<img src=".asset/ODinW.png" alt="ODinW" width="100%">
</details>
<details open>
<summary><font size="4">
Marrying Grounding DINO with <a href="https://github.com/Stability-AI/StableDiffusion">Stable Diffusion</a> for Image Editing
</font></summary>
See our example <a href="https://github.com/IDEA-Research/GroundingDINO/blob/main/demo/image_editing_with_groundingdino_stablediffusion.ipynb">notebook</a> for more details.
<img src=".asset/GD_SD.png" alt="GD_SD" width="100%">
</details>
<details open>
<summary><font size="4">
Marrying Grounding DINO with <a href="https://github.com/gligen/GLIGEN">GLIGEN</a> for more Detailed Image Editing.
</font></summary>
See our example <a href="https://github.com/IDEA-Research/GroundingDINO/blob/main/demo/image_editing_with_groundingdino_gligen.ipynb">notebook</a> for more details.
<img src=".asset/GD_GLIGEN.png" alt="GD_GLIGEN" width="100%">
</details>
## :sauropod: Model: Grounding DINO
Includes: a text backbone, an image backbone, a feature enhancer, a language-guided query selection, and a cross-modality decoder.
![arch](.asset/arch.png)
## :hearts: Acknowledgement
Our model is related to [DINO](https://github.com/IDEA-Research/DINO) and [GLIP](https://github.com/microsoft/GLIP). Thanks for their great work!
We also thank great previous work including DETR, Deformable DETR, SMCA, Conditional DETR, Anchor DETR, Dynamic DETR, DAB-DETR, DN-DETR, etc. More related work is available at [Awesome Detection Transformer](https://github.com/IDEACVR/awesome-detection-transformer). A new toolbox [detrex](https://github.com/IDEA-Research/detrex) is available as well.
Thanks [Stable Diffusion](https://github.com/Stability-AI/StableDiffusion) and [GLIGEN](https://github.com/gligen/GLIGEN) for their awesome models.
## :black_nib: Citation
If you find our work helpful for your research, please consider citing the following BibTeX entry.
```bibtex
@article{liu2023grounding,
title={Grounding dino: Marrying dino with grounded pre-training for open-set object detection},
author={Liu, Shilong and Zeng, Zhaoyang and Ren, Tianhe and Li, Feng and Zhang, Hao and Yang, Jie and Li, Chunyuan and Yang, Jianwei and Su, Hang and Zhu, Jun and others},
journal={arXiv preprint arXiv:2303.05499},
year={2023}
}
```
import onnxruntime as ort
from onnxruntime.transformers.optimizer import optimize_model

# 1. Paths (pointing at the freshly generated pure-FP16 model)
model_path = "../weights/ground_deform_fp16_all.onnx"
out_path = "../weights/ground_deform_fused_final.onnx"

# Important: make sure this points at the freshly compiled FP16 .so file
custom_op_lib = "../ort_plugin_fp16/build/libms_deform_attn_ort.so"

print(f"🚀 Injecting custom operator library: {custom_op_lib}")

# =====================================================================
# Monkey patching: intercept the InferenceSession creation inside the
# ORT optimizer, force-register the .so library, and exclude MIGraphX.
# =====================================================================
original_init = ort.InferenceSession.__init__

def patched_init(self, path_or_bytes, sess_options=None, providers=None, provider_options=None, **kwargs):
    if sess_options is None:
        sess_options = ort.SessionOptions()
    # Force-register the custom operator library
    sess_options.register_custom_ops_library(custom_op_lib)
    # Critical: allow only ROCm and CPU; drop MIGraphX to avoid crashes
    # during offline shape inference.
    providers = ['ROCMExecutionProvider', 'CPUExecutionProvider']
    original_init(self, path_or_bytes, sess_options, providers, provider_options, **kwargs)

ort.InferenceSession.__init__ = patched_init
print("✅ Interceptor installed; MIGraphX excluded, pure ROCm retained...")
# =====================================================================

try:
    # 2. Invoke the official Transformer optimization engine
    optimized_model = optimize_model(
        input=model_path,
        model_type='bert',  # match the BERT graph topology (Attention and LayerNorm patterns)
        use_gpu=True        # fuse large operators as for a GPU target
    )
    # 3. Save the optimized graph
    optimized_model.save_model_to_file(out_path)
    print(f"\n🎉 Done! The fused model has been saved to: {out_path}")
    print("👉 Point the benchmark script at this model to measure the final FPS.")
except Exception as e:
    print(f"\n❌ Optimization failed: {e}")
import onnx
from onnx import helper
import numpy as np

model_path = "../weights/ground_deform_fp16_all.onnx"
final_path = "../weights/ground_deform_fused_final.onnx"

print("🚀 Starting surgical manual operator fusion...")
model = onnx.load(model_path)
graph = model.graph

# 1. Build a node index
nodes = list(graph.node)
node_map = {node.output[0]: node for node in nodes}

# 2. Prepare the cleanup lists
nodes_to_remove = set()
new_nodes = []

# 3. Scan for fragmented LayerNorm patterns
# The standard fragment sequence is:
# ReduceMean -> Sub -> Pow -> ReduceMean -> Add -> Sqrt -> Div -> Mul -> Add
for node in nodes:
    if (node.op_type == "Add" and node.name.endswith("LayerNorm/add_1")) or "layernorm" in node.name.lower():
        # This is usually the final Add node of a LayerNorm; walking backwards
        # from here could lock onto the whole fragment group.
        try:
            # A hand-rolled pattern match was abandoned in favor of the more
            # robust FusionLayerNorm API below, so this branch is a no-op.
            pass
        except Exception:
            continue

# -------------------------------------------------------------------------
# Core logic: call ONNX Runtime's official graph-fusion API directly
# (without the validation pass).
# -------------------------------------------------------------------------
from onnxruntime.transformers.onnx_model import OnnxModel
from onnxruntime.transformers.fusion_layernorm import FusionLayerNorm

# Wrap the model
onnx_model = OnnxModel(model)
fusion = FusionLayerNorm(onnx_model)

# Force LayerNorm fusion. Note: global optimization stays off; only the
# LayerNorm pattern is matched and fused here.
print("⏳ Capturing and fusing LayerNorm fragments...")
fusion.apply()

# 4. Repair custom-operator inputs that may have been dropped.
# The optimizer sometimes removes initializers that are not explicitly
# connected in the graph; missing weights would be restored from the
# original model here. (This logic is already handled inside fusion.apply.)
print("🩹 Checking custom-operator data integrity...")

# 5. Save the model
onnx.save(onnx_model.model, final_path)
print(f"\n✅ Manual fusion complete.")
print(f"👉 Final model: {final_path}")
print("Now run the inference script and check whether the ~1600 LayerNorm fragments are gone!")
### Scripts in this folder for the custom-operator build

### Default image size: 800x1200

1. Export the ONNX model
```bash
python export_onnx_deform.py
```
2. Optimize and quantize the ONNX model
```bash
python onnx_optimize.py
```
3. Baseline ORT inference
```bash
python onnx_inference_deform.py
```
4. Optimized ORT inference
```bash
python onnx_inference_deform_optim.py
```
5. Optimized ORT inference + IO binding (no improvement in this project)
```bash
python onnx_inference_deform_optim_iobinding.py
```
### Changing the model image input to 400x600

1. First modify export_onnx_deform.py:
```python
# img = torch.randn(1, 3, 800, 1200).to(device)
img = torch.randn(1, 3, 400, 600).to(device)

# Export to ONNX
# onnx_output_path = "../weights/ground_deform.onnx"
onnx_output_path = "../weights_400x600/ground_deform.onnx"
```
2. Export, optimize, and quantize as usual.
3. Modify the load_image function in groundingdino/util/inference.py, changing 800 to 400:
```python
T.RandomResize([400], max_size=1333),
```
4. Run ORT inference as usual, updating the ONNX model path accordingly.
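Instead of editing the export script by hand for each resolution, the sizes could also be passed on the command line (a hypothetical convenience wrapper, not part of this repository):

```python
import argparse
import torch

# hypothetical flags replacing the hard-coded sizes in export_onnx_deform.py
parser = argparse.ArgumentParser()
parser.add_argument("--height", type=int, default=800)
parser.add_argument("--width", type=int, default=1200)
parser.add_argument("--out", default="../weights/ground_deform.onnx")
args = parser.parse_args()

img = torch.randn(1, 3, args.height, args.width)  # dummy export input at the chosen size
print(f"exporting with input {tuple(img.shape)} -> {args.out}")
```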
@@ -45,10 +45,12 @@
 text_token_mask = torch.tensor([[[True, False, False, False],
                                  [False, True, True, False],
                                  [False, False, False, True]]]).to(device)
-img = torch.randn(1, 3, 800, 1200).to(device)
+# img = torch.randn(1, 3, 800, 1200).to(device)
+img = torch.randn(1, 3, 400, 600).to(device)
 # Export to ONNX
-onnx_output_path = "../weights/ground_deform.onnx"
+# onnx_output_path = "../weights/ground_deform.onnx"
+onnx_output_path = "../weights_400x600/ground_deform.onnx"
 torch.onnx.export(
     model,
 ...
@@ -14,7 +14,7 @@ from groundingdino.models.GroundingDINO.bertwarper import generate_masks_with_sp
 so_options = ort.SessionOptions()
-custom_op_lib_path = "../ort_plugin/build/libms_deform_attn_ort.so"
+custom_op_lib_path = "../ort_plugin_fp16/build/libms_deform_attn_ort.so"
 so_options.register_custom_ops_library(custom_op_lib_path)
 # Enable ORT graph optimization
 so_options.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_ALL
@@ -245,7 +245,14 @@ def benchmark_performance(
 if __name__ == '__main__':
     # Configuration
-    model_path = '../weights/ground_deform_fp16.onnx'
+    model_path = '../weights_400x600/ground_deform_fp16.onnx'
+    """
+    ../weights/ground_deform.onnx           standard version
+    ../weights/ground_deform_sim.onnx       simplified version
+    ../weights/ground_deform_fp16.onnx      FP16 version (custom op kept FP32)
+    ../weights/ground_deform_fp16_all.onnx  pure FP16 version
+    ../weights/ground_deform_400x600.onnx   400x600 input version
+    """
     img_path = '../images/in/car_1.jpg'
     TEXT_PROMPT = "car ."
     BOX_TRESHOLD = 0.35
@@ -324,8 +331,8 @@ if __name__ == '__main__':
     )
     # Save the result
-    cv2.imwrite('./images/out/result.jpg', ori_img)
-    print(f"\n✅ Result saved to: ./images/out/result.jpg")
+    cv2.imwrite('./result.jpg', ori_img)
+    print(f"\n✅ Result saved to: ./result.jpg")
     print(f"✅ Detected targets: {phrases} ({len(boxes)} in total)")
     # profile_file = ort_session.end_profiling()
 ...
@@ -8,16 +8,15 @@ import bisect
 import time
 import os
 """
 Optimizations to the model's pre/post-processing and code structure:
 1. Prediction retrieval optimized: prediction_logits = sigmoid(outputs[0][0])
 2. Input data prepared in advance and passed in directly, removing the tokenizer dependency
-3. IO binding optimization
 """
 from groundingdino.util.inference import load_image
 so_options = ort.SessionOptions()
-custom_op_lib_path = "../ort_plugin_fp16_C/build/libms_deform_attn_ort.so"
+custom_op_lib_path = "../ort_plugin/build/libms_deform_attn_ort.so"
 so_options.register_custom_ops_library(custom_op_lib_path)
 # Enable ORT graph optimization
 so_options.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_ALL
@@ -181,7 +180,14 @@ def benchmark_performance(
 if __name__ == '__main__':
     # Configuration
-    model_path = '../weights/ground_deform_fp16_all.onnx'
+    model_path = '../weights_400x600/ground_deform.onnx'
+    """
+    ../weights/ground_deform.onnx           standard version
+    ../weights/ground_deform_sim.onnx       simplified version
+    ../weights/ground_deform_fp16.onnx      FP16 version (custom op kept FP32)
+    ../weights/ground_deform_fp16_all.onnx  pure FP16 version
+    ../weights/ground_deform_400x600.onnx   400x600 input version
+    """
     img_path = '../images/in/car_1.jpg'
     TEXT_PROMPT = "car ."
     BOX_TRESHOLD = 0.35
 ...
@@ -2,34 +2,35 @@ import onnx
 from onnxsim import simplify
 from onnxconverter_common import float16
-onnx_model_path = "../weights/ground_deform.onnx"
-sim_model_path = "../weights/ground_deform_sim.onnx"
-fp16_model_path = "../weights/ground_deform_fp16_all.onnx"
+onnx_model_path = "../weights_400x600/ground_deform.onnx"
+sim_model_path = "../weights_400x600/ground_deform_sim.onnx"
+fp16_model_path = "../weights_400x600/ground_deform_fp16.onnx"
+fp16_all_model_path = "../weights_400x600/ground_deform_fp16_all.onnx"
 custom_op_lib_path = "../ort_plugin_fp16/build/libms_deform_attn_ort.so"
 # ==========================================
 # Step 1: ONNX Simplify (with the custom-op library)
 # ==========================================
-# print("1️⃣ Running ONNX Simplify...")
-# model = onnx.load(onnx_model_path)
-# model_simp, check = simplify(model, custom_lib=custom_op_lib_path)
-# if check:
-#     onnx.save(model_simp, sim_model_path)
-#     print(f"✅ Simplify finished! Saved to {sim_model_path}")
-# else:
-#     print("❌ Simplify verification failed!")
-#     exit()
+print("1️⃣ Running ONNX Simplify...")
+model = onnx.load(onnx_model_path)
+model_simp, check = simplify(model, custom_lib=custom_op_lib_path)
+if check:
+    onnx.save(model_simp, sim_model_path)
+    print(f"✅ Simplify finished! Saved to {sim_model_path}")
+else:
+    print("❌ Simplify verification failed!")
+    exit()
 # ==========================================
-# Step 2: FP16 mixed-precision conversion (skipping the custom op)
+# Step 2: FP16 conversion (1. skipping the custom op; 2. not skipping it)
 # ==========================================
-print("\n2️⃣ Running FP16 mixed-precision conversion...")
 # Reload the simplified model
 model_to_fp16 = onnx.load(sim_model_path)
+print("\n2️⃣ Running FP16 mixed-precision conversion...")
 original_cast_nodes = [node.name for node in model_to_fp16.graph.node if node.op_type == "Cast"]
 print(f"🔍 Found {len(original_cast_nodes)} native Cast nodes; all added to the protection list.")
@@ -37,11 +38,22 @@
 model_fp16 = float16.convert_float_to_float16(
     model_to_fp16,
-    # op_block_list=["ms_deform_attn"],  # block the custom attention op (when using the FP32 custom-op build)
+    op_block_list=["ms_deform_attn"],  # block the custom attention op (when using the FP32 custom-op build)
     node_block_list=original_cast_nodes,  # protect all native Cast nodes
     keep_io_types=True  # keep the model's overall inputs/outputs in FP32
 )
 onnx.save(model_fp16, fp16_model_path)
-print(f"✅ FP16 conversion finished! Saved to {fp16_model_path}")
+print(f"✅ FP16 conversion finished (custom op skipped)! Saved to {fp16_model_path}")
+
+print("\n2️⃣ Running pure FP16 conversion...")
+model_fp16_all = float16.convert_float_to_float16(
+    model_to_fp16,
+    node_block_list=original_cast_nodes,  # protect all native Cast nodes
+    keep_io_types=True  # keep the model's overall inputs/outputs in FP32
+)
+onnx.save(model_fp16_all, fp16_all_model_path)
+print(f"✅ Pure FP16 conversion finished! Saved to {fp16_all_model_path}")
from groundingdino.util.inference import load_model, load_image, predict, annotate
import torch
import cv2

# Smoke test: load the model onto the GPU and confirm CUDA is available.
model = load_model("groundingdino/config/GroundingDINO_SwinT_OGC.py", "weights/groundingdino_swint_ogc.pth")
model = model.to('cuda:0')
print(torch.cuda.is_available())
print('DONE!')
final text_encoder_type: bert-base-uncased
@@ -39,9 +39,10 @@ text_token_mask = torch.tensor([[[True, False, False, False],
 # Fixed input resolution
 img = torch.randn(1, 3, 800, 1200)
+img = torch.randn(1, 3, 400, 600)
 # Export the original ONNX model
-onnx_output_path = "weights/ground.onnx"
+onnx_output_path = "weights_400x600/ground.onnx"
 simplified_onnx_path = "weights/ground_simplified1.onnx"
 ...
import onnx
from onnx import helper, TensorProto, numpy_helper
import numpy as np

def convert_fp16_manual(input_path, output_path, keep_io_types=True):
    model = onnx.load(input_path)
    graph = model.graph
    fp32 = TensorProto.FLOAT
    fp16 = TensorProto.FLOAT16

    # ========== 1. Collect all name -> type mappings ==========
    type_map = {}
    for init in graph.initializer:
        type_map[init.name] = init.data_type
    for inp in graph.input:
        type_map[inp.name] = inp.type.tensor_type.elem_type
    for out in graph.output:
        type_map[out.name] = out.type.tensor_type.elem_type

    # ========== 2. Initializers: FP32 -> FP16 ==========
    for i, init in enumerate(graph.initializer):
        if init.data_type == fp32:
            arr = numpy_helper.to_array(init)
            # Handle inf / -inf / values exceeding the FP16 range
            arr = np.clip(arr, -65504, 65504)
            arr = arr.astype(np.float16)
            new_init = numpy_helper.from_array(arr, init.name)
            graph.initializer[i].CopyFrom(new_init)
            type_map[init.name] = fp16

    # ========== 3. Constant nodes: FP32 -> FP16 ==========
    for node in graph.node:
        if node.op_type != "Constant":
            continue
        for attr in node.attribute:
            if attr.t.data_type == fp32:
                arr = numpy_helper.to_array(attr.t)
                arr = np.clip(arr, -65504, 65504).astype(np.float16)
                attr.t.CopyFrom(numpy_helper.from_array(arr))
                type_map[node.output[0]] = fp16

    # ========== 4. Walk the nodes and insert Casts ==========
    new_nodes = []
    cast_id = [0]

    # Ops whose outputs must stay FP32 (not converted)
    fp32_ops = {"Shape", "NonMaxSuppression", "Range",
                "TopK", "SequenceConstruct", "SequenceEmpty"}

    for node in graph.node:
        if node.op_type == "Constant":
            new_nodes.append(node)
            continue

        # These ops output integers or indices; skip them
        if node.op_type in fp32_ops:
            new_nodes.append(node)
            for o in node.output:
                type_map[o] = fp32  # marked FP32 (actually int64, etc.)
            continue

        # ---- Find the target type: use the type of the first known input ----
        target = None
        for inp_name in node.input:
            if inp_name and inp_name in type_map:
                t = type_map[inp_name]
                if t in (fp32, fp16):
                    target = t
                    break
        # Default target type = FP16
        if target is None:
            target = fp16

        # ---- Type-check every input ----
        for idx, inp_name in enumerate(node.input):
            if not inp_name or inp_name not in type_map:
                continue
            inp_type = type_map[inp_name]
            # Input is FP32 but target is FP16 -> insert a Cast to FP16
            if inp_type == fp32 and target == fp16:
                cast_out = f"_cast_{cast_id[0]}"
                cast_id[0] += 1
                cast_node = helper.make_node(
                    "Cast", inputs=[inp_name], outputs=[cast_out], to=fp16
                )
                new_nodes.append(cast_node)
                node.input[idx] = cast_out
                type_map[cast_out] = fp16
            # Input is FP16 but target is FP32 -> insert a Cast to FP32
            elif inp_type == fp16 and target == fp32:
                cast_out = f"_cast_{cast_id[0]}"
                cast_id[0] += 1
                cast_node = helper.make_node(
                    "Cast", inputs=[inp_name], outputs=[cast_out], to=fp32
                )
                new_nodes.append(cast_node)
                node.input[idx] = cast_out
                type_map[cast_out] = fp32

        new_nodes.append(node)
        # ---- Update the output types ----
        for o in node.output:
            type_map[o] = target

    # ========== 5. Replace the nodes ==========
    del graph.node[:]
    graph.node.extend(new_nodes)

    # ========== 6. Fix the graph output type declarations ==========
    if keep_io_types:
        # Keep the original IO types FP32; FP16 outputs must be cast back to FP32
        for out in graph.output:
            if out.name in type_map and type_map[out.name] == fp16:
                cast_out = f"_cast_out_{out.name}"
                cast_node = helper.make_node(
                    "Cast", inputs=[cast_out], outputs=[out.name], to=fp32
                )
                # Rename the original output: find the node that produces this
                # output and change its output name
                for node in graph.node:
                    for i, o in enumerate(node.output):
                        if o == out.name:
                            node.output[i] = cast_out
                            break
                graph.node.append(cast_node)
                type_map[out.name] = fp32
    else:
        # Declare the outputs as FP16 as well
        for out in graph.output:
            if out.name in type_map:
                out.type.tensor_type.elem_type = type_map[out.name]

    # ========== 7. Validate ==========
    onnx.checker.check_model(model)
    onnx.save(model, output_path)
    print(f"✅ Conversion finished -> {output_path}")
    print(f"   Node count: {len(graph.node)}")
    print(f"   Casts inserted: {cast_id[0]}")

# ========== Run ==========
convert_fp16_manual(
    "weights/ground.onnx",
    "weights/ground_fp16.onnx",
    keep_io_types=True,
)
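A quick way to sanity-check any of these FP16 conversions is to run the FP32 and FP16 models on the same input and compare outputs. The sketch below is an illustration under stated assumptions (it feeds zeros/ones to integer and boolean inputs, replaces dynamic dimensions with 1, and assumes the model loads with the default CPU provider; the custom `ms_deform_attn` builds would additionally need their .so registered):

```python
import numpy as np
import onnxruntime as ort

def random_feed(session):
    # Build a feed dict from the session's declared inputs.
    feed = {}
    for inp in session.get_inputs():
        shape = [d if isinstance(d, int) else 1 for d in inp.shape]
        if "int" in inp.type:
            feed[inp.name] = np.zeros(shape, dtype=np.int64)
        elif "bool" in inp.type:
            feed[inp.name] = np.ones(shape, dtype=bool)
        else:
            feed[inp.name] = np.random.rand(*shape).astype(np.float32)
    return feed

sess32 = ort.InferenceSession("weights/ground.onnx", providers=["CPUExecutionProvider"])
sess16 = ort.InferenceSession("weights/ground_fp16.onnx", providers=["CPUExecutionProvider"])
feed = random_feed(sess32)  # keep_io_types=True, so both models accept the same FP32 feed
for o32, o16 in zip(sess32.run(None, feed), sess16.run(None, feed)):
    print("max abs diff:", np.abs(o32.astype(np.float32) - o16.astype(np.float32)).max())
```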
import onnx
from onnxconverter_common import float16

# 1. Load the model
model = onnx.load("weights/ground.onnx")

# 2. Convert to FP16
model_fp16 = float16.convert_float_to_float16(
    model,
    keep_io_types=True,
    # op_block_list=["Cast"]
)

# 3. Validate the model
onnx.checker.check_model(model_fp16)

# 4. Save
onnx.save(model_fp16, "weights/ground_fp16.onnx")
print("FP16 model saved!")
import onnx
from onnxruntime.transformers.float16 import convert_float_to_float16

# ===== 1. Paths =====
input_model = "weights/ground.onnx"
output_model = "weights/ground_fp16.onnx"

# ===== 2. Load =====
model = onnx.load(input_model)

# ===== 3. Convert =====
model_fp16 = convert_float_to_float16(
    model,
    keep_io_types=True,  # ⭐ strongly recommended
)

# ===== 4. Save =====
onnx.save(model_fp16, output_model)
print("✅ ONNX Runtime FP16 conversion finished")
from onnxruntime.quantization import quantize_dynamic, QuantType
quantize_dynamic(
model_input="weights/ground.onnx",
model_output="weights/ground_int8.onnx",
weight_type=QuantType.QInt8,
)
print("int8 quantization done!")
import onnx
from onnx import TensorProto

# Load the FP16 model that fails the type check
model = onnx.load("weights/ground_fp16.onnx")

# 🔥 Targeted fix: find the offending intermediate tensor and force its type to FP16
target_arg = "/backbone/backbone.0/Cast_output_0"

# Walk every tensor type declaration in the model and repair the conflicting entry
for vi in model.graph.value_info:
    if vi.name == target_arg:
        vi.type.tensor_type.elem_type = TensorProto.FLOAT16
        print(f"✅ Fixed: {target_arg} type → FP16")

# Extra validation, then save the repaired model
onnx.checker.check_model(model)
onnx.save(model, "weights/ground_fp16_fixed.onnx")
print("\n🎉 Model repair finished! Load: ground_fp16_fixed.onnx")
@@ -39,7 +39,8 @@ def load_model(model_config_path: str, model_checkpoint_path: str, device: str =
 def load_image(image_path: str) -> Tuple[np.array, torch.Tensor]:
     transform = T.Compose(
         [
-            T.RandomResize([800], max_size=1333),
+            # T.RandomResize([800], max_size=1333),
+            T.RandomResize([400], max_size=1333),
             T.ToTensor(),
             T.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
         ]
 ...
import onnxruntime as ort
from onnxruntime.transformers.optimizer import optimize_model

model_path = "../weights/ground_deform.onnx"
out_path = "../weights/ground_fused.onnx"
custom_op_lib = "../ort_plugin/build/libms_deform_attn_ort.so"

print(f"🚀 Injecting custom operator library: {custom_op_lib}")

# =====================================================================
original_init = ort.InferenceSession.__init__

def patched_init(self, path_or_bytes, sess_options=None, providers=None, provider_options=None, **kwargs):
    if sess_options is None:
        sess_options = ort.SessionOptions()
    # Register the custom operator library
    sess_options.register_custom_ops_library(custom_op_lib)
    providers = ['ROCMExecutionProvider', 'CPUExecutionProvider']
    original_init(self, path_or_bytes, sess_options, providers, provider_options, **kwargs)

ort.InferenceSession.__init__ = patched_init
print("✅ Interceptor installed; MIGraphX forcibly excluded...")
# =====================================================================

try:
    # Invoke the optimizer as usual
    optimized_model = optimize_model(
        input=model_path,
        model_type='bert',
        use_gpu=True  # keep True so the optimizer fuses large operators for a GPU target
    )
    optimized_model.save_model_to_file(out_path)
    print(f"\n🎉 The fused model has been saved to: {out_path}")
except Exception as e:
    print(f"\n❌ Optimization failed: {e}")