Commit 34e4011b authored by zk's avatar zk
Browse files

首次提交

parents
Pipeline #3503 failed with stages
in 0 seconds
[
{"cat" : "Session","pid" :1775618,"tid" :1775618,"dur" :76845,"ts" :5,"ph" : "X","name" :"model_loading_uri","args" : {}},
{"cat" : "Session","pid" :1775618,"tid" :1775618,"dur" :4130635,"ts" :76963,"ph" : "X","name" :"session_initialization","args" : {}}
]
# Benchmark ResNet50.onnx with onnxruntime's perf_test tool on the ROCm
# execution provider, writing a profiling trace to profile.json and the
# performance summary to perf_result_profile.txt.
# NOTE(review): flag meanings (-m times, -r 10, -s, -t random) assumed from
# onnxruntime_perf_test's CLI help -- confirm against the installed version.
python -m onnxruntime.perf_test \
-m times \
-r 10 \
-s \
-e rocm \
-p profile.json \
-t random \
ResNet50.onnx \
perf_result_profile.txt
\ No newline at end of file
# coding=utf-8
# Copyright 2022 The IDEA Authors. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ------------------------------------------------------------------------------------------------
# Modified from
# https://github.com/fundamentalvision/Deformable-DETR/blob/main/models/ops/setup.py
# https://github.com/facebookresearch/detectron2/blob/main/setup.py
# https://github.com/open-mmlab/mmdetection/blob/master/setup.py
# https://github.com/Oneflow-Inc/libai/blob/main/setup.py
# ------------------------------------------------------------------------------------------------
import glob
import os
import subprocess
import subprocess
import sys
def install_torch():
    """Ensure ``torch`` is importable, pip-installing it on demand.

    Probes for torch with a local import; when the probe fails, installs
    torch into the current interpreter's environment via pip.
    """
    try:
        import torch  # noqa: F401 -- probe only; the real import happens later
    except ImportError:
        # Use the running interpreter so the install lands in the right env.
        subprocess.check_call([sys.executable, "-m", "pip", "install", "torch"])
# Ensure torch is installed before the unconditional imports below run.
install_torch()
import torch
from setuptools import find_packages, setup
from torch.utils.cpp_extension import CUDA_HOME, CppExtension, CUDAExtension
# groundingdino version info
version = "0.1.0"
package_name = "groundingdino"
cwd = os.path.dirname(os.path.abspath(__file__))

sha = "Unknown"
try:
    # Best-effort: record the current git commit; stays "Unknown" outside a
    # git checkout (e.g. when building from a source tarball).
    sha = subprocess.check_output(["git", "rev-parse", "HEAD"], cwd=cwd).decode("ascii").strip()
except Exception:
    pass
def write_version_file():
    """Write groundingdino/version.py containing the package __version__."""
    target = os.path.join(cwd, "groundingdino", "version.py")
    with open(target, "w") as fh:
        fh.write(f"__version__ = '{version}'\n")
        # Git revision intentionally not recorded:
        # fh.write(f"git_version = {repr(sha)}\n")
# Base runtime dependencies; the full list comes from requirements.txt later.
requirements = ["torch", "torchvision"]
# (major, minor) of the torch we are building against, e.g. [2, 1].
torch_ver = [int(x) for x in torch.__version__.split(".")[:2]]
def get_extensions():
    """Collect the C++/CUDA extension modules for groundingdino.

    Returns:
        list | None: a single-element list containing the ``groundingdino._C``
        CUDAExtension when a CUDA toolchain is available, otherwise ``None``
        (the install then ships without the compiled ops).
    """
    this_dir = os.path.dirname(os.path.abspath(__file__))
    extensions_dir = os.path.join(this_dir, "groundingdino", "models", "GroundingDINO", "csrc")

    main_source = os.path.join(extensions_dir, "vision.cpp")
    sources = glob.glob(os.path.join(extensions_dir, "**", "*.cpp"))
    source_cuda = glob.glob(os.path.join(extensions_dir, "**", "*.cu")) + glob.glob(
        os.path.join(extensions_dir, "*.cu")
    )

    sources = [main_source] + sources

    extension = CppExtension
    extra_compile_args = {"cxx": []}
    define_macros = []

    if CUDA_HOME is not None and (torch.cuda.is_available() or "TORCH_CUDA_ARCH_LIST" in os.environ):
        print("Compiling with CUDA")
        extension = CUDAExtension
        sources += source_cuda
        define_macros += [("WITH_CUDA", None)]
        extra_compile_args["nvcc"] = [
            "-DCUDA_HAS_FP16=1",
            "-D__CUDA_NO_HALF_OPERATORS__",
            "-D__CUDA_NO_HALF_CONVERSIONS__",
            "-D__CUDA_NO_HALF2_OPERATORS__",
        ]
    else:
        # No usable CUDA toolchain: skip building the extension entirely.
        # (The previous code also appended WITH_HIP macros and nvcc flags
        # here, but those were discarded by this early return; removed.)
        print("Compiling without CUDA")
        return None

    # glob already yields paths rooted at extensions_dir, so this join is a
    # no-op for those absolute paths; kept for parity with upstream setup.py.
    sources = [os.path.join(extensions_dir, s) for s in sources]
    include_dirs = [extensions_dir]
    ext_modules = [
        extension(
            "groundingdino._C",
            sources,
            include_dirs=include_dirs,
            define_macros=define_macros,
            extra_compile_args=extra_compile_args,
        )
    ]
    return ext_modules
def parse_requirements(fname="requirements.txt", with_version=True):
    """Parse the package dependencies listed in a requirements file but strips
    specific versioning information.
    Args:
        fname (str): path to requirements file
        with_version (bool, default=True): if True include version specs
    Returns:
        List[str]: list of requirements items
    CommandLine:
        python -c "import setup; print(setup.parse_requirements())"
    """
    import re
    import sys
    from os.path import exists

    require_fpath = fname

    def parse_line(line):
        """Parse information from a line in a requirements text file."""
        if line.startswith("-r "):
            # Allow specifying requirements in other files
            target = line.split(" ")[1]
            for info in parse_require_file(target):
                yield info
        else:
            info = {"line": line}
            if line.startswith("-e "):
                # Editable install: the package name follows "#egg=".
                info["package"] = line.split("#egg=")[1]
            elif "@git+" in line:
                # Direct git reference: keep the whole spec as the package.
                info["package"] = line
            else:
                # Remove versioning from the package
                pat = "(" + "|".join([">=", "==", ">"]) + ")"
                parts = re.split(pat, line, maxsplit=1)
                parts = [p.strip() for p in parts]
                info["package"] = parts[0]
                if len(parts) > 1:
                    op, rest = parts[1:]
                    if ";" in rest:
                        # Handle platform specific dependencies
                        # http://setuptools.readthedocs.io/en/latest/setuptools.html#declaring-platform-specific-dependencies
                        version, platform_deps = map(str.strip, rest.split(";"))
                        info["platform_deps"] = platform_deps
                    else:
                        version = rest  # NOQA
                    info["version"] = (op, version)
            yield info

    def parse_require_file(fpath):
        # Yield parsed info dicts for every non-comment, non-blank line.
        with open(fpath, "r") as f:
            for line in f.readlines():
                line = line.strip()
                if line and not line.startswith("#"):
                    for info in parse_line(line):
                        yield info

    def gen_packages_items():
        # Reassemble each parsed entry back into a requirement string,
        # optionally re-attaching version spec and platform markers.
        if exists(require_fpath):
            for info in parse_require_file(require_fpath):
                parts = [info["package"]]
                if with_version and "version" in info:
                    parts.extend(info["version"])
                if not sys.version.startswith("3.4"):
                    # apparently package_deps are broken in 3.4
                    platform_deps = info.get("platform_deps")
                    if platform_deps is not None:
                        parts.append(";" + platform_deps)
                item = "".join(parts)
                yield item

    packages = list(gen_packages_items())
    return packages
if __name__ == "__main__":
    print(f"Building wheel {package_name}-{version}")

    # Read the license text to embed in the package metadata.
    # NOTE(review): setuptools' ``license`` field normally holds a short
    # identifier; passing the full text is kept for backward compatibility.
    with open("LICENSE", "r", encoding="utf-8") as f:
        license_text = f.read()  # renamed: ``license`` shadows the builtin

    write_version_file()

    setup(
        name="groundingdino",
        version=version,  # keep in sync with the module-level version string
        author="International Digital Economy Academy, Shilong Liu",
        url="https://github.com/IDEA-Research/GroundingDINO",
        description="open-set object detector",
        license=license_text,
        install_requires=parse_requirements("requirements.txt"),
        packages=find_packages(
            exclude=(
                "configs",
                "tests",
            )
        ),
        ext_modules=get_extensions(),
        cmdclass={"build_ext": torch.utils.cpp_extension.BuildExtension},
    )
{
"cells": [
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"final text_encoder_type: bert-base-uncased\n"
]
},
{
"data": {
"application/json": {
"ascii": false,
"bar_format": null,
"colour": null,
"elapsed": 0.014210224151611328,
"initial": 0,
"n": 0,
"ncols": null,
"nrows": null,
"postfix": null,
"prefix": "Downloading model.safetensors",
"rate": null,
"total": 440449768,
"unit": "B",
"unit_divisor": 1000,
"unit_scale": true
},
"application/vnd.jupyter.widget-view+json": {
"model_id": "5922f34578364d36afa13de9f01254bd",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Downloading model.safetensors: 0%| | 0.00/440M [00:00<?, ?B/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/root/miniconda3/lib/python3.8/site-packages/transformers/modeling_utils.py:881: FutureWarning: The `device` argument is deprecated and will be removed in v5 of Transformers.\n",
" warnings.warn(\n",
"/root/miniconda3/lib/python3.8/site-packages/torch/utils/checkpoint.py:31: UserWarning: None of the inputs have requires_grad=True. Gradients will be None\n",
" warnings.warn(\"None of the inputs have requires_grad=True. Gradients will be None\")\n"
]
},
{
"data": {
"text/plain": [
"True"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from groundingdino.util.inference import load_model, load_image, predict, annotate\n",
"import cv2\n",
"\n",
"model = load_model(\"groundingdino/config/GroundingDINO_SwinT_OGC.py\", \"../04-06-segment-anything/weights/groundingdino_swint_ogc.pth\")\n",
"IMAGE_PATH = \".asset/cat_dog.jpeg\"\n",
"TEXT_PROMPT = \"chair . person . dog .\"\n",
"BOX_TRESHOLD = 0.35\n",
"TEXT_TRESHOLD = 0.25\n",
"\n",
"image_source, image = load_image(IMAGE_PATH)\n",
"\n",
"boxes, logits, phrases = predict(\n",
" model=model,\n",
" image=image,\n",
" caption=TEXT_PROMPT,\n",
" box_threshold=BOX_TRESHOLD,\n",
" text_threshold=TEXT_TRESHOLD\n",
")\n",
"\n",
"annotated_frame = annotate(image_source=image_source, boxes=boxes, logits=logits, phrases=phrases)\n",
"cv2.imwrite(\"annotated_image.jpg\", annotated_frame)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "base",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.10"
},
"orig_nbformat": 4
},
"nbformat": 4,
"nbformat_minor": 2
}
from typing import *
import torch
import torch.distributed.rpc as rpc
from torch import Tensor
from torch._jit_internal import Future
from torch.distributed.rpc import RRef
from typing import Tuple # pyre-ignore: unused import
# Placeholder substituted with the concrete module interface class when this
# template is instantiated by torch.distributed.nn's remote-module codegen.
module_interface_cls = None
def forward_async(self, *args, **kwargs):
    """Asynchronously invoke the remote module's forward on its owner.

    Returns the Future produced by ``rpc.rpc_async``; the caller waits on it
    to obtain the forward result.
    """
    # Prepend the remote-module bookkeeping so _remote_forward can locate the
    # module and move tensors to the right device on the owning worker.
    args = (self.module_rref, self.device, self.is_device_map_set, *args)
    kwargs = {**kwargs}
    return rpc.rpc_async(
        self.module_rref.owner(),
        _remote_forward,
        args,
        kwargs,
    )
def forward(self, *args, **kwargs):
    """Synchronously invoke the remote module's forward on its owner.

    Same RPC as ``forward_async``, but blocks on the returned future and
    hands back the forward result directly.
    """
    args = (self.module_rref, self.device, self.is_device_map_set, *args)
    kwargs = {**kwargs}
    ret_fut = rpc.rpc_async(
        self.module_rref.owner(),
        _remote_forward,
        args,
        kwargs,
    )
    return ret_fut.wait()
# Methods injected into the generated remote-module class by the template
# instantiation machinery.
_generated_methods = [
    forward_async,
    forward,
]
def _remote_forward(
    module_rref: RRef[module_interface_cls], device: str, is_device_map_set: bool, *args, **kwargs):
    """Run the wrapped module's forward on its owning worker.

    When *device* is a cuda device, CPU tensors in args/kwargs are moved onto
    it first; when no RPC device map is configured (``is_device_map_set`` is
    False), GPU tensors in the output are moved back to CPU so they can be
    sent over the wire.
    """
    module = module_rref.local_value()
    device = torch.device(device)

    # CPU-only path: nothing needs moving.
    if device.type != "cuda":
        return module.forward(*args, **kwargs)

    # If the module is on a cuda device,
    # move any CPU tensor in args or kwargs to the same cuda device.
    # Since torch script does not support generator expression,
    # have to use concatenation instead of
    # ``tuple(i.to(device) if isinstance(i, Tensor) else i for i in *args)``.
    args = (*args,)
    out_args: Tuple[()] = ()
    for arg in args:
        arg = (arg.to(device),) if isinstance(arg, Tensor) else (arg,)
        out_args = out_args + arg

    kwargs = {**kwargs}
    for k, v in kwargs.items():
        if isinstance(v, Tensor):
            kwargs[k] = kwargs[k].to(device)

    if is_device_map_set:
        return module.forward(*out_args, **kwargs)

    # If the device map is empty, then only CPU tensors are allowed to send over wire,
    # so have to move any GPU tensor to CPU in the output.
    # Since torch script does not support generator expression,
    # have to use concatenation instead of
    # ``tuple(i.cpu() if isinstance(i, Tensor) else i for i in module.forward(*out_args, **kwargs))``.
    ret: Tuple[()] = ()
    for i in module.forward(*out_args, **kwargs):
        i = (i.cpu(),) if isinstance(i, Tensor) else (i,)
        ret = ret + i
    return ret
import tensorrt as trt
class LayerProfiler(trt.IProfiler):
    """TensorRT profiler that prints the execution time of every layer."""

    def __init__(self):
        super(LayerProfiler, self).__init__()

    def report_layer_time(self, layer_name, ms):
        # Invoked by TensorRT once per layer; log name and elapsed time.
        # Timing information could also be recorded per layer name here.
        print(f"Layer {layer_name} took {ms} ms to execute")
\ No newline at end of file
import cv2
import torch
import numpy as np
from PIL import Image
from typing import Dict
from cuda import cuda, cudart
from transformers import AutoTokenizer
import groundingdino.datasets.transforms as T
def sigmoid(x):
    """Numerically stable logistic sigmoid, elementwise for arrays.

    Equivalent to ``1 / (1 + exp(-x))`` but avoids the overflow warning the
    naive form raises for large-magnitude negative inputs:
    exp(-logaddexp(0, -x)) == exp(-log(1 + exp(-x))) == 1 / (1 + exp(-x)).
    """
    return np.exp(-np.logaddexp(0.0, -x))
# Caption pre-processing: normalise case/whitespace and force a trailing ".".
def preprocess_caption(caption: str):
    """Lower-case and strip *caption*, guaranteeing it ends with a period."""
    normalized = caption.lower().strip()
    return normalized if normalized.endswith(".") else normalized + "."
# Load an image from disk and return it raw plus pre-processed.
def load_image(image_path: str):
    """Open *image_path* as RGB and return ``(raw_array, transformed)``:
    the unmodified image as a numpy array, and the resized/normalised
    output of the groundingdino transform pipeline."""
    transform = T.Compose(
        [
            T.RandomResize([800], max_size=1333),
            T.ToTensor(),
            # ImageNet mean/std normalisation.
            T.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
        ]
    )
    pil_image = Image.open(image_path).convert("RGB")
    raw_array = np.asarray(pil_image)
    transformed, _ = transform(pil_image, None)
    return raw_array, transformed
# Extract the text phrase selected by a positive-token mask.
def get_phrases_from_posmap(
    posmap: np.ndarray, tokenized: Dict, tokenizer: AutoTokenizer, left_idx: int = 0, right_idx: int = 255
):
    """Decode the tokens flagged True in the 1-D boolean *posmap* into text.

    Positions up to ``left_idx`` (inclusive) and from ``right_idx`` onwards
    are masked out first. Note: *posmap* is modified in place.
    """
    assert isinstance(posmap, np.ndarray), "posmap must be np.ndarray"
    if posmap.ndim != 1:
        raise NotImplementedError("posmap must be 1-dim")
    # Zero out the guard positions, then collect the surviving indices.
    posmap[: left_idx + 1] = False
    posmap[right_idx:] = False
    selected = np.nonzero(posmap)[0]
    token_ids = [tokenized["input_ids"][idx] for idx in selected]
    return tokenizer.decode(token_ids)
def check_cuda_err(err):
    """Raise RuntimeError if *err* signals a CUDA failure.

    Accepts either a CUDA Driver API status (``cuda.CUresult``) or a CUDA
    Runtime API status (``cudart.cudaError_t``); any other type is rejected.
    """
    if isinstance(err, cuda.CUresult):
        if err != cuda.CUresult.CUDA_SUCCESS:
            raise RuntimeError("Cuda Error: {}".format(err))
    # BUGFIX: this branch was a plain ``if``, so a *successful* driver-API
    # status fell through to the final ``else`` and raised "Unknown error
    # type". It must be ``elif`` so the two status families are exclusive.
    elif isinstance(err, cudart.cudaError_t):
        if err != cudart.cudaError_t.cudaSuccess:
            raise RuntimeError("Cuda Runtime Error: {}".format(err))
    else:
        # Neither a driver nor a runtime status object.
        raise RuntimeError("Unknown error type: {}".format(err))
def cuda_call(call):
    """Unpack ``(err, *results)`` returned by a cuda-python API call.

    Raises via ``check_cuda_err`` on failure; returns the single result when
    there is exactly one, otherwise the tuple of results.
    """
    err = call[0]
    results = call[1:]
    check_cuda_err(err)
    return results[0] if len(results) == 1 else results
# Copy data from CPU memory into the GPU buffer at *device_ptr*.
def memcpy_host_to_device(device_ptr: int, host_arr: np.ndarray):
    # Total byte count of the host array (elements * bytes-per-element).
    nbytes = host_arr.size * host_arr.itemsize
    cuda_call(cudart.cudaMemcpy(device_ptr, host_arr, nbytes, cudart.cudaMemcpyKind.cudaMemcpyHostToDevice))
# Copy data from the GPU buffer at *device_ptr* back into CPU memory.
def memcpy_device_to_host(host_arr: np.ndarray, device_ptr: int):
    # Size of the transfer is dictated by the destination host array.
    nbytes = host_arr.size * host_arr.itemsize
    cuda_call(cudart.cudaMemcpy(host_arr, device_ptr, nbytes, cudart.cudaMemcpyKind.cudaMemcpyDeviceToHost))
# Determine the dimensions of a (possibly nested) list.
def get_shape(lst):
    """Return the nested-list dimensions as a tuple.

    Descends through the first element at each level, so ragged lists report
    the shape implied by their leading elements. Non-lists and empty lists
    contribute no further dimensions.
    """
    shape = []
    node = lst
    while isinstance(node, list) and node:
        shape.append(len(node))
        node = node[0]
    return tuple(shape)
# Draw detection results on the image and save it to disk.
def draw_results(ori_img, boxes, confs, phrases, img_save_path='./result.jpg'):
    """Render normalised cxcywh *boxes* with confidences and phrase labels
    onto *ori_img* (RGB) and write the annotated BGR image to
    *img_save_path*. The input array itself is not modified."""
    canvas = cv2.cvtColor(ori_img, cv2.COLOR_RGB2BGR)
    img_h = canvas.shape[0]
    img_w = canvas.shape[1]
    for idx, box in enumerate(boxes):
        conf = confs[idx]
        label = phrases[idx]
        # Convert normalised (cx, cy, w, h) to absolute corner coordinates.
        x1 = int((box[0] - box[2] / 2) * img_w)
        y1 = int((box[1] - box[3] / 2) * img_h)
        x2 = int((box[0] + box[2] / 2) * img_w)
        y2 = int((box[1] + box[3] / 2) * img_h)
        cv2.rectangle(canvas, (x1, y1), (x2, y2), (0, 0, 255), 2)
        cv2.putText(canvas, f'{label} {conf:.2f}', (x1-15, y1-15), fontFace = cv2.FONT_HERSHEY_SIMPLEX, color = (255, 255, 255), fontScale=1.5, thickness=3)
    cv2.imwrite(img_save_path, canvas)
# From groundingdino.models.GroundingDINO.bertwarper: builds the segment-wise
# attention mask, restarting position ids, and per-category token-mask list.
def generate_masks_with_special_tokens_and_transfer_map(tokenized, special_tokens_list, tokenizer):
    """Generate attention mask between each pair of special tokens
    Args:
        tokenized (dict): tokenizer output; only ``input_ids`` is used
            (torch.Tensor of shape [bs, num_token]).
        special_tokens_list (list): token ids treated as segment separators.
        tokenizer: unused in this function; kept for interface compatibility.
    Returns:
        torch.Tensor: [bs, num_token, num_token] bool attention mask,
            block-diagonal per segment between special tokens.
        torch.Tensor: [bs, num_token] long position ids, restarting at 0
            inside each segment.
        list: per-batch-item list of [num_cate, num_token] bool masks mapping
            each category segment to its tokens.
    """
    input_ids = tokenized["input_ids"]
    bs, num_token = input_ids.shape
    # special_tokens_mask: bs, num_token. 1 for special tokens. 0 for normal tokens
    special_tokens_mask = torch.zeros((bs, num_token), device=input_ids.device).bool()
    for special_token in special_tokens_list:
        special_tokens_mask |= input_ids == special_token

    # idxs: each row is a list of indices of special tokens
    idxs = torch.nonzero(special_tokens_mask)

    # generate attention mask and positional ids
    attention_mask = (
        torch.eye(num_token, device=input_ids.device).bool().unsqueeze(0).repeat(bs, 1, 1)
    )
    position_ids = torch.zeros((bs, num_token), device=input_ids.device)
    cate_to_token_mask_list = [[] for _ in range(bs)]
    previous_col = 0
    for i in range(idxs.shape[0]):
        row, col = idxs[i]
        if (col == 0) or (col == num_token - 1):
            # Sequence-boundary special tokens (presumably [CLS]/[SEP]) attend
            # only to themselves; no category mask is emitted for them.
            attention_mask[row, col, col] = True
            position_ids[row, col] = 0
        else:
            # Tokens between the previous and current special token attend to
            # each other, with positions restarting from 0 in the segment.
            attention_mask[row, previous_col + 1 : col + 1, previous_col + 1 : col + 1] = True
            position_ids[row, previous_col + 1 : col + 1] = torch.arange(
                0, col - previous_col, device=input_ids.device
            )
            # Mark this segment's tokens (excluding the special token itself)
            # as belonging to one category.
            c2t_maski = torch.zeros((num_token), device=input_ids.device).bool()
            c2t_maski[previous_col + 1 : col] = True
            cate_to_token_mask_list[row].append(c2t_maski)
        previous_col = col

    cate_to_token_mask_list = [
        torch.stack(cate_to_token_mask_listi, dim=0)
        for cate_to_token_mask_listi in cate_to_token_mask_list
    ]

    # # padding mask
    # padding_mask = tokenized['attention_mask']
    # attention_mask = attention_mask & padding_mask.unsqueeze(1).bool() & padding_mask.unsqueeze(2).bool()

    return attention_mask, position_ids.to(torch.long), cate_to_token_mask_list
\ No newline at end of file
# Evaluate COCO AP of the exported ONNX GroundingDINO model (SwinB config)
# on the COCO 2017 validation set, pinned to GPU 0.
CUDA_VISIBLE_DEVICES=0 \
python demo/test_ap_on_coco_onnx.py \
-c groundingdino/config/GroundingDINO_SwinB_cfg.py \
-p weights/ground.onnx \
--anno_path /workspace/coco_2017/annotations/instances_val2017.json \
--image_dir /workspace/coco_2017/val2017
\ No newline at end of file
# Evaluate COCO AP of the PyTorch GroundingDINO checkpoint (SwinB config)
# on the COCO 2017 validation set, pinned to GPU 0.
CUDA_VISIBLE_DEVICES=0 \
python demo/test_ap_on_coco.py \
-c groundingdino/config/GroundingDINO_SwinB_cfg.py \
-p weights/groundingdino_swinb_cogcoor.pth \
--anno_path /workspace/coco_2017/annotations/instances_val2017.json \
--image_dir /workspace/coco_2017/val2017
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment