Commit e9cee049 authored by luopl

Initial commit
# Copyright (c) Tencent Inc. All rights reserved.
import os
import sys
import argparse
import os.path as osp
from io import BytesIO
from functools import partial
import cv2
import onnx
import torch
import onnxsim
import numpy as np
import gradio as gr
from PIL import Image
import supervision as sv
from torchvision.ops import nms
from mmengine.runner import Runner
from mmengine.dataset import Compose
from mmengine.runner.amp import autocast
from mmengine.config import Config, DictAction, ConfigDict
from mmdet.datasets import CocoDataset
from mmyolo.registry import RUNNERS
sys.path.append('./deploy')
from easydeploy import model as EM
BOUNDING_BOX_ANNOTATOR = sv.BoundingBoxAnnotator(thickness=1)
MASK_ANNOTATOR = sv.MaskAnnotator()
class LabelAnnotator(sv.LabelAnnotator):
@staticmethod
def resolve_text_background_xyxy(
center_coordinates,
text_wh,
position,
):
center_x, center_y = center_coordinates
text_w, text_h = text_wh
return center_x, center_y, center_x + text_w, center_y + text_h
LABEL_ANNOTATOR = LabelAnnotator(text_padding=4,
text_scale=0.5,
text_thickness=1)
def parse_args():
parser = argparse.ArgumentParser(description='YOLO-World Demo')
parser.add_argument('config', help='test config file path')
parser.add_argument('checkpoint', help='checkpoint file')
parser.add_argument(
        '--work-dir',
        help='the directory to save outputs such as the exported ONNX model',
        default='output')
parser.add_argument(
'--cfg-options',
nargs='+',
action=DictAction,
help='override some settings in the used config, the key-value pair '
'in xxx=yyy format will be merged into config file. If the value to '
'be overwritten is a list, it should be like key="[a,b]" or key=a,b '
'It also allows nested list/tuple values, e.g. key="[(a,b),(c,d)]" '
'Note that the quotation marks are necessary and that no white space '
'is allowed.')
args = parser.parse_args()
return args
def run_image(runner,
image,
text,
max_num_boxes,
score_thr,
nms_thr,
image_path='./work_dirs/demo.png'):
# image.save(image_path)
texts = [[t.strip()] for t in text.split(',')] + [[' ']]
data_info = dict(img_id=0, img=np.array(image), texts=texts)
data_info = runner.pipeline(data_info)
data_batch = dict(inputs=data_info['inputs'].unsqueeze(0),
data_samples=[data_info['data_samples']])
with autocast(enabled=False), torch.no_grad():
output = runner.model.test_step(data_batch)[0]
pred_instances = output.pred_instances
keep = nms(pred_instances.bboxes,
pred_instances.scores,
iou_threshold=nms_thr)
pred_instances = pred_instances[keep]
pred_instances = pred_instances[pred_instances.scores.float() > score_thr]
if len(pred_instances.scores) > max_num_boxes:
indices = pred_instances.scores.float().topk(max_num_boxes)[1]
pred_instances = pred_instances[indices]
pred_instances = pred_instances.cpu().numpy()
if 'masks' in pred_instances:
masks = pred_instances['masks']
else:
masks = None
detections = sv.Detections(xyxy=pred_instances['bboxes'],
class_id=pred_instances['labels'],
confidence=pred_instances['scores'],
mask=masks)
labels = [
f"{texts[class_id][0]} {confidence:0.2f}" for class_id, confidence in
zip(detections.class_id, detections.confidence)
]
image = np.array(image)
image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR) # Convert RGB to BGR
image = BOUNDING_BOX_ANNOTATOR.annotate(image, detections)
image = LABEL_ANNOTATOR.annotate(image, detections, labels=labels)
if masks is not None:
image = MASK_ANNOTATOR.annotate(image, detections)
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) # Convert BGR to RGB
image = Image.fromarray(image)
return image
def export_model(runner, text, max_num_boxes, score_thr, nms_thr):
backend = EM.MMYOLOBackend.ONNXRUNTIME
postprocess_cfg = ConfigDict(pre_top_k=10 * max_num_boxes,
keep_top_k=max_num_boxes,
iou_threshold=nms_thr,
score_threshold=score_thr)
base_model = runner.model
    texts = [[t.strip()] for t in text.split(',')] + [[' ']]
base_model.reparameterize(texts)
deploy_model = EM.DeployModel(baseModel=base_model,
backend=backend,
postprocess_cfg=postprocess_cfg)
deploy_model.eval()
device = (next(iter(base_model.parameters()))).device
fake_input = torch.ones([1, 3, 640, 640], device=device)
deploy_model(fake_input)
save_onnx_path = os.path.join(
args.work_dir,
os.path.basename(args.checkpoint).replace('pth', 'onnx'))
# export onnx
with BytesIO() as f:
output_names = ['num_dets', 'boxes', 'scores', 'labels']
torch.onnx.export(deploy_model,
fake_input,
f,
input_names=['images'],
output_names=output_names,
opset_version=12)
f.seek(0)
onnx_model = onnx.load(f)
onnx.checker.check_model(onnx_model)
onnx_model, check = onnxsim.simplify(onnx_model)
onnx.save(onnx_model, save_onnx_path)
return gr.update(visible=True), save_onnx_path
def demo(runner, args):
with gr.Blocks(title="YOLO-World") as demo:
with gr.Row():
gr.Markdown('<h1><center>YOLO-World: Real-Time Open-Vocabulary '
'Object Detector</center></h1>')
with gr.Row():
with gr.Column(scale=0.3):
with gr.Row():
image = gr.Image(type='pil', label='input image')
input_text = gr.Textbox(
lines=7,
label='Enter the classes to be detected, '
'separated by comma',
value=', '.join(CocoDataset.METAINFO['classes']),
elem_id='textbox')
with gr.Row():
submit = gr.Button('Submit')
clear = gr.Button('Clear')
with gr.Row():
export = gr.Button('Deploy and Export ONNX Model')
with gr.Row():
gr.Markdown(
"It takes a few seconds to generate the ONNX file! YOLO-World-Seg (segmentation) is not supported now"
)
out_download = gr.File(visible=False)
max_num_boxes = gr.Slider(minimum=1,
maximum=300,
value=100,
step=1,
interactive=True,
label='Maximum Number Boxes')
score_thr = gr.Slider(minimum=0,
maximum=1,
value=0.05,
step=0.001,
interactive=True,
label='Score Threshold')
nms_thr = gr.Slider(minimum=0,
maximum=1,
value=0.7,
step=0.001,
interactive=True,
label='NMS Threshold')
with gr.Column(scale=0.7):
output_image = gr.Image(type='pil', label='output image')
submit.click(partial(run_image, runner),
[image, input_text, max_num_boxes, score_thr, nms_thr],
[output_image])
clear.click(lambda: [None, '', None], None,
[image, input_text, output_image])
export.click(partial(export_model, runner),
[input_text, max_num_boxes, score_thr, nms_thr],
[out_download, out_download])
demo.launch(server_name='0.0.0.0',
server_port=8080) # port 80 does not work for me
if __name__ == '__main__':
args = parse_args()
# load config
cfg = Config.fromfile(args.config)
if args.cfg_options is not None:
cfg.merge_from_dict(args.cfg_options)
if args.work_dir is not None:
cfg.work_dir = args.work_dir
elif cfg.get('work_dir', None) is None:
cfg.work_dir = osp.join('./work_dirs',
osp.splitext(osp.basename(args.config))[0])
cfg.load_from = args.checkpoint
if 'runner_type' not in cfg:
runner = Runner.from_cfg(cfg)
else:
runner = RUNNERS.build(cfg)
runner.call_hook('before_run')
runner.load_or_resume()
pipeline = cfg.test_dataloader.dataset.pipeline
pipeline[0].type = 'mmdet.LoadImageFromNDArray'
runner.pipeline = Compose(pipeline)
runner.model.eval()
demo(runner, args)
# Copyright (c) Tencent Inc. All rights reserved.
import os
import cv2
import argparse
import os.path as osp
import torch
from mmengine.config import Config, DictAction
from mmengine.runner.amp import autocast
from mmengine.dataset import Compose
from mmengine.utils import ProgressBar
from mmdet.apis import init_detector
from mmdet.utils import get_test_pipeline_cfg
import supervision as sv
BOUNDING_BOX_ANNOTATOR = sv.BoundingBoxAnnotator(thickness=1)
MASK_ANNOTATOR = sv.MaskAnnotator()
class LabelAnnotator(sv.LabelAnnotator):
@staticmethod
def resolve_text_background_xyxy(
center_coordinates,
text_wh,
position,
):
center_x, center_y = center_coordinates
text_w, text_h = text_wh
return center_x, center_y, center_x + text_w, center_y + text_h
LABEL_ANNOTATOR = LabelAnnotator(text_padding=4,
text_scale=0.5,
text_thickness=1)
def parse_args():
parser = argparse.ArgumentParser(description='YOLO-World Demo')
parser.add_argument('config', help='test config file path')
parser.add_argument('checkpoint', help='checkpoint file')
parser.add_argument('image', help='image path, include image file or dir.')
parser.add_argument(
        'text',
        help='text prompts: category names separated by commas, '
        'or a .txt file with one prompt per line.')
parser.add_argument('--topk',
default=100,
type=int,
help='keep topk predictions.')
parser.add_argument('--threshold',
default=0.1,
type=float,
help='confidence score threshold for predictions.')
parser.add_argument('--device',
default='cuda:0',
help='device used for inference.')
parser.add_argument('--show',
action='store_true',
help='show the detection results.')
parser.add_argument(
'--annotation',
action='store_true',
help='save the annotated detection results as yolo text format.')
parser.add_argument('--amp',
action='store_true',
help='use mixed precision for inference.')
parser.add_argument('--output-dir',
default='demo_outputs',
help='the directory to save outputs')
parser.add_argument(
'--cfg-options',
nargs='+',
action=DictAction,
help='override some settings in the used config, the key-value pair '
'in xxx=yyy format will be merged into config file. If the value to '
'be overwritten is a list, it should be like key="[a,b]" or key=a,b '
'It also allows nested list/tuple values, e.g. key="[(a,b),(c,d)]" '
'Note that the quotation marks are necessary and that no white space '
'is allowed.')
args = parser.parse_args()
return args
def inference_detector(model,
image,
texts,
test_pipeline,
max_dets=100,
score_thr=0.3,
output_dir='./work_dir',
use_amp=False,
show=False,
annotation=False):
data_info = dict(img_id=0, img_path=image, texts=texts)
data_info = test_pipeline(data_info)
data_batch = dict(inputs=data_info['inputs'].unsqueeze(0),
data_samples=[data_info['data_samples']])
with autocast(enabled=use_amp), torch.no_grad():
output = model.test_step(data_batch)[0]
pred_instances = output.pred_instances
pred_instances = pred_instances[pred_instances.scores.float() >
score_thr]
if len(pred_instances.scores) > max_dets:
indices = pred_instances.scores.float().topk(max_dets)[1]
pred_instances = pred_instances[indices]
pred_instances = pred_instances.cpu().numpy()
if 'masks' in pred_instances:
masks = pred_instances['masks']
else:
masks = None
detections = sv.Detections(xyxy=pred_instances['bboxes'],
class_id=pred_instances['labels'],
confidence=pred_instances['scores'],
mask=masks)
labels = [
f"{texts[class_id][0]} {confidence:0.2f}" for class_id, confidence in
zip(detections.class_id, detections.confidence)
]
# label images
    image_path = image
    image = cv2.imread(image_path)
anno_image = image.copy()
image = BOUNDING_BOX_ANNOTATOR.annotate(image, detections)
image = LABEL_ANNOTATOR.annotate(image, detections, labels=labels)
if masks is not None:
image = MASK_ANNOTATOR.annotate(image, detections)
cv2.imwrite(osp.join(output_dir, osp.basename(image_path)), image)
if annotation:
images_dict = {}
annotations_dict = {}
images_dict[osp.basename(image_path)] = anno_image
annotations_dict[osp.basename(image_path)] = detections
        ANNOTATIONS_DIRECTORY = "./annotations"
        os.makedirs(ANNOTATIONS_DIRECTORY, exist_ok=True)
MIN_IMAGE_AREA_PERCENTAGE = 0.002
MAX_IMAGE_AREA_PERCENTAGE = 0.80
APPROXIMATION_PERCENTAGE = 0.75
sv.DetectionDataset(
classes=texts, images=images_dict,
annotations=annotations_dict).as_yolo(
annotations_directory_path=ANNOTATIONS_DIRECTORY,
min_image_area_percentage=MIN_IMAGE_AREA_PERCENTAGE,
max_image_area_percentage=MAX_IMAGE_AREA_PERCENTAGE,
approximation_percentage=APPROXIMATION_PERCENTAGE)
if show:
cv2.imshow('Image', image) # Provide window name
k = cv2.waitKey(0)
if k == 27:
# wait for ESC key to exit
cv2.destroyAllWindows()
if __name__ == '__main__':
args = parse_args()
# load config
cfg = Config.fromfile(args.config)
if args.cfg_options is not None:
cfg.merge_from_dict(args.cfg_options)
cfg.work_dir = osp.join('./work_dirs',
osp.splitext(osp.basename(args.config))[0])
# init model
cfg.load_from = args.checkpoint
model = init_detector(cfg, checkpoint=args.checkpoint, device=args.device)
# init test pipeline
test_pipeline_cfg = get_test_pipeline_cfg(cfg=cfg)
# test_pipeline[0].type = 'mmdet.LoadImageFromNDArray'
test_pipeline = Compose(test_pipeline_cfg)
if args.text.endswith('.txt'):
with open(args.text) as f:
lines = f.readlines()
texts = [[t.rstrip('\r\n')] for t in lines] + [[' ']]
else:
texts = [[t.strip()] for t in args.text.split(',')] + [[' ']]
output_dir = args.output_dir
if not osp.exists(output_dir):
os.mkdir(output_dir)
# load images
if not osp.isfile(args.image):
images = [
osp.join(args.image, img) for img in os.listdir(args.image)
if img.endswith('.png') or img.endswith('.jpg')
]
else:
images = [args.image]
# reparameterize texts
model.reparameterize(texts)
progress_bar = ProgressBar(len(images))
for image_path in images:
inference_detector(model,
image_path,
texts,
test_pipeline,
args.topk,
args.threshold,
output_dir=output_dir,
use_amp=args.amp,
show=args.show,
annotation=args.annotation)
progress_bar.update()
# Copyright (c) Tencent Inc. All rights reserved.
import os.path as osp
import cv2
import torch
from mmengine.config import Config
from mmengine.dataset import Compose
from mmdet.apis import init_detector
from mmdet.utils import get_test_pipeline_cfg
def inference(model, image, texts, test_pipeline, score_thr=0.3, max_dets=100):
image = cv2.imread(image)
image = image[:, :, [2, 1, 0]]
data_info = dict(img=image, img_id=0, texts=texts)
data_info = test_pipeline(data_info)
data_batch = dict(inputs=data_info['inputs'].unsqueeze(0),
data_samples=[data_info['data_samples']])
with torch.no_grad():
output = model.test_step(data_batch)[0]
pred_instances = output.pred_instances
# score thresholding
pred_instances = pred_instances[pred_instances.scores.float() > score_thr]
# max detections
if len(pred_instances.scores) > max_dets:
indices = pred_instances.scores.float().topk(max_dets)[1]
pred_instances = pred_instances[indices]
pred_instances = pred_instances.cpu().numpy()
boxes = pred_instances['bboxes']
labels = pred_instances['labels']
scores = pred_instances['scores']
label_texts = [texts[x][0] for x in labels]
return boxes, labels, label_texts, scores
if __name__ == "__main__":
config_file = "configs/pretrain/yolo_world_v2_x_vlpan_bn_2e-3_100e_4x8gpus_obj365v1_goldg_train_1280ft_lvis_minival.py"
checkpoint = "weights/yolo_world_v2_x_obj365v1_goldg_cc3mlite_pretrain_1280ft-14996a36.pth"
cfg = Config.fromfile(config_file)
cfg.work_dir = osp.join('./work_dirs')
# init model
cfg.load_from = checkpoint
model = init_detector(cfg, checkpoint=checkpoint, device='cuda:0')
test_pipeline_cfg = get_test_pipeline_cfg(cfg=cfg)
test_pipeline_cfg[0].type = 'mmdet.LoadImageFromNDArray'
test_pipeline = Compose(test_pipeline_cfg)
texts = [['person'], ['bus'], [' ']]
image = "demo/sample_images/bus.jpg"
print(f"starting to detect: {image}")
results = inference(model, image, texts, test_pipeline)
format_str = [
f"obj-{idx}: {box}, label-{lbl}, class-{lbl_text}, score-{score}"
for idx, (box, lbl, lbl_text, score) in enumerate(zip(*results))
]
print("detecting results:")
for q in format_str:
print(q)
# Copyright (c) Tencent Inc. All rights reserved.
# This file is modified from mmyolo/demo/video_demo.py
import argparse
import cv2
import mmcv
import torch
from mmengine.dataset import Compose
from mmdet.apis import init_detector
from mmengine.utils import track_iter_progress
from mmyolo.registry import VISUALIZERS
def parse_args():
parser = argparse.ArgumentParser(description='YOLO-World video demo')
parser.add_argument('config', help='Config file')
parser.add_argument('checkpoint', help='Checkpoint file')
parser.add_argument('video', help='video file path')
parser.add_argument(
        'text',
        help='text prompts: category names separated by commas, '
        'or a .txt file with one prompt per line.')
parser.add_argument('--device',
default='cuda:0',
help='device used for inference')
parser.add_argument('--score-thr',
default=0.1,
type=float,
help='confidence score threshold for predictions.')
parser.add_argument('--out', type=str, help='output video file')
args = parser.parse_args()
return args
def inference_detector(model, image, texts, test_pipeline, score_thr=0.3):
data_info = dict(img_id=0, img=image, texts=texts)
data_info = test_pipeline(data_info)
data_batch = dict(inputs=data_info['inputs'].unsqueeze(0),
data_samples=[data_info['data_samples']])
with torch.no_grad():
output = model.test_step(data_batch)[0]
pred_instances = output.pred_instances
pred_instances = pred_instances[pred_instances.scores.float() >
score_thr]
output.pred_instances = pred_instances
return output
def main():
args = parse_args()
model = init_detector(args.config, args.checkpoint, device=args.device)
# build test pipeline
model.cfg.test_dataloader.dataset.pipeline[
0].type = 'mmdet.LoadImageFromNDArray'
test_pipeline = Compose(model.cfg.test_dataloader.dataset.pipeline)
if args.text.endswith('.txt'):
with open(args.text) as f:
lines = f.readlines()
texts = [[t.rstrip('\r\n')] for t in lines] + [[' ']]
else:
texts = [[t.strip()] for t in args.text.split(',')] + [[' ']]
# reparameterize texts
model.reparameterize(texts)
# init visualizer
visualizer = VISUALIZERS.build(model.cfg.visualizer)
    # the dataset_meta is loaded from the checkpoint and
    # then passed to the model in init_detector
visualizer.dataset_meta = model.dataset_meta
video_reader = mmcv.VideoReader(args.video)
video_writer = None
if args.out:
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
video_writer = cv2.VideoWriter(
args.out, fourcc, video_reader.fps,
(video_reader.width, video_reader.height))
for frame in track_iter_progress(video_reader):
result = inference_detector(model,
frame,
texts,
test_pipeline,
score_thr=args.score_thr)
visualizer.add_datasample(name='video',
image=frame,
data_sample=result,
draw_gt=False,
show=False,
pred_score_thr=args.score_thr)
frame = visualizer.get_image()
if args.out:
video_writer.write(frame)
if video_writer:
video_writer.release()
if __name__ == '__main__':
main()
# MMYOLO Model Easy-Deployment
## Introduction
This project is developed for easily converting your MMYOLO models to other inference backends without the need for MMDeploy, which saves the time and effort otherwise spent on getting familiar with MMDeploy.
Currently we support converting to the `ONNX` and `TensorRT` formats; other inference backends such as `ncnn` will be added to this project as well. A quick sanity check for an exported `ONNX` model is sketched below.
## Supported Backends
- [Model Convert](docs/model_convert.md)
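As a quick sanity check after conversion, an exported `ONNX` model can be run directly with ONNX Runtime. The snippet below is only a minimal sketch: it assumes a model exported like the Gradio demo in this repository (input `images` of shape `1x3x640x640`, outputs `num_dets`, `boxes`, `scores`, `labels`), and the file paths and preprocessing shown here are illustrative assumptions, not the project's exact pipeline.

```python
# Minimal sketch: run an exported MMYOLO / YOLO-World ONNX model with ONNX Runtime.
# Assumed I/O: input 'images' (1x3x640x640 float32), outputs
# ['num_dets', 'boxes', 'scores', 'labels'] as produced by the export demo above.
import cv2
import numpy as np
import onnxruntime as ort

session = ort.InferenceSession('output/model.onnx',  # hypothetical path to the exported file
                               providers=['CPUExecutionProvider'])

image = cv2.imread('demo/sample_images/bus.jpg')     # any test image
image = cv2.cvtColor(cv2.resize(image, (640, 640)), cv2.COLOR_BGR2RGB)
blob = image.transpose(2, 0, 1)[None].astype(np.float32) / 255.0  # simplified preprocessing

num_dets, boxes, scores, labels = session.run(None, {'images': blob})
print(f'kept {int(num_dets.reshape(-1)[0])} detections')
for box, score, label in zip(boxes[0], scores[0], labels[0]):
    if score >= 0.3:  # simple confidence filter
        print(int(label), float(score), box.tolist())
```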
# MMYOLO Model Conversion
## Introduction
This project exists as a standalone deployment project for MMYOLO. It is meant to be decoupled from the current MMDeploy ecosystem and to let users convert and deploy their models on their own after training, lowering the learning and engineering cost.
Conversion to the ONNX and TensorRT formats is currently supported; other inference platforms will be supported later.
## Conversion Tutorial
- [Model Convert](docs/model_convert.md)
# Copyright (c) OpenMMLab. All rights reserved.
from .common import DeployC2f
from .focus import DeployFocus, GConvFocus, NcnnFocus
__all__ = ['DeployFocus', 'NcnnFocus', 'GConvFocus', 'DeployC2f']
import torch
import torch.nn as nn
from torch import Tensor
class DeployC2f(nn.Module):
def __init__(self, *args, **kwargs):
super().__init__()
def forward(self, x: Tensor) -> Tensor:
x_main = self.main_conv(x)
x_main = [x_main, x_main[:, self.mid_channels:, ...]]
x_main.extend(blocks(x_main[-1]) for blocks in self.blocks)
x_main.pop(1)
return self.final_conv(torch.cat(x_main, 1))
# Copyright (c) OpenMMLab. All rights reserved.
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch import Tensor
class DeployFocus(nn.Module):
def __init__(self, orin_Focus: nn.Module):
super().__init__()
self.__dict__.update(orin_Focus.__dict__)
def forward(self, x: Tensor) -> Tensor:
batch_size, channel, height, width = x.shape
x = x.reshape(batch_size, channel, -1, 2, width)
x = x.reshape(batch_size, channel, x.shape[2], 2, -1, 2)
half_h = x.shape[2]
half_w = x.shape[4]
x = x.permute(0, 5, 3, 1, 2, 4)
x = x.reshape(batch_size, channel * 4, half_h, half_w)
return self.conv(x)
class NcnnFocus(nn.Module):
def __init__(self, orin_Focus: nn.Module):
super().__init__()
self.__dict__.update(orin_Focus.__dict__)
def forward(self, x: Tensor) -> Tensor:
batch_size, c, h, w = x.shape
assert h % 2 == 0 and w % 2 == 0, f'focus for yolox needs even feature\
height and width, got {(h, w)}.'
x = x.reshape(batch_size, c * h, 1, w)
_b, _c, _h, _w = x.shape
g = _c // 2
# fuse to ncnn's shufflechannel
x = x.view(_b, g, 2, _h, _w)
x = torch.transpose(x, 1, 2).contiguous()
x = x.view(_b, -1, _h, _w)
x = x.reshape(_b, c * h * w, 1, 1)
_b, _c, _h, _w = x.shape
g = _c // 2
# fuse to ncnn's shufflechannel
x = x.view(_b, g, 2, _h, _w)
x = torch.transpose(x, 1, 2).contiguous()
x = x.view(_b, -1, _h, _w)
x = x.reshape(_b, c * 4, h // 2, w // 2)
return self.conv(x)
class GConvFocus(nn.Module):
def __init__(self, orin_Focus: nn.Module):
super().__init__()
device = next(orin_Focus.parameters()).device
self.weight1 = torch.tensor([[1., 0], [0, 0]]).expand(3, 1, 2,
2).to(device)
self.weight2 = torch.tensor([[0, 0], [1., 0]]).expand(3, 1, 2,
2).to(device)
self.weight3 = torch.tensor([[0, 1.], [0, 0]]).expand(3, 1, 2,
2).to(device)
self.weight4 = torch.tensor([[0, 0], [0, 1.]]).expand(3, 1, 2,
2).to(device)
self.__dict__.update(orin_Focus.__dict__)
def forward(self, x: Tensor) -> Tensor:
conv1 = F.conv2d(x, self.weight1, stride=2, groups=3)
conv2 = F.conv2d(x, self.weight2, stride=2, groups=3)
conv3 = F.conv2d(x, self.weight3, stride=2, groups=3)
conv4 = F.conv2d(x, self.weight4, stride=2, groups=3)
return self.conv(torch.cat([conv1, conv2, conv3, conv4], dim=1))
# Copyright (c) OpenMMLab. All rights reserved.
from .bbox_coder import (rtmdet_bbox_decoder, yolov5_bbox_decoder,
yolox_bbox_decoder)
__all__ = ['yolov5_bbox_decoder', 'rtmdet_bbox_decoder', 'yolox_bbox_decoder']
# Copyright (c) OpenMMLab. All rights reserved.
from typing import Optional
import torch
from torch import Tensor
def yolov5_bbox_decoder(priors: Tensor, bbox_preds: Tensor,
stride: Tensor) -> Tensor:
bbox_preds = bbox_preds.sigmoid()
x_center = (priors[..., 0] + priors[..., 2]) * 0.5
y_center = (priors[..., 1] + priors[..., 3]) * 0.5
w = priors[..., 2] - priors[..., 0]
h = priors[..., 3] - priors[..., 1]
x_center_pred = (bbox_preds[..., 0] - 0.5) * 2 * stride + x_center
y_center_pred = (bbox_preds[..., 1] - 0.5) * 2 * stride + y_center
w_pred = (bbox_preds[..., 2] * 2)**2 * w
h_pred = (bbox_preds[..., 3] * 2)**2 * h
decoded_bboxes = torch.stack(
[x_center_pred, y_center_pred, w_pred, h_pred], dim=-1)
return decoded_bboxes
def rtmdet_bbox_decoder(priors: Tensor, bbox_preds: Tensor,
stride: Optional[Tensor]) -> Tensor:
stride = stride[None, :, None]
bbox_preds *= stride
tl_x = (priors[..., 0] - bbox_preds[..., 0])
tl_y = (priors[..., 1] - bbox_preds[..., 1])
br_x = (priors[..., 0] + bbox_preds[..., 2])
br_y = (priors[..., 1] + bbox_preds[..., 3])
decoded_bboxes = torch.stack([tl_x, tl_y, br_x, br_y], -1)
return decoded_bboxes
def yolox_bbox_decoder(priors: Tensor, bbox_preds: Tensor,
stride: Optional[Tensor]) -> Tensor:
stride = stride[None, :, None]
xys = (bbox_preds[..., :2] * stride) + priors
whs = bbox_preds[..., 2:].exp() * stride
decoded_bboxes = torch.cat([xys, whs], -1)
return decoded_bboxes
cmake_minimum_required(VERSION 2.8.12)
set(CMAKE_CUDA_ARCHITECTURES 60 61 62 70 72 75 86)
set(CMAKE_CUDA_COMPILER /usr/local/cuda/bin/nvcc)
project(nvdsparsebbox_mmyolo LANGUAGES CXX)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++14 -O3 -g -Wall -Werror -shared -fPIC")
set(CMAKE_CXX_STANDARD 14)
set(CMAKE_BUILD_TYPE Release)
option(CUDA_USE_STATIC_CUDA_RUNTIME OFF)
# CUDA
find_package(CUDA REQUIRED)
# TensorRT
set(TensorRT_INCLUDE_DIRS "/usr/include/x86_64-linux-gnu" CACHE STRING "TensorRT headers path")
set(TensorRT_LIBRARIES "/usr/lib/x86_64-linux-gnu" CACHE STRING "TensorRT libs path")
# DeepStream
set(DEEPSTREAM "/opt/nvidia/deepstream/deepstream" CACHE STRING "DeepStream root path")
set(DS_LIBRARIES ${DEEPSTREAM}/lib)
set(DS_INCLUDE_DIRS ${DEEPSTREAM}/sources/includes)
include_directories(
${CUDA_INCLUDE_DIRS}
${TensorRT_INCLUDE_DIRS}
${DS_INCLUDE_DIRS})
add_library(
${PROJECT_NAME}
SHARED
custom_mmyolo_bbox_parser/nvdsparsebbox_mmyolo.cpp)
target_link_libraries(${PROJECT_NAME} PRIVATE nvinfer nvinfer_plugin)
# Inference MMYOLO Models with DeepStream
This project demonstrates how to run inference with MMYOLO models using customized parsers in the [DeepStream SDK](https://developer.nvidia.com/deepstream-sdk).
## Pre-requisites
### 1. Install Nvidia Driver and CUDA
First, please follow the official documentation to install the dedicated NVIDIA graphics driver and a CUDA version matched to your GPU and target NVIDIA AIoT devices.
### 2. Install DeepStream SDK
Second, please follow the official instructions to download and install the DeepStream SDK. The current stable version of DeepStream is v6.2.
### 3. Generate TensorRT Engine
As DeepStream builds on top of several NVIDIA libraries, you first need to convert your trained MMYOLO models to TensorRT engine files. We strongly recommend trying the supported TensorRT deployment solution in [EasyDeploy](../../easydeploy/).
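If the EasyDeploy tooling is not convenient in your environment, an engine can also be built directly from an exported ONNX file with the TensorRT Python API. The sketch below is only an illustration under stated assumptions: TensorRT 8.x Python bindings installed, `model.onnx` as a hypothetical input name, `end2end.engine` chosen to match the config in this folder, and FP16 treated as optional. If the exported graph relies on TensorRT NMS plugins, the plugin registry must be initialized first.

```python
# Sketch (assumptions noted above): build 'end2end.engine' from an exported ONNX
# file using the TensorRT 8.x Python API. Paths are illustrative only.
import tensorrt as trt

logger = trt.Logger(trt.Logger.WARNING)
trt.init_libnvinfer_plugins(logger, '')          # required if the graph uses NMS plugins

builder = trt.Builder(logger)
network = builder.create_network(
    1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH))
parser = trt.OnnxParser(network, logger)

with open('model.onnx', 'rb') as f:              # hypothetical ONNX path
    if not parser.parse(f.read()):
        for i in range(parser.num_errors):
            print(parser.get_error(i))
        raise RuntimeError('failed to parse the ONNX file')

config = builder.create_builder_config()
config.set_flag(trt.BuilderFlag.FP16)            # optional; drop for FP32

serialized_engine = builder.build_serialized_network(network, config)
if serialized_engine is None:
    raise RuntimeError('engine build failed')
with open('end2end.engine', 'wb') as f:          # name expected by the config here
    f.write(serialized_engine)
```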
## Build and Run
Please make sure that your converted TensorRT engine is located in the `deepstream` folder, as expected by the config. Create your own model config file and point the `config-file` parameter in [deepstream_app_config.txt](deepstream_app_config.txt) to the model you want to run.
```bash
mkdir build && cd build
cmake ..
make -j$(nproc) && make install
```
Then you can run the inference with this command.
```bash
deepstream-app -c deepstream_app_config.txt
```
## Code Structure
```bash
├── deepstream
│ ├── configs # config file for MMYOLO models
│ │ └── config_infer_rtmdet.txt
│ ├── custom_mmyolo_bbox_parser # customized parser for MMYOLO models to DeepStream formats
│ │ └── nvdsparsebbox_mmyolo.cpp
| ├── CMakeLists.txt
│ ├── coco_labels.txt # labels for coco detection
│   ├── deepstream_app_config.txt # DeepStream reference app config for MMYOLO models
│ ├── README_zh-CN.md
│ └── README.md
```
# Inference MMYOLO Models with the DeepStream SDK
This project demonstrates how to use the [DeepStream SDK](https://developer.nvidia.com/deepstream-sdk) together with a customized parser to run inference with MMYOLO models.
## Pre-requisites
### 1. Install Nvidia Driver and CUDA
First, install the graphics driver and CUDA according to your current GPU and the target device.
### 2. Install DeepStream SDK
The stable version of the DeepStream SDK is currently v6.2, which is the officially recommended version.
### 3. Convert the MMYOLO Model to a TensorRT Engine
We recommend using the TensorRT solution in EasyDeploy to convert and deploy the target model; see [this document](../../easydeploy/docs/model_convert.md) for details.
## Build and Run
This project currently uses MMYOLO's rtmdet model; to use another model, adapt the config files in this directory accordingly. Then place the converted TensorRT engine in the current directory and run the following commands:
```bash
mkdir build && cd build
cmake ..
make -j$(nproc) && make install
```
After building, run inference with the following command:
```bash
deepstream-app -c deepstream_app_config.txt
```
## Code Structure
```bash
├── deepstream
│   ├── configs                   # DeepStream configs for MMYOLO models
│   │   └── config_infer_rtmdet.txt
│   ├── custom_mmyolo_bbox_parser # parser adapting MMYOLO outputs to DeepStream formats
│   │   └── nvdsparsebbox_mmyolo.cpp
│   ├── CMakeLists.txt
│   ├── coco_labels.txt           # coco labels
│   ├── deepstream_app_config.txt # DeepStream app config
│   ├── README_zh-CN.md
│   └── README.md
```
person
bicycle
car
motorbike
aeroplane
bus
train
truck
boat
traffic light
fire hydrant
stop sign
parking meter
bench
bird
cat
dog
horse
sheep
cow
elephant
bear
zebra
giraffe
backpack
umbrella
handbag
tie
suitcase
frisbee
skis
snowboard
sports ball
kite
baseball bat
baseball glove
skateboard
surfboard
tennis racket
bottle
wine glass
cup
fork
knife
spoon
bowl
banana
apple
sandwich
orange
broccoli
carrot
hot dog
pizza
donut
cake
chair
sofa
pottedplant
bed
diningtable
toilet
tvmonitor
laptop
mouse
remote
keyboard
cell phone
microwave
oven
toaster
sink
refrigerator
book
clock
vase
scissors
teddy bear
hair drier
toothbrush
[property]
gpu-id=0
net-scale-factor=0.01735207357279195
offsets=57.375;57.12;58.395
model-color-format=1
model-engine-file=../end2end.engine
labelfile-path=../coco_labels.txt
batch-size=1
network-mode=0
num-detected-classes=80
interval=0
gie-unique-id=1
process-mode=1
network-type=0
cluster-mode=2
maintain-aspect-ratio=1
parse-bbox-func-name=NvDsInferParseCustomMMYOLO
custom-lib-path=../build/libnvdsparsebbox_mmyolo.so
[class-attrs-all]
pre-cluster-threshold=0.45
topk=100