提交yolov3推理代码

63d5904a · Your Name · af0ab330 · 63d5904a · 63d5904a · 63d5904a
Commit 63d5904a authored May 23, 2023 by Your Name
7 changed files
--- a/README.md
+++ b/README.md
-# YoloV3_MIGraphX
+# YoloV3

-This project builds yolov3 inference example based on migraphx
\ No newline at end of file
+## 模型介绍
+
+YOLOV3是由Joseph Redmon和Ali Farhadi在2018年提出的单阶段目标检测模型，主要用于自然场景的目标检测。
+
+## 模型结构
+
+算法的基本思想是：首先通过特征提取网络对输入图像提取特征，backbone部分由YOLOv2时期的Darknet19进化至Darknet53，加深了网络层数，并引入了ResNet中的跨层加和操作；然后结合不同卷积层的特征实现多尺度预测，一共有13x13、26x26、52x52三种分辨率，分别用来预测大、中、小的物体；每种分辨率的特征图将输入图像分成不同数量的格子，每个格子预测B个bounding box，每个bounding box的预测内容包括：Location(x, y, w, h)、Confidence Score和C个类别的概率，因此YOLOv3输出层的channel数为B*(5 + C)。YOLOv3的loss函数也由三部分组成：Location误差、Confidence误差和分类误差。参考论文地址：https://arxiv.org/abs/1804.02767
+
+## 推理
+
+### 环境配置
+
+在[光源](https://www.sourcefind.cn/#/image/dcu/custom)可拉取用于推理的docker镜像，YoloV3模型推理推荐的镜像如下：
+
+```
+docker pull image.sourcefind.cn:5000/dcu/admin/base/custom:ort_dcu_1.14.0_migraphx2.5.2_dtk22.10.1
+```
+
+在[光合开发者社区](https://cancon.hpccube.com:65024/4/main/)可下载MIGraphX安装包，python依赖安装：
+
+```
+pip install -r requirements.txt
+```
+
+### 运行示例
+
+YoloV3模型的推理示例程序是YoloV3_infer_migraphx.py，使用如下命令运行该推理示例：
+
+```
+python YoloV3_infer_migraphx.py \
+	--imgpath 测试图像路径 \
+	--modelpath onnx模型路径 \
+	--objectThreshold 判断是否有物体阈值，默认0.4 \
+	--confThreshold 置信度阈值，默认0.2 \
+	--nmsThreshold nms阈值，默认0.4
+```
+
+程序运行结束会在当前目录生成YOLOV3检测结果图像。
+
+<img src="./images/Result.jpg" alt="Result" style="zoom: 67%;" />
+
+## 历史版本
+
+		https://developer.hpccube.com/codes/modelzoo/yolov3_migraphx
+
+## 参考
+
+		https://github.com/ultralytics/yolov3
--- a/YoloV3_infer_migraphx.py
+++ b/YoloV3_infer_migraphx.py
+# -*- coding: utf-8 -*-
+import cv2
+import numpy as np
+import os
+import argparse
+import time
+import migraphx
+
+
+class YOLOv3:
+    """YOLOv3 detector that runs an ONNX model through MIGraphX.
+
+    The post-processing in this class treats each prediction row as
+    ``[cx, cy, w, h, objectness, class_score_0, class_score_1, ...]`` with the
+    box expressed in network-input pixels.
+    """
+
+    def __init__(self, path, obj_thres=0.5, conf_thres=0.25, iou_thres=0.5,
+                 names_path='./weights/coco.names'):
+        """Load the class names and parse the ONNX model.
+
+        Args:
+            path: Path of the ONNX model file.
+            obj_thres: Minimum objectness for a prediction to be kept.
+            conf_thres: Minimum ``objectness * class score`` for a prediction
+                to be kept; also used as the NMS score threshold.
+            iou_thres: IoU threshold passed to non-maximum suppression.
+            names_path: Text file holding one class name per line.
+        """
+        self.objectThreshold = obj_thres
+        self.confThreshold = conf_thres
+        self.nmsThreshold = iou_thres
+
+        # Class names the model can detect; the context manager makes sure the
+        # file handle is closed again.
+        with open(names_path, 'r', encoding='utf-8') as names_file:
+            self.classNames = [line.strip() for line in names_file]
+
+        # Parse the inference model.
+        self.model = migraphx.parse_onnx(path)
+        # Compilation is deferred to the first detect() call and done only once.
+        self._compiled = False
+
+        # Name of the model input.
+        self.inputName = self.model.get_parameter_names()[0]
+
+        # Input size of the model; indices 2 and 3 are used as height and width.
+        inputShape = self.model.get_parameter_shapes()[self.inputName].lens()
+        self.inputHeight = int(inputShape[2])
+        self.inputWidth = int(inputShape[3])
+        print("inputName:{0} \ninputShape:{1}".format(self.inputName, inputShape))
+
+    def detect(self, image):
+        """Run the full pipeline on one BGR image.
+
+        Returns:
+            A ``(boxes, scores, class_ids)`` tuple as produced by
+            ``process_output``.
+        """
+        # Pre-process the input image.
+        input_img = self.prepare_input(image)
+
+        # Compile the model on first use only; recompiling for every image
+        # would repeat the same work.
+        if not self._compiled:
+            # device_id selects the GPU device, device 0 by default.
+            self.model.compile(t=migraphx.get_target("gpu"), device_id=0)
+            self._compiled = True
+            print("Success to compile")
+
+        # Run inference.
+        print("Start to inference")
+        start = time.time()
+        result = self.model.run({self.inputName: migraphx.argument(input_img)})
+        print('net forward time: {:.4f}'.format(time.time() - start))
+
+        # Post-process the raw model output.
+        return self.process_output(result)
+
+    def prepare_input(self, image):
+        """Convert a BGR image into a contiguous float32 ``1x3xHxW`` array in [0, 1].
+
+        The original image size is remembered so boxes can later be mapped
+        back onto the source image.
+        """
+        self.img_height, self.img_width = image.shape[:2]
+        input_img = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
+        input_img = cv2.resize(input_img, (self.inputWidth, self.inputHeight))
+        # HWC -> CHW, then add the batch dimension.
+        input_img = np.expand_dims(input_img.transpose(2, 0, 1), 0)
+        input_img = np.ascontiguousarray(input_img).astype(np.float32)
+        return input_img / 255
+
+    def process_output(self, output):
+        """Filter the raw predictions and apply non-maximum suppression.
+
+        Args:
+            output: Sequence whose first element holds the prediction rows.
+
+        Returns:
+            ``(boxes, scores, class_ids)`` where ``boxes`` are
+            ``[x, y, w, h]`` rows (top-left corner) in source-image pixels.
+            All three arrays are empty when nothing passes the thresholds.
+        """
+        predictions = np.squeeze(output[0])
+
+        # Keep only the predictions that contain an object.
+        obj_conf = predictions[:, 4]
+        has_object = obj_conf > self.objectThreshold
+        predictions = predictions[has_object]
+        obj_conf = obj_conf[has_object]
+
+        # Keep only the predictions above the confidence threshold.
+        predictions[:, 5:] *= obj_conf[:, np.newaxis]
+        scores = np.max(predictions[:, 5:], axis=1)
+        valid_scores = scores > self.confThreshold
+        predictions = predictions[valid_scores]
+        scores = scores[valid_scores]
+
+        # Nothing left to suppress: return empty results instead of handing
+        # empty lists to NMS and indexing with its result.
+        if predictions.shape[0] == 0:
+            return (np.empty((0, 4), dtype=np.float32),
+                    np.empty((0,), dtype=np.float32),
+                    np.empty((0,), dtype=np.int64))
+
+        # Class id with the highest confidence score.
+        class_ids = np.argmax(predictions[:, 5:], axis=1)
+
+        # Boxes mapped onto the source image.
+        boxes = self.extract_boxes(predictions)
+
+        # Non-maximum suppression removes redundant boxes. The result is
+        # normalised to a flat integer array so an empty result (returned as
+        # a tuple by some OpenCV versions) is still a valid index.
+        indices = cv2.dnn.NMSBoxes(boxes.tolist(), scores.tolist(), self.confThreshold, self.nmsThreshold)
+        indices = np.asarray(indices, dtype=np.int64).reshape(-1)
+
+        return boxes[indices], scores[indices], class_ids[indices]
+
+    def extract_boxes(self, predictions):
+        """Return ``[x, y, w, h]`` boxes (top-left corner) in source-image pixels."""
+        # Map the box coordinates onto the source image.
+        boxes = self.rescale_boxes(predictions[:, :4])
+
+        # Convert from centre-based to top-left-based coordinates.
+        boxes_ = np.copy(boxes)
+        boxes_[..., 0] = boxes[..., 0] - boxes[..., 2] * 0.5
+        boxes_[..., 1] = boxes[..., 1] - boxes[..., 3] * 0.5
+        return boxes_
+
+    def rescale_boxes(self, boxes):
+        """Scale boxes from network-input size to source-image size."""
+        input_shape = np.array([self.inputWidth, self.inputHeight, self.inputWidth, self.inputHeight],
+                               dtype=np.float32)
+        image_shape = np.array([self.img_width, self.img_height, self.img_width, self.img_height],
+                               dtype=np.float32)
+        boxes = np.divide(boxes, input_shape, dtype=np.float32)
+        boxes *= image_shape
+        return boxes
+
+    def draw_detections(self, image, boxes, scores, class_ids):
+        """Draw every box and its ``"<class> <score>"`` label onto ``image`` in place.
+
+        Returns:
+            The same ``image`` object that was passed in.
+        """
+        font = cv2.FONT_HERSHEY_SIMPLEX
+        font_scale, thickness = 1, 2
+        color = (0, 255, 255)
+        for box, score, class_id in zip(boxes, scores, class_ids):
+            # Boxes are [x, y, w, h] with (x, y) the top-left corner.
+            x, y, w, h = (int(v) for v in box)
+
+            # Draw the bounding box.
+            cv2.rectangle(image, (x, y), (x + w, y + h), color, thickness=thickness)
+
+            label = f'{self.classNames[class_id]} {score:.2f}'
+            # Keep the label baseline inside the image for boxes touching the top edge.
+            (_, text_height), _ = cv2.getTextSize(label, font, font_scale, thickness)
+            text_y = max(y - 10, text_height)
+            cv2.putText(image, label, (x, text_y), font, font_scale, color, thickness=thickness)
+        return image
+
+
+if __name__ == '__main__':
+    # Command-line entry point: detect objects in one image and save the
+    # annotated result to ./Result.jpg.
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--imgpath', type=str, default='./images/dog.jpg', help="image path")
+    parser.add_argument('--modelpath', type=str, default='./weights/yolov3-tiny.onnx', help="onnx filepath")
+    parser.add_argument('--objectThreshold', default=0.4, type=float, help='objectness threshold')
+    parser.add_argument('--confThreshold', default=0.2, type=float, help='class confidence threshold')
+    parser.add_argument('--nmsThreshold', default=0.4, type=float, help='nms iou thresh')
+    args = parser.parse_args()
+
+    yolov3_detector = YOLOv3(args.modelpath, obj_thres=args.objectThreshold, conf_thres=args.confThreshold,
+                             iou_thres=args.nmsThreshold)
+    srcimg = cv2.imread(args.imgpath, 1)
+    if srcimg is None:
+        # cv2.imread signals an unreadable or missing file by returning None;
+        # fail here with a clear message instead of deep inside pre-processing.
+        raise FileNotFoundError("Failed to read image: {}".format(args.imgpath))
+
+    boxes, scores, class_ids = yolov3_detector.detect(srcimg)
+
+    dstimg = yolov3_detector.draw_detections(srcimg, boxes, scores, class_ids)
+
+    # Save the detection result.
+    cv2.imwrite("./Result.jpg", dstimg)
+    print("Success to save result")
+
+
+
+
+
+
+
+
+
+
+
+
--- a/images/Result.jpg
+++ b/images/Result.jpg
--- a/images/dog.jpg
+++ b/images/dog.jpg
--- a/requirements.txt
+++ b/requirements.txt
+opencv-contrib-python
+numpy
+# os, argparse and time are part of the Python standard library and need no installation
\ No newline at end of file
--- a/weights/coco.names
+++ b/weights/coco.names
+person
+bicycle
+car
+motorbike
+aeroplane
+bus
+train
+truck
+boat
+traffic light
+fire hydrant
+stop sign
+parking meter
+bench
+bird
+cat
+dog
+horse
+sheep
+cow
+elephant
+bear
+zebra
+giraffe
+backpack
+umbrella
+handbag
+tie
+suitcase
+frisbee
+skis
+snowboard
+sports ball
+kite
+baseball bat
+baseball glove
+skateboard
+surfboard
+tennis racket
+bottle
+wine glass
+cup
+fork
+knife
+spoon
+bowl
+banana
+apple
+sandwich
+orange
+broccoli
+carrot
+hot dog
+pizza
+donut
+cake
+chair
+sofa
+pottedplant
+bed
+diningtable
+toilet
+tvmonitor
+laptop
+mouse
+remote
+keyboard
+cell phone
+microwave
+oven
+toaster
+sink
+refrigerator
+book
+clock
+vase
+scissors
+teddy bear
+hair drier
+toothbrush
--- a/weights/yolov3-tiny.onnx
+++ b/weights/yolov3-tiny.onnx