# YoloV8_infer_migraphx.py

# -*- coding: utf-8 -*-
import os
import time
import migraphx
import argparse
import cv2
import numpy as np


class YOLOv8:
    """YOLOv8 object detection model class for handling inference and visualization.

    The model is parsed and compiled with MIGraphX in ``__init__``; ``detect``
    then runs preprocessing, inference and post-processing for one image.
    """

    def __init__(self, model_path, dynamic=False, conf_thres=0.5, iou_thres=0.5,
                 names_path='../Resource/Models/coco.names'):
        """
        Initializes an instance of the YOLOv8 class.

        Args:
            model_path: Path to the ONNX model.
            dynamic: Whether to use dynamic-shape inference.
            conf_thres: Confidence threshold for filtering detections.
            iou_thres: IoU (Intersection over Union) threshold for non-maximum suppression.
            names_path: Text file with one class name per line. Defaults to the
                path that was previously hard-coded.
        """
        self.confThreshold = conf_thres
        self.nmsThreshold = iou_thres
        self.isDynamic = dynamic

        # Load the class names the model can detect. The context manager makes
        # sure the file handle is closed again.
        with open(names_path, 'r') as names_file:
            self.classNames = [line.strip() for line in names_file]

        # Parse the ONNX model. In dynamic mode the "images" input is mapped to
        # 1x3x1024x1024 (presumably the largest shape that will be used -
        # confirm against the MIGraphX documentation).
        if self.isDynamic:
            max_input = {"images": [1, 3, 1024, 1024]}
            self.model = migraphx.parse_onnx(model_path, map_input_dims=max_input)
        else:
            self.model = migraphx.parse_onnx(model_path)

        # Report the model's input / output nodes.
        print("inputs:")
        inputs = self.model.get_inputs()
        for key, value in inputs.items():
            print("{}:{}".format(key, value))

        print("outputs:")
        outputs = self.model.get_outputs()
        for key, value in outputs.items():
            print("{}:{}".format(key, value))

        # Name of the model input that receives the image tensor.
        self.inputName = "images"

        # Input size; the shape is indexed as (batch, channel, height, width).
        input_shape = inputs[self.inputName].lens()
        self.inputHeight = int(input_shape[2])
        self.inputWidth = int(input_shape[3])
        print("inputName:{0} \ninputShape:{1}".format(self.inputName, input_shape))

        # Compile the model for the GPU target; device_id selects the GPU (0 here).
        self.model.compile(t=migraphx.get_target("gpu"), device_id=0)
        print("Success to compile")

        # Generate a color palette for the classes
        self.color_palette = np.random.uniform(0, 255, size=(len(self.classNames), 3))

    def draw_detections(self, img, box, score, class_id):
        """
        Draws one bounding box and its label on the input image (in place).

        Args:
            img: The input image to draw detections on.
            box: Detected bounding box as (left, top, width, height).
            score: Corresponding detection score.
            class_id: Class ID for the detected object.

        Returns:
            None
        """
        # OpenCV drawing calls need integer pixel coordinates.
        x1, y1, w, h = (int(v) for v in box)

        # Retrieve the color for the class ID as a plain tuple of floats.
        color = tuple(float(c) for c in self.color_palette[class_id])

        # Draw the bounding box on the image
        cv2.rectangle(img, (x1, y1), (x1 + w, y1 + h), color, 2)

        # Create the label text with class name and score
        label = f'{self.classNames[class_id]}: {score:.2f}'

        # Calculate the dimensions of the label text
        (label_width, label_height), _ = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 1)

        # Put the label above the box when there is room, otherwise just below its top edge.
        label_x = x1
        label_y = y1 - 10 if y1 - 10 > label_height else y1 + 10

        # Draw a filled rectangle as the background for the label text
        cv2.rectangle(img, (label_x, label_y - label_height), (label_x + label_width, label_y + label_height), color,
                      cv2.FILLED)

        # Draw the label text on the image
        cv2.putText(img, label, (label_x, label_y), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), 1, cv2.LINE_AA)

    def preprocess(self, image):
        """
        Preprocesses the input image before performing inference.

        Stores the image and its size on the instance (``self.img``,
        ``self.img_height``, ``self.img_width``) for use by ``postprocess``.

        Args:
            image: Image array in BGR channel order (it is converted with
                ``cv2.COLOR_BGR2RGB``).

        Returns:
            image_data: Contiguous float32 array of shape
            (1, 3, inputHeight, inputWidth) with values scaled to [0, 1].

        Raises:
            ValueError: If ``image`` is None (e.g. a failed ``cv2.imread``).
        """
        if image is None:
            raise ValueError("preprocess() received None instead of an image")

        self.img = image

        # Get the height and width of the input image
        self.img_height, self.img_width = self.img.shape[:2]

        # Convert the image color space from BGR to RGB
        img = cv2.cvtColor(self.img, cv2.COLOR_BGR2RGB)

        # Resize the image to match the input shape
        img = cv2.resize(img, (self.inputWidth, self.inputHeight))

        # Normalize to [0, 1] directly in float32 (avoids a float64 temporary).
        image_data = np.asarray(img).astype(np.float32) / 255.0

        # Transpose the image to have the channel dimension as the first dimension
        image_data = np.transpose(image_data, (2, 0, 1))  # Channel first

        # Add the batch dimension expected by the model input
        image_data = np.expand_dims(image_data, axis=0).astype(np.float32)

        # Make the array memory contiguous
        return np.ascontiguousarray(image_data)

    def _decode_candidates(self, output):
        """
        Turns the raw model output into candidate boxes above the confidence threshold.

        After squeezing and transposing ``output[0]``, every row is read as
        ``[x, y, w, h, class_score_0, class_score_1, ...]`` where (x, y) is the
        box centre in network-input pixels. Boxes are rescaled to the original
        image size recorded by ``preprocess``.

        Args:
            output: Sequence whose first element is the raw prediction tensor.

        Returns:
            tuple: ``(boxes, scores, class_ids)`` as plain Python lists, where
            each box is ``[left, top, width, height]`` in integer pixels.
        """
        # Transpose and squeeze the output so that each row is one candidate
        preds = np.transpose(np.squeeze(output[0]))

        # Scaling factors from network-input pixels to original-image pixels
        x_factor = self.img_width / self.inputWidth
        y_factor = self.img_height / self.inputHeight

        # Best class score per candidate, computed for all rows at once
        class_scores = preds[:, 4:]
        best_scores = class_scores.max(axis=1)
        keep = best_scores >= self.confThreshold

        kept = preds[keep]
        class_ids = class_scores[keep].argmax(axis=1)

        # Convert centre/size boxes to top-left/size and rescale; astype(int)
        # truncates toward zero just like int().
        x, y, w, h = kept[:, 0], kept[:, 1], kept[:, 2], kept[:, 3]
        left = ((x - w / 2) * x_factor).astype(int)
        top = ((y - h / 2) * y_factor).astype(int)
        width = (w * x_factor).astype(int)
        height = (h * y_factor).astype(int)

        boxes = np.stack([left, top, width, height], axis=1).tolist()
        return boxes, best_scores[keep].tolist(), class_ids.tolist()

    def postprocess(self, input_image, output):
        """
        Performs post-processing on the model's output and draws the detections.

        Args:
            input_image (numpy.ndarray): The input image.
            output: The output of the model.

        Returns:
            numpy.ndarray: The input image with detections drawn on it.
        """
        boxes, scores, class_ids = self._decode_candidates(output)

        # Nothing passed the confidence threshold: skip NMS entirely.
        if not boxes:
            return input_image

        # Apply non-maximum suppression to filter out overlapping bounding boxes
        indices = cv2.dnn.NMSBoxes(boxes, scores, self.confThreshold, self.nmsThreshold)

        # The indices may be a flat array, an N x 1 array or an empty tuple
        # depending on the OpenCV release, so flatten before iterating.
        for idx in np.array(indices).reshape(-1):
            idx = int(idx)
            self.draw_detections(input_image, boxes[idx], scores[idx], class_ids[idx])

        # Return the modified input image
        return input_image

    def detect(self, image, input_shape=None):
        """
        Runs the full pipeline (preprocess, inference, postprocess) on one image.

        Args:
            image: Image array in BGR channel order.
            input_shape: Shape indexed as (batch, channel, height, width) to run
                the model at. Required in dynamic mode, ignored otherwise.

        Returns:
            The input image with the detections drawn on it.

        Raises:
            ValueError: If the detector is dynamic and ``input_shape`` is None.
        """
        if self.isDynamic:
            if input_shape is None:
                raise ValueError("input_shape is required when dynamic inference is enabled")
            self.inputWidth = int(input_shape[3])
            self.inputHeight = int(input_shape[2])

        # Preprocess the input image
        input_img = self.preprocess(image)

        # Run inference and report the elapsed time
        start = time.perf_counter()
        result = self.model.run({self.inputName: input_img})
        print('net forward time: {:.4f}'.format(time.perf_counter() - start))

        # Post-process the model output
        dstimg = self.postprocess(image, result)

        return dstimg


def read_images(image_path):
    """
    Loads every readable image found directly inside a folder.

    Entries are visited in sorted name order so the result is deterministic
    (``os.listdir`` makes no ordering promise). Sub-directories and files that
    OpenCV cannot decode are skipped instead of producing ``None`` entries.

    Args:
        image_path: Path of the folder to scan.

    Returns:
        list: Images as returned by ``cv2.imread`` (3-channel color flag ``1``).
    """
    image_lists = []

    for image_name in sorted(os.listdir(image_path)):
        full_path = os.path.join(image_path, image_name)

        # Only regular files can be images.
        if not os.path.isfile(full_path):
            continue

        image = cv2.imread(full_path, 1)
        if image is None:
            # cv2.imread signals failure by returning None rather than raising.
            print("Skip unreadable image: {}".format(full_path))
            continue

        image_lists.append(image)

    return image_lists

def yolov8_Static(imgpath, modelpath, confThreshold, nmsThreshold):
    """
    Runs static-shape inference on a single image and saves it as ./Result.jpg.

    Args:
        imgpath: Path of the image to run detection on.
        modelpath: Path of the static ONNX model.
        confThreshold: Confidence threshold for filtering detections.
        nmsThreshold: IoU threshold for non-maximum suppression.

    Raises:
        FileNotFoundError: If the image cannot be read.
        IOError: If the result image cannot be written.
    """
    # Read the image first so a bad path fails before the model is compiled.
    srcimg = cv2.imread(imgpath, 1)
    if srcimg is None:
        # cv2.imread returns None instead of raising when loading fails.
        raise FileNotFoundError("Failed to read image: {}".format(imgpath))

    yolov8_detector = YOLOv8(modelpath, False, conf_thres=confThreshold,
                             iou_thres=nmsThreshold)

    dstimg = yolov8_detector.detect(srcimg)

    # Save the detection result; only report success when the write succeeded.
    if not cv2.imwrite("./Result.jpg", dstimg):
        raise IOError("Failed to save ./Result.jpg")
    print("Success to save result")


def yolov8_dynamic(imgpath, modelpath, confThreshold, nmsThreshold):
    """
    Runs dynamic-shape inference on every image of a folder.

    Images are paired with the configured input shapes in order; when there
    are more images than shapes the shapes are reused cyclically. Each result
    is saved as Result<i>.jpg in the current directory.

    Args:
        imgpath: Path of the folder containing the test images.
        modelpath: Path of the dynamic ONNX model.
        confThreshold: Confidence threshold for filtering detections.
        nmsThreshold: IoU threshold for non-maximum suppression.

    Raises:
        IOError: If a result image cannot be written.
    """
    # Input shapes used for dynamic inference, indexed as (batch, channel, height, width)
    input_shapes = [
        [1, 3, 416, 416],
        [1, 3, 608, 608],
    ]

    # Read the test images
    image_lists = read_images(imgpath)

    # Inference
    yolov8_detector = YOLOv8(modelpath, True,
                             conf_thres=confThreshold, iou_thres=nmsThreshold)
    for i, image in enumerate(image_lists):
        if image is None:
            # An entry that could not be decoded; nothing to run inference on.
            print("Skip unreadable image{}".format(i))
            continue

        print("Start to inference image{}".format(i))
        # Wrap around so extra images do not index past the shape list.
        dstimg = yolov8_detector.detect(image, input_shapes[i % len(input_shapes)])

        # Save the detection result
        result_name = "Result{}.jpg".format(i)
        if not cv2.imwrite(result_name, dstimg):
            raise IOError("Failed to save {}".format(result_name))

    print("Success to save results")

if __name__ == '__main__':
    # Create an argument parser to handle command-line arguments
    parser = argparse.ArgumentParser()
    parser.add_argument('--imgPath', type=str, default='../Resource/Images/image_test.jpg', help="image path")
    parser.add_argument('--imgFolderPath', type=str, default='../Resource/Images/DynamicPics', help="image folder path")
    parser.add_argument('--staticModelPath', type=str, default='../Resource/Models/yolov8n_static.onnx', help="static onnx filepath")
    parser.add_argument('--dynamicModelPath', type=str, default='../Resource/Models/yolov8n_dynamic.onnx', help="dynamic onnx filepath")
    parser.add_argument('--confThreshold', default=0.5, type=float, help='class confidence')
    parser.add_argument('--nmsThreshold', default=0.5, type=float, help='nms iou thresh')
    parser.add_argument("--staticInfer", action="store_true", default=False, help="Performing static inference")
    parser.add_argument("--dynamicInfer", action="store_true", default=False, help="Performing dynamic inference")
    args = parser.parse_args()

    # Neither mode was requested: show the usage instead of exiting silently.
    if not (args.staticInfer or args.dynamicInfer):
        parser.print_help()

    # Static-shape inference
    if args.staticInfer:
        yolov8_Static(args.imgPath, args.staticModelPath, args.confThreshold, args.nmsThreshold)
    # Dynamic-shape inference
    if args.dynamicInfer:
        yolov8_dynamic(args.imgFolderPath, args.dynamicModelPath, args.confThreshold, args.nmsThreshold)