# -*- coding: utf-8 -*-
import os
import time
import argparse

import cv2
import migraphx
import numpy as np


class YOLOv9:
    """YOLOv9 object detection model class for handling inference and visualization."""

    def __init__(self, model_path, dynamic=False, conf_thres=0.6, iou_thres=0.5):
        """
        Initializes an instance of the YOLOv9 class.

        Args:
            model_path: Path to the ONNX model.
            dynamic: Whether to use dynamic-shape inference.
            conf_thres: Confidence threshold for filtering detections.
            iou_thres: IoU (Intersection over Union) threshold for non-maximum suppression.
        """
        self.confThreshold = conf_thres
        self.nmsThreshold = iou_thres
        self.isDynamic = dynamic

        # Load the class names the model can detect
        with open('../Resource/Models/coco.names', 'r') as f:
            self.classNames = [line.strip() for line in f.readlines()]

        # Parse the ONNX model. For dynamic-shape inference, parse with the
        # maximum input dimensions; smaller shapes are supplied at run time.
        if self.isDynamic:
            maxInput = {"images": [1, 3, 1024, 1024]}
            self.model = migraphx.parse_onnx(model_path, map_input_dims=maxInput)
        else:
            self.model = migraphx.parse_onnx(model_path)

        # Print the model's input/output node information
        print("inputs:")
        inputs = self.model.get_inputs()
        for key, value in inputs.items():
            print("{}:{}".format(key, value))
        print("outputs:")
        outputs = self.model.get_outputs()
        for key, value in outputs.items():
            print("{}:{}".format(key, value))

        # Name and shape of the model input
        self.inputName = "images"
        inputShape = inputs[self.inputName].lens()
        self.inputHeight = int(inputShape[2])
        self.inputWidth = int(inputShape[3])
        print("inputName:{0} \ninputShape:{1}".format(self.inputName, inputShape))

        # Compile the model for GPU execution; device_id selects the GPU (device 0 by default)
        self.model.compile(t=migraphx.get_target("gpu"), device_id=0)
        print("Model compiled successfully")

        # Generate a color palette for the classes
        self.color_palette = np.random.uniform(0, 255, size=(len(self.classNames), 3))

    def draw_detections(self, img, box, score, class_id):
        """
        Draws bounding boxes and labels on the input image based on the detected objects.

        Args:
            img: The input image to draw detections on.
            box: Detected bounding box.
            score: Corresponding detection score.
            class_id: Class ID for the detected object.

        Returns:
            None
        """
        # Extract the coordinates of the bounding box
        x1, y1, w, h = box

        # Retrieve the color for the class ID
        color = self.color_palette[class_id]

        # Draw the bounding box on the image
        cv2.rectangle(img, (int(x1), int(y1)), (int(x1 + w), int(y1 + h)), color, 2)

        # Create the label text with class name and score
        label = f'{self.classNames[class_id]}: {score:.2f}'

        # Calculate the dimensions of the label text
        (label_width, label_height), _ = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 1)

        # Calculate the position of the label text
        label_x = x1
        label_y = y1 - 10 if y1 - 10 > label_height else y1 + 10

        # Draw a filled rectangle as the background for the label text
        cv2.rectangle(img, (label_x, label_y - label_height),
                      (label_x + label_width, label_y + label_height), color, cv2.FILLED)

        # Draw the label text on the image
        cv2.putText(img, label, (label_x, label_y), cv2.FONT_HERSHEY_SIMPLEX, 0.5,
                    (0, 0, 0), 1, cv2.LINE_AA)
    def preprocess(self, image):
        """
        Preprocesses the input image before performing inference.

        Args:
            image: The input image as a BGR numpy array.

        Returns:
            image_data: Preprocessed image data ready for inference.
        """
        self.img = image

        # Get the height and width of the input image
        self.img_height, self.img_width = self.img.shape[:2]

        # Convert the image color space from BGR to RGB
        img = cv2.cvtColor(self.img, cv2.COLOR_BGR2RGB)

        # Resize the image to match the input shape
        img = cv2.resize(img, (self.inputWidth, self.inputHeight))

        # Normalize the image data by dividing it by 255.0
        image_data = np.array(img) / 255.0

        # Transpose the image to have the channel dimension first (HWC -> CHW)
        image_data = np.transpose(image_data, (2, 0, 1))

        # Expand the dimensions of the image data to match the expected input shape
        image_data = np.expand_dims(image_data, axis=0).astype(np.float32)

        # Make the array memory-contiguous
        image_data = np.ascontiguousarray(image_data)

        # Return the preprocessed image data
        return image_data

    def postprocess(self, input_image, output):
        """
        Performs post-processing on the model's output to extract bounding boxes,
        scores, and class IDs.

        Args:
            input_image (numpy.ndarray): The input image.
            output (numpy.ndarray): The output of the model.

        Returns:
            numpy.ndarray: The input image with detections drawn on it.
        """
        # Transpose and squeeze the output to match the expected shape
        outputs = np.transpose(np.squeeze(output[0]))

        # Get the number of rows in the outputs array
        rows = outputs.shape[0]

        # Lists to store the bounding boxes, scores, and class IDs of the detections
        boxes = []
        scores = []
        class_ids = []

        # Calculate the scaling factors for the bounding box coordinates
        x_factor = self.img_width / self.inputWidth
        y_factor = self.img_height / self.inputHeight

        # Iterate over each row in the outputs array
        for i in range(rows):
            # Extract the class scores from the current row
            classes_scores = outputs[i][4:]

            # Find the maximum score among the class scores
            max_score = np.amax(classes_scores)

            # If the maximum score is above the confidence threshold
            if max_score >= self.confThreshold:
                # Get the class ID with the highest score
                class_id = np.argmax(classes_scores)

                # Extract the bounding box coordinates from the current row
                x, y, w, h = outputs[i][0], outputs[i][1], outputs[i][2], outputs[i][3]

                # Scale the center-format box back to the original image and
                # convert it to top-left/width/height format
                left = int((x - w / 2) * x_factor)
                top = int((y - h / 2) * y_factor)
                width = int(w * x_factor)
                height = int(h * y_factor)

                # Add the class ID, score, and box coordinates to the respective lists
                class_ids.append(class_id)
                scores.append(max_score)
                boxes.append([left, top, width, height])

        # Apply non-maximum suppression to filter out overlapping bounding boxes
        indices = cv2.dnn.NMSBoxes(boxes, scores, self.confThreshold, self.nmsThreshold)

        # Iterate over the selected indices after non-maximum suppression
        for i in indices:
            # Get the box, score, and class ID corresponding to the index
            box = boxes[i]
            score = scores[i]
            class_id = class_ids[i]

            # Draw the detection on the input image
            self.draw_detections(input_image, box, score, class_id)

        # Return the modified input image
        return input_image

    def detect(self, image, input_shape=None):
        # For dynamic inference, take the run-time input size from the given shape
        if self.isDynamic:
            self.inputWidth = input_shape[3]
            self.inputHeight = input_shape[2]

        # Preprocess the input image
        input_img = self.preprocess(image)

        # Run inference
        start = time.time()
        result = self.model.run({self.inputName: input_img})
        print('net forward time: {:.4f}'.format(time.time() - start))

        # Post-process the model output
        dstimg = self.postprocess(image, result)
        return dstimg
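
# Minimal programmatic usage of the class above (a sketch; assumes the default
# resource paths used elsewhere in this script exist and that a GPU target is
# available to MIGraphX):
#
#   detector = YOLOv9('../Resource/Models/yolov9-c.onnx')
#   src = cv2.imread('../Resource/Images/image_test.jpg')
#   dst = detector.detect(src)
#   cv2.imwrite('demo.jpg', dst)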

def read_images(image_path):
    image_lists = []
    for image_name in os.listdir(image_path):
        image = cv2.imread(os.path.join(image_path, image_name), 1)
        image_lists.append(image)
    return image_lists


def yolov9_Static(imgpath, modelpath, confThreshold, nmsThreshold):
    yolov9_detector = YOLOv9(modelpath, False, conf_thres=confThreshold, iou_thres=nmsThreshold)
    srcimg = cv2.imread(imgpath, 1)
    dstimg = yolov9_detector.detect(srcimg)
    # Save the detection result
    cv2.imwrite("./Result.jpg", dstimg)
    print("Result saved successfully")


def yolov9_dynamic(imgpath, modelpath, confThreshold, nmsThreshold):
    # Set the dynamic input shapes; the test folder is expected to hold
    # one image per shape listed here
    input_shapes = []
    input_shapes.append([1, 3, 416, 416])
    input_shapes.append([1, 3, 608, 608])

    # Read the test images
    image_lists = read_images(imgpath)

    # Run inference on each image with its corresponding input shape
    yolov9_detector = YOLOv9(modelpath, True, conf_thres=confThreshold, iou_thres=nmsThreshold)
    for i, image in enumerate(image_lists):
        print("Running inference on image {}".format(i))
        dstimg = yolov9_detector.detect(image, input_shapes[i])
        # Save the detection result
        result_name = "Result{}.jpg".format(i)
        cv2.imwrite(result_name, dstimg)
    print("All results saved successfully")


if __name__ == '__main__':
    # Create an argument parser to handle command-line arguments
    parser = argparse.ArgumentParser()
    parser.add_argument('--imgPath', type=str, default='../Resource/Images/image_test.jpg',
                        help="image path")
    parser.add_argument('--imgFolderPath', type=str, default='../Resource/Images/DynamicPics',
                        help="image folder path")
    parser.add_argument('--staticModelPath', type=str, default='../Resource/Models/yolov9-c.onnx',
                        help="static onnx filepath")
    parser.add_argument('--dynamicModelPath', type=str, default='../Resource/Models/yolov9-c-dynamic.onnx',
                        help="dynamic onnx filepath")
    parser.add_argument('--confThreshold', default=0.6, type=float, help='class confidence')
    parser.add_argument('--nmsThreshold', default=0.5, type=float, help='nms iou thresh')
    parser.add_argument("--staticInfer", action="store_true", default=False,
                        help="perform static inference")
    parser.add_argument("--dynamicInfer", action="store_true", default=False,
                        help="perform dynamic inference")
    args = parser.parse_args()

    # Static inference
    if args.staticInfer:
        yolov9_Static(args.imgPath, args.staticModelPath, args.confThreshold, args.nmsThreshold)

    # Dynamic inference
    if args.dynamicInfer:
        yolov9_dynamic(args.imgFolderPath, args.dynamicModelPath, args.confThreshold, args.nmsThreshold)
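
# Example invocations (a sketch; the script filename yolov9_migraphx.py is
# illustrative, and the default paths from the argparse section must exist):
#
#   # Static inference on a single image:
#   python yolov9_migraphx.py --staticInfer
#
#   # Dynamic-shape inference over the test image folder:
#   python yolov9_migraphx.py --dynamicInfer --confThreshold 0.5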