Commit dc92961d authored by turneram, committed by GitHub

YoloV4 example (#828)

Created a YoloV4 example
parent b45f7239
# YoloV4 Object Detection
The notebook [yolov4_inference.ipynb](./yolov4_inference.ipynb) is intended to be an example of how to use MIGraphX to perform object detection. The model used within is a pre-trained YOLOv4 from the ONNX Model Zoo.
## Run the Notebook
To run the example notebook, simply issue the following command from this directory:
```
$ jupyter notebook yolov4_inference.ipynb
```
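For reference, the notebook's MIGraphX steps boil down to a few Python calls. The sketch below is a minimal outline, not a replacement for the notebook; it assumes the notebook's dependency cell has already downloaded `yolov4.onnx` and `input.jpg` into `./utilities/`, and that the `migraphx` Python module is installed:
```
import cv2
import numpy as np
import migraphx
import image_processing as ip

# Pre-process: letterbox the image to the model's 416x416 input
original = cv2.cvtColor(cv2.imread("./utilities/input.jpg"), cv2.COLOR_BGR2RGB)
image_data = ip.image_preprocess(np.copy(original), [416, 416])
image_data = image_data[np.newaxis, ...].astype(np.float32)

# Parse and compile the ONNX model for the GPU, then run it
model = migraphx.parse_onnx("./utilities/yolov4.onnx")
model.compile(migraphx.get_target("gpu"), offload_copy=True)
input_name = next(iter(model.get_parameter_shapes()))
outputs = model.run({input_name: migraphx.argument(image_data)})
```
The notebook itself instead serializes a compiled model with `migraphx-driver` and reloads it with `migraphx.load`, which avoids recompiling on every run.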
# All pre- and post-processing methods used below are borrowed from the ONNX Model Zoo
# https://github.com/onnx/models/tree/master/vision/object_detection_segmentation/yolov4
import numpy as np
import cv2
from scipy import special
import colorsys
import random

# this function is from tensorflow-yolov4-tflite/core/utils.py
def image_preprocess(image, target_size, gt_boxes=None):
    ih, iw = target_size
    h, w, _ = image.shape

    scale = min(iw / w, ih / h)
    nw, nh = int(scale * w), int(scale * h)
    image_resized = cv2.resize(image, (nw, nh))

    image_padded = np.full(shape=[ih, iw, 3], fill_value=128.0)
    dw, dh = (iw - nw) // 2, (ih - nh) // 2
    image_padded[dh:nh + dh, dw:nw + dw, :] = image_resized
    image_padded = image_padded / 255.

    if gt_boxes is None:
        return image_padded
    else:
        gt_boxes[:, [0, 2]] = gt_boxes[:, [0, 2]] * scale + dw
        gt_boxes[:, [1, 3]] = gt_boxes[:, [1, 3]] * scale + dh
        return image_padded, gt_boxes
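
# Note (illustration): with a 480x640 (h x w) frame and target 416x416,
# scale = 416/640 = 0.65, the resize is 416x312, and 52 gray rows pad the top
# and bottom, so aspect ratio is preserved and pixel values land in [0, 1].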

def get_anchors(anchors_path, tiny=False):
    '''loads the anchors from a file'''
    with open(anchors_path) as f:
        anchors = f.readline()
    anchors = np.array(anchors.split(','), dtype=np.float32)
    return anchors.reshape(3, 3, 2)
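
# The anchors file is a single line of 18 comma-separated values; the copy from
# the ONNX Model Zoo (yolov4_anchors.txt) reads:
#   12,16, 19,36, 40,28, 36,75, 76,55, 72,146, 142,110, 192,243, 459,401
# and reshapes to (3 scales, 3 anchors per scale, [width, height]).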

def postprocess_bbbox(pred_bbox, ANCHORS, STRIDES, XYSCALE=[1, 1, 1]):
    '''define anchor boxes'''
    for i, pred in enumerate(pred_bbox):
        conv_shape = pred.shape
        output_size = conv_shape[1]
        conv_raw_dxdy = pred[:, :, :, :, 0:2]
        conv_raw_dwdh = pred[:, :, :, :, 2:4]
        xy_grid = np.meshgrid(np.arange(output_size), np.arange(output_size))
        xy_grid = np.expand_dims(np.stack(xy_grid, axis=-1), axis=2)

        xy_grid = np.tile(np.expand_dims(xy_grid, axis=0), [1, 1, 1, 3, 1])
        xy_grid = xy_grid.astype(np.float32)

        pred_xy = ((special.expit(conv_raw_dxdy) * XYSCALE[i]) - 0.5 *
                   (XYSCALE[i] - 1) + xy_grid) * STRIDES[i]
        pred_wh = (np.exp(conv_raw_dwdh) * ANCHORS[i])
        pred[:, :, :, :, 0:4] = np.concatenate([pred_xy, pred_wh], axis=-1)

    pred_bbox = [np.reshape(x, (-1, np.shape(x)[-1])) for x in pred_bbox]
    pred_bbox = np.concatenate(pred_bbox, axis=0)
    return pred_bbox
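
# Shape note for the 416x416 input used in the notebook: pred_bbox arrives as
# three tensors of shape (1, 52, 52, 3, 85), (1, 26, 26, 3, 85) and
# (1, 13, 13, 3, 85) (strides 8/16/32), where 85 = 4 box values + 1 objectness
# score + 80 COCO class probabilities; the returned array is (N, 85) with boxes
# decoded to (x, y, w, h) in input-image pixels.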

def postprocess_boxes(pred_bbox, org_img_shape, input_size, score_threshold):
    '''remove boundary boxes with a low detection probability'''
    valid_scale = [0, np.inf]
    pred_bbox = np.array(pred_bbox)

    pred_xywh = pred_bbox[:, 0:4]
    pred_conf = pred_bbox[:, 4]
    pred_prob = pred_bbox[:, 5:]

    # (1) (x, y, w, h) --> (xmin, ymin, xmax, ymax)
    pred_coor = np.concatenate([
        pred_xywh[:, :2] - pred_xywh[:, 2:] * 0.5,
        pred_xywh[:, :2] + pred_xywh[:, 2:] * 0.5
    ],
                               axis=-1)

    # (2) (xmin, ymin, xmax, ymax) -> (xmin_org, ymin_org, xmax_org, ymax_org)
    org_h, org_w = org_img_shape
    resize_ratio = min(input_size / org_w, input_size / org_h)
    dw = (input_size - resize_ratio * org_w) / 2
    dh = (input_size - resize_ratio * org_h) / 2
    pred_coor[:, 0::2] = 1.0 * (pred_coor[:, 0::2] - dw) / resize_ratio
    pred_coor[:, 1::2] = 1.0 * (pred_coor[:, 1::2] - dh) / resize_ratio

    # (3) clip some boxes that are out of range
    pred_coor = np.concatenate([
        np.maximum(pred_coor[:, :2], [0, 0]),
        np.minimum(pred_coor[:, 2:], [org_w - 1, org_h - 1])
    ],
                               axis=-1)
    invalid_mask = np.logical_or((pred_coor[:, 0] > pred_coor[:, 2]),
                                 (pred_coor[:, 1] > pred_coor[:, 3]))
    pred_coor[invalid_mask] = 0

    # (4) discard some invalid boxes
    bboxes_scale = np.sqrt(
        np.multiply.reduce(pred_coor[:, 2:4] - pred_coor[:, 0:2], axis=-1))
    scale_mask = np.logical_and((valid_scale[0] < bboxes_scale),
                                (bboxes_scale < valid_scale[1]))

    # (5) discard some boxes with low scores
    classes = np.argmax(pred_prob, axis=-1)
    scores = pred_conf * pred_prob[np.arange(len(pred_coor)), classes]
    score_mask = scores > score_threshold
    mask = np.logical_and(scale_mask, score_mask)
    coors, scores, classes = pred_coor[mask], scores[mask], classes[mask]

    return np.concatenate(
        [coors, scores[:, np.newaxis], classes[:, np.newaxis]], axis=-1)

def bboxes_iou(boxes1, boxes2):
    '''calculate the Intersection Over Union value'''
    boxes1 = np.array(boxes1)
    boxes2 = np.array(boxes2)

    boxes1_area = (boxes1[..., 2] - boxes1[..., 0]) * (boxes1[..., 3] -
                                                       boxes1[..., 1])
    boxes2_area = (boxes2[..., 2] - boxes2[..., 0]) * (boxes2[..., 3] -
                                                       boxes2[..., 1])

    left_up = np.maximum(boxes1[..., :2], boxes2[..., :2])
    right_down = np.minimum(boxes1[..., 2:], boxes2[..., 2:])

    inter_section = np.maximum(right_down - left_up, 0.0)
    inter_area = inter_section[..., 0] * inter_section[..., 1]
    union_area = boxes1_area + boxes2_area - inter_area
    ious = np.maximum(1.0 * inter_area / union_area, np.finfo(np.float32).eps)

    return ious
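
# Worked example (hypothetical boxes): [0, 0, 2, 2] and [1, 1, 3, 3] overlap in
# a 1x1 square, so IoU = 1 / (4 + 4 - 1) = 1/7 ≈ 0.143:
#   bboxes_iou(np.array([0., 0., 2., 2.]), np.array([1., 1., 3., 3.]))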

def nms(bboxes, iou_threshold, sigma=0.3, method='nms'):
    """
    :param bboxes: (xmin, ymin, xmax, ymax, score, class)

    Note: soft-nms, https://arxiv.org/pdf/1704.04503.pdf
          https://github.com/bharatsingh430/soft-nms
    """
    classes_in_img = list(set(bboxes[:, 5]))
    best_bboxes = []

    for cls in classes_in_img:
        cls_mask = (bboxes[:, 5] == cls)
        cls_bboxes = bboxes[cls_mask]

        while len(cls_bboxes) > 0:
            max_ind = np.argmax(cls_bboxes[:, 4])
            best_bbox = cls_bboxes[max_ind]
            best_bboxes.append(best_bbox)
            cls_bboxes = np.concatenate(
                [cls_bboxes[:max_ind], cls_bboxes[max_ind + 1:]])
            iou = bboxes_iou(best_bbox[np.newaxis, :4], cls_bboxes[:, :4])
            weight = np.ones((len(iou), ), dtype=np.float32)

            assert method in ['nms', 'soft-nms']

            if method == 'nms':
                iou_mask = iou > iou_threshold
                weight[iou_mask] = 0.0

            if method == 'soft-nms':
                weight = np.exp(-(1.0 * iou**2 / sigma))

            cls_bboxes[:, 4] = cls_bboxes[:, 4] * weight
            score_mask = cls_bboxes[:, 4] > 0.
            cls_bboxes = cls_bboxes[score_mask]

    return best_bboxes
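
# Example call (values for illustration): with two class-0 boxes whose IoU is
# about 0.68, hard NMS at threshold 0.213 keeps only the higher-scoring one:
#   nms(np.array([[0, 0, 10, 10, 0.9, 0], [1, 1, 11, 11, 0.8, 0]]), 0.213)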

def read_class_names(class_file_name):
    '''loads class name from a file'''
    names = {}
    with open(class_file_name, 'r') as data:
        for ID, name in enumerate(data):
            names[ID] = name.strip('\n')
    return names

def draw_bbox(image,
              bboxes,
              classes=read_class_names("./utilities/coco.names"),
              show_label=True):
    """
    bboxes: [x_min, y_min, x_max, y_max, probability, cls_id] format coordinates.
    """
    num_classes = len(classes)
    image_h, image_w, _ = image.shape
    hsv_tuples = [(1.0 * x / num_classes, 1., 1.) for x in range(num_classes)]
    colors = list(map(lambda x: colorsys.hsv_to_rgb(*x), hsv_tuples))
    colors = list(
        map(lambda x: (int(x[0] * 255), int(x[1] * 255), int(x[2] * 255)),
            colors))

    random.seed(0)
    random.shuffle(colors)
    random.seed(None)

    for i, bbox in enumerate(bboxes):
        coor = np.array(bbox[:4], dtype=np.int32)
        fontScale = 0.5
        score = bbox[4]
        class_ind = int(bbox[5])
        bbox_color = colors[class_ind]
        bbox_thick = int(0.6 * (image_h + image_w) / 600)
        c1, c2 = (coor[0], coor[1]), (coor[2], coor[3])
        cv2.rectangle(image, c1, c2, bbox_color, bbox_thick)

        if show_label:
            bbox_mess = '%s: %.2f' % (classes[class_ind], score)
            t_size = cv2.getTextSize(bbox_mess,
                                     0,
                                     fontScale,
                                     thickness=bbox_thick // 2)[0]
            cv2.rectangle(image, c1,
                          (c1[0] + t_size[0], c1[1] - t_size[1] - 3),
                          bbox_color, -1)
            cv2.putText(image,
                        bbox_mess, (c1[0], c1[1] - 2),
                        cv2.FONT_HERSHEY_SIMPLEX,
                        fontScale, (0, 0, 0),
                        bbox_thick // 2,
                        lineType=cv2.LINE_AA)

    return image
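
# Minimal smoke test for the pre-processing path (a sketch; assumes the
# notebook's download cell has already fetched ./utilities/input.jpg and
# ./utilities/coco.names):
if __name__ == "__main__":
    img = cv2.cvtColor(cv2.imread("./utilities/input.jpg"), cv2.COLOR_BGR2RGB)
    data = image_preprocess(np.copy(img), [416, 416])
    print("preprocessed shape:", data.shape)  # expected: (416, 416, 3)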
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Object Detection with YoloV4\n",
"This notebook is intended to be an example of how to use MIGraphX to perform object detection. The model used below is a pre-trained yolov4 from the ONNX model zoo. "
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Download dependencies"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import os.path\n",
"\n",
"if not os.path.exists(\"./utilities/coco.names\"):\n",
" !wget https://github.com/onnx/models/raw/master/vision/object_detection_segmentation/yolov4/dependencies/coco.names -P ./utilities/\n",
"if not os.path.exists(\"./utilities/yolov4_anchors.txt\"):\n",
" !wget https://github.com/onnx/models/raw/master/vision/object_detection_segmentation/yolov4/dependencies/yolov4_anchors.txt -P ./utilities/\n",
"if not os.path.exists(\"./utilities/input.jpg\"):\n",
" # The image used is from the COCO dataset (https://cocodataset.org/#explore)\n",
" # Other images can be tested by replacing the link below\n",
" image_link = \"https://farm3.staticflickr.com/2009/2306189268_88cc86b30f_z.jpg\"\n",
" !wget -O ./utilities/input.jpg $image_link\n",
"if not os.path.exists(\"./utilities/yolov4.onnx\"):\n",
" !wget https://github.com/onnx/models/raw/master/vision/object_detection_segmentation/yolov4/model/yolov4.onnx -P ./utilities/"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Serialize model using MIGraphX Driver\n",
"Please refer to the [MIGraphX Driver example](../../migraphx/migraphx_driver) if you would like more information about this tool."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"if not os.path.exists(\"yolov4_fp16.msgpack\"):\n",
" !/opt/rocm/bin/migraphx-driver compile ./utilities/yolov4.onnx --gpu --enable-offload-copy --fp16ref --binary -o yolov4_fp16.msgpack\n",
"if not os.path.exists(\"yolov4.msgpack\"):\n",
" !/opt/rocm/bin/migraphx-driver compile ./utilities/yolov4.onnx --gpu --enable-offload-copy --binary -o yolov4.msgpack"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Import libraries \n",
"Please refer to [this section](https://github.com/ROCmSoftwarePlatform/AMDMIGraphX#using-migraphx-python-module) of the main README if the migraphx module is not found. "
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import migraphx\n",
"import cv2\n",
"import time\n",
"import numpy as np\n",
"import image_processing as ip\n",
"from PIL import Image"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Read and pre-process image data"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"input_size = 416\n",
"\n",
"original_image = cv2.imread(\"./utilities/input.jpg\")\n",
"original_image = cv2.cvtColor(original_image, cv2.COLOR_BGR2RGB)\n",
"original_image_size = original_image.shape[:2]\n",
"\n",
"image_data = ip.image_preprocess(np.copy(original_image), [input_size, input_size])\n",
"image_data = image_data[np.newaxis, ...].astype(np.float32)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Load and run model"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Load serialized model (either single- or half-precision)\n",
"model = migraphx.load(\"yolov4.msgpack\", format=\"msgpack\")\n",
"#model = migraphx.load(\"yolov4_fp16.msgpack\", format=\"msgpack\")\n",
"\n",
"# Get the name of the input parameter and convert image data to an MIGraphX argument\n",
"input_name = next(iter(model.get_parameter_shapes()))\n",
"input_argument = migraphx.argument(image_data)\n",
"\n",
"# Evaluate the model and convert the outputs for post-processing\n",
"outputs = model.run({input_name: input_argument})\n",
"detections = [np.ndarray(shape=out.get_shape().lens(), buffer=np.array(out.tolist()), dtype=float) for out in outputs]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Post-process the model outputs and display image with detection bounding boxes"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"ANCHORS = \"./utilities/yolov4_anchors.txt\"\n",
"STRIDES = [8, 16, 32]\n",
"XYSCALE = [1.2, 1.1, 1.05]\n",
"\n",
"ANCHORS = ip.get_anchors(ANCHORS)\n",
"STRIDES = np.array(STRIDES)\n",
"\n",
"pred_bbox = ip.postprocess_bbbox(detections, ANCHORS, STRIDES, XYSCALE)\n",
"bboxes = ip.postprocess_boxes(pred_bbox, original_image_size, input_size, 0.25)\n",
"bboxes = ip.nms(bboxes, 0.213, method='nms')\n",
"image = ip.draw_bbox(original_image, bboxes)\n",
"\n",
"image = Image.fromarray(image)\n",
"image.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"name": "python3",
"display_name": "Python 3.8.3 64-bit ('base': conda)"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.3"
},
"metadata": {
"interpreter": {
"hash": "d7283edef085bb46d38a3069bce96b3de1793019cb5bd7b1e86bf9785b67f304"
}
},
"interpreter": {
"hash": "d7283edef085bb46d38a3069bce96b3de1793019cb5bd7b1e86bf9785b67f304"
}
},
"nbformat": 4,
"nbformat_minor": 2
}