# YOLOv8 LibTorch Inference C++
This example demonstrates how to perform inference using YOLOv8 models in C++ with the LibTorch API.
## Dependencies
| Dependency | Version |
| ------------ | -------- |
| OpenCV | >=4.0.0 |
| C++ Standard | >=17 |
| Cmake | >=3.18 |
| Libtorch | >=1.12.1 |
## Usage
```bash
git clone https://github.com/ultralytics/ultralytics
cd ultralytics
pip install .
cd examples/YOLOv8-LibTorch-CPP-Inference
mkdir build
cd build
cmake ..
make
./yolov8_libtorch_inference
```
## Exporting YOLOv8
To export YOLOv8 models:
```bash
yolo export model=yolov8s.pt imgsz=640 format=torchscript
```
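Alternatively, you can export from Python. A minimal sketch using the `ultralytics` package (the model name here is just an example):
```python
from ultralytics import YOLO

# Load a YOLOv8 model and export it to TorchScript
model = YOLO("yolov8s.pt")
model.export(format="torchscript", imgsz=640)
```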
#include <iostream>
#include <opencv2/core.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/imgcodecs.hpp>
#include <torch/torch.h>
#include <torch/script.h>
using torch::indexing::Slice;
using torch::indexing::None;
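// Compute the resize scale that fits the input image inside the target size while preserving its aspect ratio.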
float generate_scale(cv::Mat& image, const std::vector<int>& target_size) {
int origin_w = image.cols;
int origin_h = image.rows;
int target_h = target_size[0];
int target_w = target_size[1];
float ratio_h = static_cast<float>(target_h) / static_cast<float>(origin_h);
float ratio_w = static_cast<float>(target_w) / static_cast<float>(origin_w);
float resize_scale = std::min(ratio_h, ratio_w);
return resize_scale;
}
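// Letterbox: resize with unchanged aspect ratio and pad the borders with gray (114) to reach the target size; returns the resize scale.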
float letterbox(cv::Mat &input_image, cv::Mat &output_image, const std::vector<int> &target_size) {
if (input_image.cols == target_size[1] && input_image.rows == target_size[0]) {
if (input_image.data == output_image.data) {
return 1.;
} else {
output_image = input_image.clone();
return 1.;
}
}
float resize_scale = generate_scale(input_image, target_size);
int new_shape_w = std::round(input_image.cols * resize_scale);
int new_shape_h = std::round(input_image.rows * resize_scale);
float padw = (target_size[1] - new_shape_w) / 2.;
float padh = (target_size[0] - new_shape_h) / 2.;
int top = std::round(padh - 0.1);
int bottom = std::round(padh + 0.1);
int left = std::round(padw - 0.1);
int right = std::round(padw + 0.1);
cv::resize(input_image, output_image,
cv::Size(new_shape_w, new_shape_h),
0, 0, cv::INTER_AREA);
cv::copyMakeBorder(output_image, output_image, top, bottom, left, right,
cv::BORDER_CONSTANT, cv::Scalar(114.));
return resize_scale;
}
torch::Tensor xyxy2xywh(const torch::Tensor& x) {
auto y = torch::empty_like(x);
y.index_put_({"...", 0}, (x.index({"...", 0}) + x.index({"...", 2})).div(2));
y.index_put_({"...", 1}, (x.index({"...", 1}) + x.index({"...", 3})).div(2));
y.index_put_({"...", 2}, x.index({"...", 2}) - x.index({"...", 0}));
y.index_put_({"...", 3}, x.index({"...", 3}) - x.index({"...", 1}));
return y;
}
torch::Tensor xywh2xyxy(const torch::Tensor& x) {
auto y = torch::empty_like(x);
auto dw = x.index({"...", 2}).div(2);
auto dh = x.index({"...", 3}).div(2);
y.index_put_({"...", 0}, x.index({"...", 0}) - dw);
y.index_put_({"...", 1}, x.index({"...", 1}) - dh);
y.index_put_({"...", 2}, x.index({"...", 0}) + dw);
y.index_put_({"...", 3}, x.index({"...", 1}) + dh);
return y;
}
// Reference: https://github.com/pytorch/vision/blob/main/torchvision/csrc/ops/cpu/nms_kernel.cpp
torch::Tensor nms(const torch::Tensor& bboxes, const torch::Tensor& scores, float iou_threshold) {
if (bboxes.numel() == 0)
return torch::empty({0}, bboxes.options().dtype(torch::kLong));
auto x1_t = bboxes.select(1, 0).contiguous();
auto y1_t = bboxes.select(1, 1).contiguous();
auto x2_t = bboxes.select(1, 2).contiguous();
auto y2_t = bboxes.select(1, 3).contiguous();
torch::Tensor areas_t = (x2_t - x1_t) * (y2_t - y1_t);
auto order_t = std::get<1>(
scores.sort(/*stable=*/true, /*dim=*/0, /* descending=*/true));
auto ndets = bboxes.size(0);
torch::Tensor suppressed_t = torch::zeros({ndets}, bboxes.options().dtype(torch::kByte));
torch::Tensor keep_t = torch::zeros({ndets}, bboxes.options().dtype(torch::kLong));
auto suppressed = suppressed_t.data_ptr<uint8_t>();
auto keep = keep_t.data_ptr<int64_t>();
auto order = order_t.data_ptr<int64_t>();
auto x1 = x1_t.data_ptr<float>();
auto y1 = y1_t.data_ptr<float>();
auto x2 = x2_t.data_ptr<float>();
auto y2 = y2_t.data_ptr<float>();
auto areas = areas_t.data_ptr<float>();
int64_t num_to_keep = 0;
for (int64_t _i = 0; _i < ndets; _i++) {
auto i = order[_i];
if (suppressed[i] == 1)
continue;
keep[num_to_keep++] = i;
auto ix1 = x1[i];
auto iy1 = y1[i];
auto ix2 = x2[i];
auto iy2 = y2[i];
auto iarea = areas[i];
for (int64_t _j = _i + 1; _j < ndets; _j++) {
auto j = order[_j];
if (suppressed[j] == 1)
continue;
auto xx1 = std::max(ix1, x1[j]);
auto yy1 = std::max(iy1, y1[j]);
auto xx2 = std::min(ix2, x2[j]);
auto yy2 = std::min(iy2, y2[j]);
auto w = std::max(static_cast<float>(0), xx2 - xx1);
auto h = std::max(static_cast<float>(0), yy2 - yy1);
auto inter = w * h;
auto ovr = inter / (iarea + areas[j] - inter);
if (ovr > iou_threshold)
suppressed[j] = 1;
}
}
return keep_t.narrow(0, 0, num_to_keep);
}
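// Decode raw YOLOv8 predictions: keep candidates above conf_thres, convert boxes to xyxy, and run NMS with boxes offset by class index so suppression is per class.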
torch::Tensor non_max_suppression(torch::Tensor& prediction, float conf_thres = 0.25, float iou_thres = 0.45, int max_det = 300) {
auto bs = prediction.size(0);
auto nc = prediction.size(1) - 4;
auto nm = prediction.size(1) - nc - 4;
auto mi = 4 + nc;
auto xc = prediction.index({Slice(), Slice(4, mi)}).amax(1) > conf_thres;
prediction = prediction.transpose(-1, -2);
prediction.index_put_({"...", Slice(None, 4)}, xywh2xyxy(prediction.index({"...", Slice(None, 4)})));
std::vector<torch::Tensor> output;
for (int i = 0; i < bs; i++) {
output.push_back(torch::zeros({0, 6 + nm}, prediction.device()));
}
for (int xi = 0; xi < prediction.size(0); xi++) {
auto x = prediction[xi];
x = x.index({xc[xi]});
auto x_split = x.split({4, nc, nm}, 1);
auto box = x_split[0], cls = x_split[1], mask = x_split[2];
auto [conf, j] = cls.max(1, true);
x = torch::cat({box, conf, j.toType(torch::kFloat), mask}, 1);
x = x.index({conf.view(-1) > conf_thres});
int n = x.size(0);
if (!n) { continue; }
// NMS
auto c = x.index({Slice(), Slice{5, 6}}) * 7680;
auto boxes = x.index({Slice(), Slice(None, 4)}) + c;
auto scores = x.index({Slice(), 4});
auto i = nms(boxes, scores, iou_thres);
i = i.index({Slice(None, max_det)});
output[xi] = x.index({i});
}
return torch::stack(output);
}
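// Clamp box coordinates to the image bounds (shape = {height, width}).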
torch::Tensor clip_boxes(torch::Tensor& boxes, const std::vector<int>& shape) {
boxes.index_put_({"...", 0}, boxes.index({"...", 0}).clamp(0, shape[1]));
boxes.index_put_({"...", 1}, boxes.index({"...", 1}).clamp(0, shape[0]));
boxes.index_put_({"...", 2}, boxes.index({"...", 2}).clamp(0, shape[1]));
boxes.index_put_({"...", 3}, boxes.index({"...", 3}).clamp(0, shape[0]));
return boxes;
}
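// Undo the letterbox transform: subtract the padding and divide by the resize gain to map boxes from the model input shape (img1_shape) back to the original image shape (img0_shape).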
torch::Tensor scale_boxes(const std::vector<int>& img1_shape, torch::Tensor& boxes, const std::vector<int>& img0_shape) {
auto gain = (std::min)((float)img1_shape[0] / img0_shape[0], (float)img1_shape[1] / img0_shape[1]);
auto pad0 = std::round((float)(img1_shape[1] - img0_shape[1] * gain) / 2. - 0.1);
auto pad1 = std::round((float)(img1_shape[0] - img0_shape[0] * gain) / 2. - 0.1);
boxes.index_put_({"...", 0}, boxes.index({"...", 0}) - pad0);
boxes.index_put_({"...", 2}, boxes.index({"...", 2}) - pad0);
boxes.index_put_({"...", 1}, boxes.index({"...", 1}) - pad1);
boxes.index_put_({"...", 3}, boxes.index({"...", 3}) - pad1);
boxes.index_put_({"...", Slice(None, 4)}, boxes.index({"...", Slice(None, 4)}).div(gain));
return boxes;
}
int main() {
// Device
torch::Device device(torch::cuda::is_available() ? torch::kCUDA :torch::kCPU);
// Note that in this example the classes are hard-coded
std::vector<std::string> classes {"person", "bicycle", "car", "motorcycle", "airplane", "bus", "train", "truck", "boat", "traffic light", "fire hydrant",
"stop sign", "parking meter", "bench", "bird", "cat", "dog", "horse", "sheep", "cow", "elephant", "bear", "zebra",
"giraffe", "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee", "skis", "snowboard", "sports ball", "kite",
"baseball bat", "baseball glove", "skateboard", "surfboard", "tennis racket", "bottle", "wine glass", "cup", "fork", "knife",
"spoon", "bowl", "banana", "apple", "sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake", "chair",
"couch", "potted plant", "bed", "dining table", "toilet", "tv", "laptop", "mouse", "remote", "keyboard", "cell phone",
"microwave", "oven", "toaster", "sink", "refrigerator", "book", "clock", "vase", "scissors", "teddy bear", "hair drier", "toothbrush"};
try {
// Load the model (e.g. yolov8s.torchscript)
std::string model_path = "/path/to/yolov8s.torchscript";
torch::jit::script::Module yolo_model;
yolo_model = torch::jit::load(model_path);
yolo_model.eval();
yolo_model.to(device, torch::kFloat32);
// Load image and preprocess
cv::Mat image = cv::imread("/path/to/bus.jpg");
cv::Mat input_image;
letterbox(image, input_image, {640, 640});
cv::cvtColor(input_image, input_image, cv::COLOR_BGR2RGB);
torch::Tensor image_tensor = torch::from_blob(input_image.data, {input_image.rows, input_image.cols, 3}, torch::kByte).to(device);
image_tensor = image_tensor.toType(torch::kFloat32).div(255);
image_tensor = image_tensor.permute({2, 0, 1});
image_tensor = image_tensor.unsqueeze(0);
std::vector<torch::jit::IValue> inputs {image_tensor};
// Inference
torch::Tensor output = yolo_model.forward(inputs).toTensor().cpu();
// NMS
auto keep = non_max_suppression(output)[0];
auto boxes = keep.index({Slice(), Slice(None, 4)});
keep.index_put_({Slice(), Slice(None, 4)}, scale_boxes({input_image.rows, input_image.cols}, boxes, {image.rows, image.cols}));
// Show the results
for (int i = 0; i < keep.size(0); i++) {
int x1 = keep[i][0].item().toFloat();
int y1 = keep[i][1].item().toFloat();
int x2 = keep[i][2].item().toFloat();
int y2 = keep[i][3].item().toFloat();
float conf = keep[i][4].item().toFloat();
int cls = keep[i][5].item().toInt();
std::cout << "Rect: [" << x1 << "," << y1 << "," << x2 << "," << y2 << "] Conf: " << conf << " Class: " << classes[cls] << std::endl;
}
} catch (const c10::Error& e) {
std::cout << e.msg() << std::endl;
}
return 0;
}
cmake_minimum_required(VERSION 3.5)
set(PROJECT_NAME Yolov8OnnxRuntimeCPPInference)
project(${PROJECT_NAME} VERSION 0.0.1 LANGUAGES CXX)
# -------------- Support C++17 for using filesystem ------------------#
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_CXX_EXTENSIONS ON)
set(CMAKE_INCLUDE_CURRENT_DIR ON)
# -------------- OpenCV ------------------#
find_package(OpenCV REQUIRED)
include_directories(${OpenCV_INCLUDE_DIRS})
# -------------- Compile CUDA for FP16 inference if needed ------------------#
option(USE_CUDA "Enable CUDA support" ON)
if (NOT APPLE AND USE_CUDA)
find_package(CUDA REQUIRED)
include_directories(${CUDA_INCLUDE_DIRS})
add_definitions(-DUSE_CUDA)
else ()
set(USE_CUDA OFF)
endif ()
# -------------- ONNXRUNTIME ------------------#
# Set ONNXRUNTIME_VERSION
set(ONNXRUNTIME_VERSION 1.15.1)
if (WIN32)
if (USE_CUDA)
set(ONNXRUNTIME_ROOT "${CMAKE_CURRENT_SOURCE_DIR}/onnxruntime-win-x64-gpu-${ONNXRUNTIME_VERSION}")
else ()
set(ONNXRUNTIME_ROOT "${CMAKE_CURRENT_SOURCE_DIR}/onnxruntime-win-x64-${ONNXRUNTIME_VERSION}")
endif ()
elseif (LINUX)
if (USE_CUDA)
set(ONNXRUNTIME_ROOT "${CMAKE_CURRENT_SOURCE_DIR}/onnxruntime-linux-x64-gpu-${ONNXRUNTIME_VERSION}")
else ()
set(ONNXRUNTIME_ROOT "${CMAKE_CURRENT_SOURCE_DIR}/onnxruntime-linux-x64-${ONNXRUNTIME_VERSION}")
endif ()
elseif (APPLE)
set(ONNXRUNTIME_ROOT "${CMAKE_CURRENT_SOURCE_DIR}/onnxruntime-osx-arm64-${ONNXRUNTIME_VERSION}")
# Apple X64 binary
# set(ONNXRUNTIME_ROOT "${CMAKE_CURRENT_SOURCE_DIR}/onnxruntime-osx-x64-${ONNXRUNTIME_VERSION}")
# Apple Universal binary
# set(ONNXRUNTIME_ROOT "${CMAKE_CURRENT_SOURCE_DIR}/onnxruntime-osx-universal2-${ONNXRUNTIME_VERSION}")
else ()
message(SEND_ERROR "Variable ONNXRUNTIME_ROOT is not set properly. Please check if your cmake project \
is not compiled with `-D WIN32=TRUE`, `-D LINUX=TRUE`, or `-D APPLE=TRUE`!")
endif ()
include_directories(${PROJECT_NAME} ${ONNXRUNTIME_ROOT}/include)
set(PROJECT_SOURCES
main.cpp
inference.h
inference.cpp
)
add_executable(${PROJECT_NAME} ${PROJECT_SOURCES})
if (WIN32)
target_link_libraries(${PROJECT_NAME} ${OpenCV_LIBS} ${ONNXRUNTIME_ROOT}/lib/onnxruntime.lib)
if (USE_CUDA)
target_link_libraries(${PROJECT_NAME} ${CUDA_LIBRARIES})
endif ()
elseif (LINUX)
target_link_libraries(${PROJECT_NAME} ${OpenCV_LIBS} ${ONNXRUNTIME_ROOT}/lib/libonnxruntime.so)
if (USE_CUDA)
target_link_libraries(${PROJECT_NAME} ${CUDA_LIBRARIES})
endif ()
elseif (APPLE)
target_link_libraries(${PROJECT_NAME} ${OpenCV_LIBS} ${ONNXRUNTIME_ROOT}/lib/libonnxruntime.dylib)
endif ()
# For windows system, copy onnxruntime.dll to the same folder of the executable file
if (WIN32)
add_custom_command(TARGET ${PROJECT_NAME} POST_BUILD
COMMAND ${CMAKE_COMMAND} -E copy_if_different
"${ONNXRUNTIME_ROOT}/lib/onnxruntime.dll"
$<TARGET_FILE_DIR:${PROJECT_NAME}>)
endif ()
# Download https://raw.githubusercontent.com/ultralytics/ultralytics/main/ultralytics/cfg/datasets/coco.yaml
# and put it in the same folder of the executable file
configure_file(coco.yaml ${CMAKE_CURRENT_BINARY_DIR}/coco.yaml COPYONLY)
# Copy yolov8n.onnx file to the same folder of the executable file
configure_file(yolov8n.onnx ${CMAKE_CURRENT_BINARY_DIR}/yolov8n.onnx COPYONLY)
# Create folder name images in the same folder of the executable file
add_custom_command(TARGET ${PROJECT_NAME} POST_BUILD
COMMAND ${CMAKE_COMMAND} -E make_directory ${CMAKE_CURRENT_BINARY_DIR}/images
)
# YOLOv8 OnnxRuntime C++
<img alt="C++" src="https://img.shields.io/badge/C++-17-blue.svg?style=flat&logo=c%2B%2B"> <img alt="Onnx-runtime" src="https://img.shields.io/badge/OnnxRuntime-717272.svg?logo=Onnx&logoColor=white">
This example demonstrates how to perform inference using YOLOv8 in C++ with ONNX Runtime and OpenCV's API.
## Benefits ✨
- Friendly for deployment in the industrial sector.
- Faster than OpenCV's DNN inference on both CPU and GPU.
- Supports FP32 and FP16 CUDA acceleration.
## Note ☕
1. Benefiting from Ultralytics' latest release, a `Transpose` op has been added to the YOLOv8 model, giving YOLOv8 the same output shape as YOLOv5. Therefore, you can run inference on YOLOv5/v7/v8 models via this project.
## Exporting YOLOv8 Models 📦
To export YOLOv8 models, use the following Python script:
```python
from ultralytics import YOLO
# Load a YOLOv8 model
model = YOLO("yolov8n.pt")
# Export the model
model.export(format="onnx", opset=12, simplify=True, dynamic=False, imgsz=640)
```
Alternatively, you can use the following command to export the model in the terminal:
```bash
yolo export model=yolov8n.pt opset=12 simplify=True dynamic=False format=onnx imgsz=640,640
```
## Exporting YOLOv8 FP16 Models 📦
```python
import onnx
from onnxconverter_common import float16
model = onnx.load(R"YOUR_ONNX_PATH")
model_fp16 = float16.convert_float_to_float16(model)
onnx.save(model_fp16, R"YOUR_FP16_ONNX_PATH")
```
## Download COCO.yaml file 📂
In order to run the example, you also need to download `coco.yaml`. You can download the file manually from [here](https://raw.githubusercontent.com/ultralytics/ultralytics/main/ultralytics/cfg/datasets/coco.yaml).
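Alternatively, a minimal Python sketch to fetch it (assumes network access; place the resulting `coco.yaml` next to the executable, since it is read by `ReadCocoYaml()` at startup):
```python
import urllib.request

# Download the COCO class-name file used by the example
url = "https://raw.githubusercontent.com/ultralytics/ultralytics/main/ultralytics/cfg/datasets/coco.yaml"
urllib.request.urlretrieve(url, "coco.yaml")
```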
## Dependencies ⚙️
| Dependency | Version |
| -------------------------------- | ------------- |
| Onnxruntime (Linux, Windows, macOS) | >=1.14.1 |
| OpenCV | >=4.0.0 |
| C++ Standard | >=17 |
| Cmake | >=3.5 |
| Cuda (Optional) | >=11.4 \<12.0 |
| cuDNN (Cuda required) | =8 |
Note: The dependency on C++17 is due to the usage of the C++17 filesystem feature.
Note (2): Due to ONNX Runtime, we need to use CUDA 11 and cuDNN 8. Keep in mind that this requirement might change in the future.
## Build 🛠️
1. Clone the repository to your local machine.
2. Navigate to the root directory of the repository.
3. Create a build directory and navigate to it:
```console
mkdir build && cd build
```
4. Run CMake to generate the build files:
```console
cmake ..
```
**Notice**:
If you encounter an error indicating that the `ONNXRUNTIME_ROOT` variable is not set correctly, you can resolve this by building the project using the appropriate command tailored to your system.
```console
# compiled in a win32 system
cmake -D WIN32=TRUE ..
# compiled in a linux system
cmake -D LINUX=TRUE ..
# compiled in an apple system
cmake -D APPLE=TRUE ..
```
5. Build the project:
```console
make
```
6. The built executable should now be located in the `build` directory.
## Usage 🚀
```c++
// Change the parameters as you like
// Pay attention to your device and the ONNX model type (fp32 or fp16)
DL_INIT_PARAM params;
params.rectConfidenceThreshold = 0.1;
params.iouThreshold = 0.5;
params.modelPath = "yolov8n.onnx";
params.imgSize = { 640, 640 };
params.cudaEnable = true;
params.modelType = YOLO_DETECT_V8;
yoloDetector->CreateSession(params);
Detector(yoloDetector);
```
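A complete version of this flow is `DetectTest()` in `main.cpp`, which constructs the `YOLO_V8` instance, loads the class names with `ReadCocoYaml()`, and then calls `CreateSession()` and `Detector()`.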
#include "inference.h"
#include <regex>
#define benchmark
#define min(a,b) (((a) < (b)) ? (a) : (b))
YOLO_V8::YOLO_V8() {
}
YOLO_V8::~YOLO_V8() {
delete session;
}
#ifdef USE_CUDA
namespace Ort
{
template<>
struct TypeToTensorType<half> { static constexpr ONNXTensorElementDataType type = ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT16; };
}
#endif
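// Copy an 8-bit HWC image into a planar CHW blob (float or half), scaling pixel values to [0, 1].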
template<typename T>
char* BlobFromImage(cv::Mat& iImg, T& iBlob) {
int channels = iImg.channels();
int imgHeight = iImg.rows;
int imgWidth = iImg.cols;
for (int c = 0; c < channels; c++)
{
for (int h = 0; h < imgHeight; h++)
{
for (int w = 0; w < imgWidth; w++)
{
iBlob[c * imgWidth * imgHeight + h * imgWidth + w] = typename std::remove_pointer<T>::type(
(iImg.at<cv::Vec3b>(h, w)[c]) / 255.0f);
}
}
}
return RET_OK;
}
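// Convert the input to RGB, then letterbox (resize and pad toward the bottom/right) for detection/pose models, or center-crop for classification models.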
char* YOLO_V8::PreProcess(cv::Mat& iImg, std::vector<int> iImgSize, cv::Mat& oImg)
{
if (iImg.channels() == 3)
{
oImg = iImg.clone();
cv::cvtColor(oImg, oImg, cv::COLOR_BGR2RGB);
}
else
{
cv::cvtColor(iImg, oImg, cv::COLOR_GRAY2RGB);
}
switch (modelType)
{
case YOLO_DETECT_V8:
case YOLO_POSE:
case YOLO_DETECT_V8_HALF:
case YOLO_POSE_V8_HALF://LetterBox
{
if (iImg.cols >= iImg.rows)
{
resizeScales = iImg.cols / (float)iImgSize.at(0);
cv::resize(oImg, oImg, cv::Size(iImgSize.at(0), int(iImg.rows / resizeScales)));
}
else
{
resizeScales = iImg.rows / (float)iImgSize.at(0);
cv::resize(oImg, oImg, cv::Size(int(iImg.cols / resizeScales), iImgSize.at(1)));
}
cv::Mat tempImg = cv::Mat::zeros(iImgSize.at(0), iImgSize.at(1), CV_8UC3);
oImg.copyTo(tempImg(cv::Rect(0, 0, oImg.cols, oImg.rows)));
oImg = tempImg;
break;
}
case YOLO_CLS://CenterCrop
{
int h = iImg.rows;
int w = iImg.cols;
int m = min(h, w);
int top = (h - m) / 2;
int left = (w - m) / 2;
cv::resize(oImg(cv::Rect(left, top, m, m)), oImg, cv::Size(iImgSize.at(0), iImgSize.at(1)));
break;
}
}
return RET_OK;
}
char* YOLO_V8::CreateSession(DL_INIT_PARAM& iParams) {
char* Ret = RET_OK;
std::regex pattern("[\u4e00-\u9fa5]");
bool result = std::regex_search(iParams.modelPath, pattern);
if (result)
{
Ret = "[YOLO_V8]:Your model path is error.Change your model path without chinese characters.";
std::cout << Ret << std::endl;
return Ret;
}
try
{
rectConfidenceThreshold = iParams.rectConfidenceThreshold;
iouThreshold = iParams.iouThreshold;
imgSize = iParams.imgSize;
modelType = iParams.modelType;
env = Ort::Env(ORT_LOGGING_LEVEL_WARNING, "Yolo");
Ort::SessionOptions sessionOption;
if (iParams.cudaEnable)
{
cudaEnable = iParams.cudaEnable;
OrtCUDAProviderOptions cudaOption;
cudaOption.device_id = 0;
sessionOption.AppendExecutionProvider_CUDA(cudaOption);
}
sessionOption.SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_ENABLE_ALL);
sessionOption.SetIntraOpNumThreads(iParams.intraOpNumThreads);
sessionOption.SetLogSeverityLevel(iParams.logSeverityLevel);
#ifdef _WIN32
int ModelPathSize = MultiByteToWideChar(CP_UTF8, 0, iParams.modelPath.c_str(), static_cast<int>(iParams.modelPath.length()), nullptr, 0);
wchar_t* wide_cstr = new wchar_t[ModelPathSize + 1];
MultiByteToWideChar(CP_UTF8, 0, iParams.modelPath.c_str(), static_cast<int>(iParams.modelPath.length()), wide_cstr, ModelPathSize);
wide_cstr[ModelPathSize] = L'\0';
const wchar_t* modelPath = wide_cstr;
#else
const char* modelPath = iParams.modelPath.c_str();
#endif // _WIN32
session = new Ort::Session(env, modelPath, sessionOption);
Ort::AllocatorWithDefaultOptions allocator;
size_t inputNodesNum = session->GetInputCount();
for (size_t i = 0; i < inputNodesNum; i++)
{
Ort::AllocatedStringPtr input_node_name = session->GetInputNameAllocated(i, allocator);
char* temp_buf = new char[50];
strcpy(temp_buf, input_node_name.get());
inputNodeNames.push_back(temp_buf);
}
size_t OutputNodesNum = session->GetOutputCount();
for (size_t i = 0; i < OutputNodesNum; i++)
{
Ort::AllocatedStringPtr output_node_name = session->GetOutputNameAllocated(i, allocator);
char* temp_buf = new char[50]; // match the input-name buffer size so longer node names don't overflow
strcpy(temp_buf, output_node_name.get());
outputNodeNames.push_back(temp_buf);
}
options = Ort::RunOptions{ nullptr };
WarmUpSession();
return RET_OK;
}
catch (const std::exception& e)
{
const char* str1 = "[YOLO_V8]:";
const char* str2 = e.what();
std::string result = std::string(str1) + std::string(str2);
char* merged = new char[result.length() + 1];
std::strcpy(merged, result.c_str());
std::cout << merged << std::endl;
delete[] merged;
return "[YOLO_V8]:Create session failed.";
}
}
char* YOLO_V8::RunSession(cv::Mat& iImg, std::vector<DL_RESULT>& oResult) {
#ifdef benchmark
clock_t starttime_1 = clock();
#endif // benchmark
char* Ret = RET_OK;
cv::Mat processedImg;
PreProcess(iImg, imgSize, processedImg);
if (modelType < 4)
{
float* blob = new float[processedImg.total() * 3];
BlobFromImage(processedImg, blob);
std::vector<int64_t> inputNodeDims = { 1, 3, imgSize.at(0), imgSize.at(1) };
TensorProcess(starttime_1, iImg, blob, inputNodeDims, oResult);
}
else
{
#ifdef USE_CUDA
half* blob = new half[processedImg.total() * 3];
BlobFromImage(processedImg, blob);
std::vector<int64_t> inputNodeDims = { 1,3,imgSize.at(0),imgSize.at(1) };
TensorProcess(starttime_1, iImg, blob, inputNodeDims, oResult);
#endif
}
return Ret;
}
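// Build the input tensor, run the ONNX session, and decode the output: boxes plus class-wise NMS for detection models, per-class confidences for classification models.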
template<typename N>
char* YOLO_V8::TensorProcess(clock_t& starttime_1, cv::Mat& iImg, N& blob, std::vector<int64_t>& inputNodeDims,
std::vector<DL_RESULT>& oResult) {
Ort::Value inputTensor = Ort::Value::CreateTensor<typename std::remove_pointer<N>::type>(
Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU), blob, 3 * imgSize.at(0) * imgSize.at(1),
inputNodeDims.data(), inputNodeDims.size());
#ifdef benchmark
clock_t starttime_2 = clock();
#endif // benchmark
auto outputTensor = session->Run(options, inputNodeNames.data(), &inputTensor, 1, outputNodeNames.data(),
outputNodeNames.size());
#ifdef benchmark
clock_t starttime_3 = clock();
#endif // benchmark
Ort::TypeInfo typeInfo = outputTensor.front().GetTypeInfo();
auto tensor_info = typeInfo.GetTensorTypeAndShapeInfo();
std::vector<int64_t> outputNodeDims = tensor_info.GetShape();
auto output = outputTensor.front().GetTensorMutableData<typename std::remove_pointer<N>::type>();
delete[] blob;
switch (modelType)
{
case YOLO_DETECT_V8:
case YOLO_DETECT_V8_HALF:
{
int signalResultNum = outputNodeDims[1];//84
int strideNum = outputNodeDims[2];//8400
std::vector<int> class_ids;
std::vector<float> confidences;
std::vector<cv::Rect> boxes;
cv::Mat rawData;
if (modelType == YOLO_DETECT_V8)
{
// FP32
rawData = cv::Mat(signalResultNum, strideNum, CV_32F, output);
}
else
{
// FP16
rawData = cv::Mat(signalResultNum, strideNum, CV_16F, output);
rawData.convertTo(rawData, CV_32F);
}
// Note:
// Ultralytics adds a transpose operator to the output of the yolov8 model, which gives yolov8/v5/v7 the same output shape.
// https://github.com/ultralytics/assets/releases/download/v8.3.0/yolov8n.pt
rawData = rawData.t();
float* data = (float*)rawData.data;
for (int i = 0; i < strideNum; ++i)
{
float* classesScores = data + 4;
cv::Mat scores(1, this->classes.size(), CV_32FC1, classesScores);
cv::Point class_id;
double maxClassScore;
cv::minMaxLoc(scores, 0, &maxClassScore, 0, &class_id);
if (maxClassScore > rectConfidenceThreshold)
{
confidences.push_back(maxClassScore);
class_ids.push_back(class_id.x);
float x = data[0];
float y = data[1];
float w = data[2];
float h = data[3];
int left = int((x - 0.5 * w) * resizeScales);
int top = int((y - 0.5 * h) * resizeScales);
int width = int(w * resizeScales);
int height = int(h * resizeScales);
boxes.push_back(cv::Rect(left, top, width, height));
}
data += signalResultNum;
}
std::vector<int> nmsResult;
cv::dnn::NMSBoxes(boxes, confidences, rectConfidenceThreshold, iouThreshold, nmsResult);
for (int i = 0; i < nmsResult.size(); ++i)
{
int idx = nmsResult[i];
DL_RESULT result;
result.classId = class_ids[idx];
result.confidence = confidences[idx];
result.box = boxes[idx];
oResult.push_back(result);
}
#ifdef benchmark
clock_t starttime_4 = clock();
double pre_process_time = (double)(starttime_2 - starttime_1) / CLOCKS_PER_SEC * 1000;
double process_time = (double)(starttime_3 - starttime_2) / CLOCKS_PER_SEC * 1000;
double post_process_time = (double)(starttime_4 - starttime_3) / CLOCKS_PER_SEC * 1000;
if (cudaEnable)
{
std::cout << "[YOLO_V8(CUDA)]: " << pre_process_time << "ms pre-process, " << process_time << "ms inference, " << post_process_time << "ms post-process." << std::endl;
}
else
{
std::cout << "[YOLO_V8(CPU)]: " << pre_process_time << "ms pre-process, " << process_time << "ms inference, " << post_process_time << "ms post-process." << std::endl;
}
#endif // benchmark
break;
}
case YOLO_CLS:
case YOLO_CLS_HALF:
{
cv::Mat rawData;
if (modelType == YOLO_CLS) {
// FP32
rawData = cv::Mat(1, this->classes.size(), CV_32F, output);
} else {
// FP16
rawData = cv::Mat(1, this->classes.size(), CV_16F, output);
rawData.convertTo(rawData, CV_32F);
}
float *data = (float *) rawData.data;
DL_RESULT result;
for (int i = 0; i < this->classes.size(); i++)
{
result.classId = i;
result.confidence = data[i];
oResult.push_back(result);
}
break;
}
default:
std::cout << "[YOLO_V8]: " << "Not support model type." << std::endl;
}
return RET_OK;
}
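// Run one inference on a dummy image so session/CUDA initialization cost is paid before real requests are timed.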
char* YOLO_V8::WarmUpSession() {
clock_t starttime_1 = clock();
cv::Mat iImg = cv::Mat(cv::Size(imgSize.at(0), imgSize.at(1)), CV_8UC3);
cv::Mat processedImg;
PreProcess(iImg, imgSize, processedImg);
if (modelType < 4)
{
float* blob = new float[iImg.total() * 3];
BlobFromImage(processedImg, blob);
std::vector<int64_t> YOLO_input_node_dims = { 1, 3, imgSize.at(0), imgSize.at(1) };
Ort::Value input_tensor = Ort::Value::CreateTensor<float>(
Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU), blob, 3 * imgSize.at(0) * imgSize.at(1),
YOLO_input_node_dims.data(), YOLO_input_node_dims.size());
auto output_tensors = session->Run(options, inputNodeNames.data(), &input_tensor, 1, outputNodeNames.data(),
outputNodeNames.size());
delete[] blob;
clock_t starttime_4 = clock();
double post_process_time = (double)(starttime_4 - starttime_1) / CLOCKS_PER_SEC * 1000;
if (cudaEnable)
{
std::cout << "[YOLO_V8(CUDA)]: " << "Cuda warm-up cost " << post_process_time << " ms. " << std::endl;
}
}
else
{
#ifdef USE_CUDA
half* blob = new half[iImg.total() * 3];
BlobFromImage(processedImg, blob);
std::vector<int64_t> YOLO_input_node_dims = { 1,3,imgSize.at(0),imgSize.at(1) };
Ort::Value input_tensor = Ort::Value::CreateTensor<half>(Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU), blob, 3 * imgSize.at(0) * imgSize.at(1), YOLO_input_node_dims.data(), YOLO_input_node_dims.size());
auto output_tensors = session->Run(options, inputNodeNames.data(), &input_tensor, 1, outputNodeNames.data(), outputNodeNames.size());
delete[] blob;
clock_t starttime_4 = clock();
double post_process_time = (double)(starttime_4 - starttime_1) / CLOCKS_PER_SEC * 1000;
if (cudaEnable)
{
std::cout << "[YOLO_V8(CUDA)]: " << "Cuda warm-up cost " << post_process_time << " ms. " << std::endl;
}
#endif
}
return RET_OK;
}
#pragma once
#define RET_OK nullptr
#ifdef _WIN32
#include <Windows.h>
#include <direct.h>
#include <io.h>
#endif
#include <string>
#include <vector>
#include <cstdio>
#include <opencv2/opencv.hpp>
#include "onnxruntime_cxx_api.h"
#ifdef USE_CUDA
#include <cuda_fp16.h>
#endif
enum MODEL_TYPE
{
//FLOAT32 MODEL
YOLO_DETECT_V8 = 1,
YOLO_POSE = 2,
YOLO_CLS = 3,
//FLOAT16 MODEL
YOLO_DETECT_V8_HALF = 4,
YOLO_POSE_V8_HALF = 5,
YOLO_CLS_HALF = 6
};
typedef struct _DL_INIT_PARAM
{
std::string modelPath;
MODEL_TYPE modelType = YOLO_DETECT_V8;
std::vector<int> imgSize = { 640, 640 };
float rectConfidenceThreshold = 0.6;
float iouThreshold = 0.5;
int keyPointsNum = 2;//Note:kpt number for pose
bool cudaEnable = false;
int logSeverityLevel = 3;
int intraOpNumThreads = 1;
} DL_INIT_PARAM;
typedef struct _DL_RESULT
{
int classId;
float confidence;
cv::Rect box;
std::vector<cv::Point2f> keyPoints;
} DL_RESULT;
class YOLO_V8
{
public:
YOLO_V8();
~YOLO_V8();
public:
char* CreateSession(DL_INIT_PARAM& iParams);
char* RunSession(cv::Mat& iImg, std::vector<DL_RESULT>& oResult);
char* WarmUpSession();
template<typename N>
char* TensorProcess(clock_t& starttime_1, cv::Mat& iImg, N& blob, std::vector<int64_t>& inputNodeDims,
std::vector<DL_RESULT>& oResult);
char* PreProcess(cv::Mat& iImg, std::vector<int> iImgSize, cv::Mat& oImg);
std::vector<std::string> classes{};
private:
Ort::Env env;
Ort::Session* session;
bool cudaEnable;
Ort::RunOptions options;
std::vector<const char*> inputNodeNames;
std::vector<const char*> outputNodeNames;
MODEL_TYPE modelType;
std::vector<int> imgSize;
float rectConfidenceThreshold;
float iouThreshold;
float resizeScales;//letterbox scale
};
#include <iostream>
#include <iomanip>
#include "inference.h"
#include <filesystem>
#include <fstream>
#include <random>
void Detector(YOLO_V8*& p) {
std::filesystem::path current_path = std::filesystem::current_path();
std::filesystem::path imgs_path = current_path / "images";
for (auto& i : std::filesystem::directory_iterator(imgs_path))
{
if (i.path().extension() == ".jpg" || i.path().extension() == ".png" || i.path().extension() == ".jpeg")
{
std::string img_path = i.path().string();
cv::Mat img = cv::imread(img_path);
std::vector<DL_RESULT> res;
p->RunSession(img, res);
for (auto& re : res)
{
cv::RNG rng(cv::getTickCount());
cv::Scalar color(rng.uniform(0, 256), rng.uniform(0, 256), rng.uniform(0, 256));
cv::rectangle(img, re.box, color, 3);
float confidence = floor(100 * re.confidence) / 100;
std::cout << std::fixed << std::setprecision(2);
std::string label = p->classes[re.classId] + " " +
std::to_string(confidence).substr(0, std::to_string(confidence).size() - 4);
cv::rectangle(
img,
cv::Point(re.box.x, re.box.y - 25),
cv::Point(re.box.x + label.length() * 15, re.box.y),
color,
cv::FILLED
);
cv::putText(
img,
label,
cv::Point(re.box.x, re.box.y - 5),
cv::FONT_HERSHEY_SIMPLEX,
0.75,
cv::Scalar(0, 0, 0),
2
);
}
std::cout << "Press any key to exit" << std::endl;
cv::imshow("Result of Detection", img);
cv::waitKey(0);
cv::destroyAllWindows();
}
}
}
void Classifier(YOLO_V8*& p)
{
std::filesystem::path current_path = std::filesystem::current_path();
std::filesystem::path imgs_path = current_path;// / "images"
std::random_device rd;
std::mt19937 gen(rd());
std::uniform_int_distribution<int> dis(0, 255);
for (auto& i : std::filesystem::directory_iterator(imgs_path))
{
if (i.path().extension() == ".jpg" || i.path().extension() == ".png")
{
std::string img_path = i.path().string();
//std::cout << img_path << std::endl;
cv::Mat img = cv::imread(img_path);
std::vector<DL_RESULT> res;
char* ret = p->RunSession(img, res);
float positionY = 50;
for (int i = 0; i < res.size(); i++)
{
int r = dis(gen);
int g = dis(gen);
int b = dis(gen);
cv::putText(img, std::to_string(i) + ":", cv::Point(10, positionY), cv::FONT_HERSHEY_SIMPLEX, 1, cv::Scalar(b, g, r), 2);
cv::putText(img, std::to_string(res.at(i).confidence), cv::Point(70, positionY), cv::FONT_HERSHEY_SIMPLEX, 1, cv::Scalar(b, g, r), 2);
positionY += 50;
}
cv::imshow("TEST_CLS", img);
cv::waitKey(0);
cv::destroyAllWindows();
//cv::imwrite("E:\\output\\" + std::to_string(k) + ".png", img);
}
}
}
int ReadCocoYaml(YOLO_V8*& p) {
// Open the YAML file
std::ifstream file("coco.yaml");
if (!file.is_open())
{
std::cerr << "Failed to open file" << std::endl;
return 1;
}
// Read the file line by line
std::string line;
std::vector<std::string> lines;
while (std::getline(file, line))
{
lines.push_back(line);
}
// Find the start and end of the names section
std::size_t start = 0;
std::size_t end = 0;
for (std::size_t i = 0; i < lines.size(); i++)
{
if (lines[i].find("names:") != std::string::npos)
{
start = i + 1;
}
else if (start > 0 && lines[i].find(':') == std::string::npos)
{
end = i;
break;
}
}
// Extract the names
std::vector<std::string> names;
for (std::size_t i = start; i < end; i++)
{
std::stringstream ss(lines[i]);
std::string name;
std::getline(ss, name, ':'); // Extract the number before the delimiter
std::getline(ss, name); // Extract the string after the delimiter
names.push_back(name);
}
p->classes = names;
return 0;
}
void DetectTest()
{
YOLO_V8* yoloDetector = new YOLO_V8;
ReadCocoYaml(yoloDetector);
DL_INIT_PARAM params;
params.rectConfidenceThreshold = 0.1;
params.iouThreshold = 0.5;
params.modelPath = "yolov8n.onnx";
params.imgSize = { 640, 640 };
#ifdef USE_CUDA
params.cudaEnable = true;
// GPU FP32 inference
params.modelType = YOLO_DETECT_V8;
// GPU FP16 inference
//Note: change fp16 onnx model
//params.modelType = YOLO_DETECT_V8_HALF;
#else
// CPU inference
params.modelType = YOLO_DETECT_V8;
params.cudaEnable = false;
#endif
yoloDetector->CreateSession(params);
Detector(yoloDetector);
}
void ClsTest()
{
YOLO_V8* yoloDetector = new YOLO_V8;
std::string model_path = "cls.onnx";
ReadCocoYaml(yoloDetector);
DL_INIT_PARAM params{ model_path, YOLO_CLS, {224, 224} };
yoloDetector->CreateSession(params);
Classifier(yoloDetector);
}
int main()
{
//DetectTest();
ClsTest();
}
# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
[package]
name = "yolov8-rs"
version = "0.1.0"
edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
clap = { version = "4.2.4", features = ["derive"] }
image = { version = "0.25.2"}
imageproc = { version = "0.25.0"}
ndarray = { version = "0.16" }
ort = { version = "2.0.0-rc.5", features = ["cuda", "tensorrt", "load-dynamic", "copy-dylibs", "half"]}
rusttype = { version = "0.9.3" }
anyhow = { version = "1.0.75" }
regex = { version = "1.5.4" }
rand = { version = "0.8.5" }
chrono = { version = "0.4.30" }
half = { version = "2.3.1" }
dirs = { version = "5.0.1" }
ureq = { version = "2.9.1" }
ab_glyph = "0.2.29"
# YOLOv8-ONNXRuntime-Rust for All the Key YOLO Tasks
This repository provides a Rust demo for performing YOLOv8 tasks like `Classification`, `Segmentation`, `Detection`, `Pose Detection` and `OBB` using ONNXRuntime.
## Recently Updated
- Add YOLOv8-OBB demo
- Update ONNXRuntime to 1.19.x
Newly updated YOLOv8 example code is located in [this repository](https://github.com/jamjamjon/usls/tree/main/examples/yolo)
## Features
- Support `Classification`, `Segmentation`, `Detection`, `Pose(Keypoints)-Detection`, `OBB` tasks.
- Support `FP16` & `FP32` ONNX models.
- Support `CPU`, `CUDA` and `TensorRT` execution provider to accelerate computation.
- Support dynamic input shapes (`batch`, `width`, `height`).
## Installation
### 1. Install Rust
Please follow the official Rust installation guide (https://www.rust-lang.org/tools/install).
### 2. ONNXRuntime Linking
- #### For detailed setup instructions, refer to the [ORT documentation](https://ort.pyke.io/setup/linking).
- #### For Linux or macOS Users:
- Download the ONNX Runtime package from the [Releases page](https://github.com/microsoft/onnxruntime/releases).
- Set up the library path by exporting the `ORT_DYLIB_PATH` environment variable:
```shell
export ORT_DYLIB_PATH=/path/to/onnxruntime/lib/libonnxruntime.so.1.19.0
```
### 3. \[Optional\] Install CUDA & CuDNN & TensorRT
- CUDA execution provider requires CUDA v11.6+.
- TensorRT execution provider requires CUDA v11.4+ and TensorRT v8.4+.
## Get Started
### 1. Export the YOLOv8 ONNX Models
```bash
pip install -U ultralytics
# export onnx model with dynamic shapes
yolo export model=yolov8m.pt format=onnx simplify dynamic
yolo export model=yolov8m-cls.pt format=onnx simplify dynamic
yolo export model=yolov8m-pose.pt format=onnx simplify dynamic
yolo export model=yolov8m-seg.pt format=onnx simplify dynamic
# export onnx model with constant shapes
yolo export model=yolov8m.pt format=onnx simplify
yolo export model=yolov8m-cls.pt format=onnx simplify
yolo export model=yolov8m-pose.pt format=onnx simplify
yolo export model=yolov8m-seg.pt format=onnx simplify
```
### 2. Run Inference
It will perform inference with the ONNX model on the source image.
```bash
cargo run --release -- --model <MODEL> --source <SOURCE>
```
Set `--cuda` to use CUDA execution provider to speed up inference.
```bash
cargo run --release -- --cuda --model <MODEL> --source <SOURCE>
```
Set `--trt` to use TensorRT execution provider, and you can set `--fp16` at the same time to use TensorRT FP16 engine.
```bash
cargo run --release -- --trt --fp16 --model <MODEL> --source <SOURCE>
```
Set `--device_id` to select which device to run on. If you have only one GPU and set `device_id` to 1, the program will not panic; `ort` will automatically fall back to the `CPU` EP.
```bash
cargo run --release -- --cuda --device_id 0 --model <MODEL> --source <SOURCE>
```
Set `--batch` to do multi-batch-size inference.
If you're using `--trt`, you can also set `--batch-min` and `--batch-max` to explicitly specify the min/max/opt batch sizes for dynamic batch input (see https://onnxruntime.ai/docs/execution-providers/TensorRT-ExecutionProvider.html#explicit-shape-range-for-dynamic-shape-input). Note that the ONNX model should be exported with dynamic shapes.
```bash
cargo run --release -- --cuda --batch 2 --model <MODEL> --source <SOURCE>
```
Set `--height` and `--width` to do dynamic image size inference. (Note that the ONNX model should be exported with dynamic shapes.)
```bash
cargo run --release -- --cuda --width 480 --height 640 --model <MODEL> --source <SOURCE>
```
Set `--profile` to check the time consumed in each stage. (Note that the model usually needs 1~3 dry runs to warm up. Make sure to run enough times to evaluate the result.)
```bash
cargo run --release -- --trt --fp16 --profile --model <MODEL> --source <SOURCE>
```
Results: (yolov8m.onnx, batch=1, 3 times, trt, fp16, RTX 3060Ti)
```bash
==> 0
[Model Preprocess]: 12.75788ms
[ORT H2D]: 237.118µs
[ORT Inference]: 507.895469ms
[ORT D2H]: 191.655µs
[Model Inference]: 508.34589ms
[Model Postprocess]: 1.061122ms
==> 1
[Model Preprocess]: 13.658655ms
[ORT H2D]: 209.975µs
[ORT Inference]: 5.12372ms
[ORT D2H]: 182.389µs
[Model Inference]: 5.530022ms
[Model Postprocess]: 1.04851ms
==> 2
[Model Preprocess]: 12.475332ms
[ORT H2D]: 246.127µs
[ORT Inference]: 5.048432ms
[ORT D2H]: 187.117µs
[Model Inference]: 5.493119ms
[Model Postprocess]: 1.040906ms
```
And also:
`--conf`: confidence threshold \[default: 0.3\]
`--iou`: iou threshold in NMS \[default: 0.45\]
`--kconf`: confidence threshold of keypoint \[default: 0.55\]
`--plot`: plot inference result with random RGB color and save
You can check out all CLI arguments by running:
```bash
git clone https://github.com/ultralytics/ultralytics
cd ultralytics/examples/YOLOv8-ONNXRuntime-Rust
cargo run --release -- --help
```
## Examples
![Ultralytics YOLO Tasks](https://raw.githubusercontent.com/ultralytics/assets/main/im/banner-tasks.png)
### Classification
Running a dynamic-shape ONNX model on `CPU` with image size `--height 224 --width 224`, saving the plotted image in the `runs` directory.
```bash
cargo run --release -- --model ../assets/weights/yolov8m-cls-dyn.onnx --source ../assets/images/dog.jpg --height 224 --width 224 --plot --profile
```
You will see a result like:
```bash
Summary:
> Task: Classify (Ultralytics 8.0.217)
> EP: Cpu
> Dtype: Float32
> Batch: 1 (Dynamic), Height: 224 (Dynamic), Width: 224 (Dynamic)
> nc: 1000 nk: 0, nm: 0, conf: 0.3, kconf: 0.55, iou: 0.45
[Model Preprocess]: 16.363477ms
[ORT H2D]: 50.722µs
[ORT Inference]: 16.295808ms
[ORT D2H]: 8.37µs
[Model Inference]: 16.367046ms
[Model Postprocess]: 3.527µs
[
YOLOResult {
Probs(top5): Some([(208, 0.6950566), (209, 0.13823675), (178, 0.04849795), (215, 0.019029364), (212, 0.016506357)]),
Bboxes: None,
Keypoints: None,
Masks: None,
},
]
```
### Object Detection
Using `CUDA` EP and dynamic image size `--height 640 --width 480`
```bash
cargo run --release -- --cuda --model ../assets/weights/yolov8m-dynamic.onnx --source ../assets/images/bus.jpg --plot --height 640 --width 480
```
### Pose Detection
Using `TensorRT` EP
```bash
cargo run --release -- --trt --model ../assets/weights/yolov8m-pose.onnx --source ../assets/images/bus.jpg --plot
```
### Instance Segmentation
Using `TensorRT` EP and an FP16 model (`--fp16`)
```bash
cargo run --release -- --trt --fp16 --model ../assets/weights/yolov8m-seg.onnx --source ../assets/images/0172.jpg --plot
```
use clap::Parser;
use crate::YOLOTask;
#[derive(Parser, Clone)]
#[command(author, version, about, long_about = None)]
pub struct Args {
/// ONNX model path
#[arg(long, required = true)]
pub model: String,
/// input path
#[arg(long, required = true)]
pub source: String,
/// device id
#[arg(long, default_value_t = 0)]
pub device_id: i32,
/// using TensorRT EP
#[arg(long)]
pub trt: bool,
/// using CUDA EP
#[arg(long)]
pub cuda: bool,
/// input batch size
#[arg(long, default_value_t = 1)]
pub batch: u32,
/// trt input min_batch size
#[arg(long, default_value_t = 1)]
pub batch_min: u32,
/// trt input max_batch size
#[arg(long, default_value_t = 32)]
pub batch_max: u32,
/// using TensorRT --fp16
#[arg(long)]
pub fp16: bool,
/// specify YOLO task
#[arg(long, value_enum)]
pub task: Option<YOLOTask>,
/// num_classes
#[arg(long)]
pub nc: Option<u32>,
/// num_keypoints
#[arg(long)]
pub nk: Option<u32>,
/// num_masks
#[arg(long)]
pub nm: Option<u32>,
/// input image width
#[arg(long)]
pub width: Option<u32>,
/// input image height
#[arg(long)]
pub height: Option<u32>,
/// confidence threshold
#[arg(long, required = false, default_value_t = 0.3)]
pub conf: f32,
/// iou threshold in NMS
#[arg(long, required = false, default_value_t = 0.45)]
pub iou: f32,
/// confidence threshold of keypoint
#[arg(long, required = false, default_value_t = 0.55)]
pub kconf: f32,
/// plot inference result and save
#[arg(long)]
pub plot: bool,
/// check time consumed in each stage
#[arg(long)]
pub profile: bool,
}
#![allow(clippy::type_complexity)]
use std::io::{Read, Write};
pub mod cli;
pub mod model;
pub mod ort_backend;
pub mod yolo_result;
pub use crate::cli::Args;
pub use crate::model::YOLOv8;
pub use crate::ort_backend::{Batch, OrtBackend, OrtConfig, OrtEP, YOLOTask};
pub use crate::yolo_result::{Bbox, Embedding, Point2, YOLOResult};
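/// Greedy IoU-based non-maximum suppression: boxes are sorted by descending confidence and any box whose IoU with an already-kept box exceeds `iou_threshold` is dropped.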
pub fn non_max_suppression(
xs: &mut Vec<(Bbox, Option<Vec<Point2>>, Option<Vec<f32>>)>,
iou_threshold: f32,
) {
xs.sort_by(|b1, b2| b2.0.confidence().partial_cmp(&b1.0.confidence()).unwrap());
let mut current_index = 0;
for index in 0..xs.len() {
let mut drop = false;
for prev_index in 0..current_index {
let iou = xs[prev_index].0.iou(&xs[index].0);
if iou > iou_threshold {
drop = true;
break;
}
}
if !drop {
xs.swap(current_index, index);
current_index += 1;
}
}
xs.truncate(current_index);
}
pub fn gen_time_string(delimiter: &str) -> String {
let offset = chrono::FixedOffset::east_opt(8 * 60 * 60).unwrap(); // Beijing
let t_now = chrono::Utc::now().with_timezone(&offset);
let fmt = format!(
"%Y{}%m{}%d{}%H{}%M{}%S{}%f",
delimiter, delimiter, delimiter, delimiter, delimiter, delimiter
);
t_now.format(&fmt).to_string()
}
pub const SKELETON: [(usize, usize); 16] = [
(0, 1),
(0, 2),
(1, 3),
(2, 4),
(5, 6),
(5, 11),
(6, 12),
(11, 12),
(5, 7),
(6, 8),
(7, 9),
(8, 10),
(11, 13),
(12, 14),
(13, 15),
(14, 16),
];
pub fn check_font(font: &str) -> rusttype::Font<'static> {
// check then load font
// ultralytics font path
let font_path_config = match dirs::config_dir() {
Some(mut d) => {
d.push("Ultralytics");
d.push(font);
d
}
None => panic!("Unsupported operating system. Now support Linux, MacOS, Windows."),
};
// current font path
let font_path_current = std::path::PathBuf::from(font);
// check font
let font_path = if font_path_config.exists() {
font_path_config
} else if font_path_current.exists() {
font_path_current
} else {
println!("Downloading font...");
let source_url = "https://ultralytics.com/assets/Arial.ttf";
let resp = ureq::get(source_url)
.timeout(std::time::Duration::from_secs(500))
.call()
.unwrap_or_else(|err| panic!("> Failed to download font: {source_url}: {err:?}"));
// read to buffer
let mut buffer = vec![];
let total_size = resp
.header("Content-Length")
.and_then(|s| s.parse::<u64>().ok())
.unwrap();
let _reader = resp
.into_reader()
.take(total_size)
.read_to_end(&mut buffer)
.unwrap();
// save
let _path = std::fs::File::create(font).unwrap();
let mut writer = std::io::BufWriter::new(_path);
writer.write_all(&buffer).unwrap();
println!("Font saved at: {:?}", font_path_current.display());
font_path_current
};
// load font
let buffer = std::fs::read(font_path).unwrap();
rusttype::Font::try_from_vec(buffer).unwrap()
}
use ab_glyph::FontArc;
pub fn load_font() -> FontArc {
use std::path::Path;
let font_path = Path::new("./font/Arial.ttf");
match font_path.try_exists() {
Ok(true) => {
let buffer = std::fs::read(font_path).unwrap();
FontArc::try_from_vec(buffer).unwrap()
}
Ok(false) => {
std::fs::create_dir_all("./font").unwrap();
println!("Downloading font...");
let source_url = "https://ultralytics.com/assets/Arial.ttf";
let resp = ureq::get(source_url)
.timeout(std::time::Duration::from_secs(500))
.call()
.unwrap_or_else(|err| panic!("> Failed to download font: {source_url}: {err:?}"));
// read to buffer
let mut buffer = vec![];
let total_size = resp
.header("Content-Length")
.and_then(|s| s.parse::<u64>().ok())
.unwrap();
let _reader = resp
.into_reader()
.take(total_size)
.read_to_end(&mut buffer)
.unwrap();
// save
let mut fd = std::fs::File::create(font_path).unwrap();
fd.write_all(&buffer).unwrap();
println!("Font saved at: {:?}", font_path.display());
FontArc::try_from_vec(buffer).unwrap()
}
Err(e) => {
panic!("Failed to load font {}", e);
}
}
}
use clap::Parser;
use yolov8_rs::{Args, YOLOv8};
fn main() -> Result<(), Box<dyn std::error::Error>> {
let args = Args::parse();
// 1. load image
let x = image::ImageReader::open(&args.source)?
.with_guessed_format()?
.decode()?;
// 2. The model supports dynamic batch inference, so the input should be a Vec
let xs = vec![x];
// You can test `--batch 2` with this
// let xs = vec![x.clone(), x];
// 3. build yolov8 model
let mut model = YOLOv8::new(args)?;
model.summary(); // model info
// 4. run
let ys = model.run(&xs)?;
println!("{:?}", ys);
Ok(())
}
#![allow(clippy::type_complexity)]
use ab_glyph::FontArc;
use anyhow::Result;
use image::{DynamicImage, GenericImageView, ImageBuffer};
use ndarray::{s, Array, Axis, IxDyn};
use rand::{thread_rng, Rng};
use std::path::PathBuf;
use crate::{
gen_time_string, load_font, non_max_suppression, Args, Batch, Bbox, Embedding, OrtBackend,
OrtConfig, OrtEP, Point2, YOLOResult, YOLOTask, SKELETON,
};
pub struct YOLOv8 {
// YOLOv8 model for all yolo-tasks
engine: OrtBackend,
nc: u32,
nk: u32,
nm: u32,
height: u32,
width: u32,
batch: u32,
task: YOLOTask,
conf: f32,
kconf: f32,
iou: f32,
names: Vec<String>,
color_palette: Vec<(u8, u8, u8)>,
profile: bool,
plot: bool,
}
impl YOLOv8 {
pub fn new(config: Args) -> Result<Self> {
// execution provider
let ep = if config.trt {
OrtEP::Trt(config.device_id)
} else if config.cuda {
OrtEP::CUDA(config.device_id)
} else {
OrtEP::CPU
};
// batch
let batch = Batch {
opt: config.batch,
min: config.batch_min,
max: config.batch_max,
};
// build ort engine
let ort_args = OrtConfig {
ep,
batch,
f: config.model,
task: config.task,
trt_fp16: config.fp16,
image_size: (config.height, config.width),
};
let engine = OrtBackend::build(ort_args)?;
// get batch, height, width, tasks, nc, nk, nm
let (batch, height, width, task) = (
engine.batch(),
engine.height(),
engine.width(),
engine.task(),
);
let nc = engine.nc().or(config.nc).unwrap_or_else(|| {
panic!("Failed to get num_classes, make it explicit with `--nc`");
});
let (nk, nm) = match task {
YOLOTask::Pose => {
let nk = engine.nk().or(config.nk).unwrap_or_else(|| {
panic!("Failed to get num_keypoints, make it explicit with `--nk`");
});
(nk, 0)
}
YOLOTask::Segment => {
let nm = engine.nm().or(config.nm).unwrap_or_else(|| {
panic!("Failed to get num_masks, make it explicit with `--nm`");
});
(0, nm)
}
_ => (0, 0),
};
// class names
let names = engine.names().unwrap_or(vec!["Unknown".to_string()]);
// color palette
let mut rng = thread_rng();
let color_palette: Vec<_> = names
.iter()
.map(|_| {
(
rng.gen_range(0..=255),
rng.gen_range(0..=255),
rng.gen_range(0..=255),
)
})
.collect();
Ok(Self {
engine,
names,
conf: config.conf,
kconf: config.kconf,
iou: config.iou,
color_palette,
profile: config.profile,
plot: config.plot,
nc,
nk,
nm,
height,
width,
batch,
task,
})
}
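/// Letterbox helper: returns the resize ratio plus the resized (width, height) that fit (w0, h0) into (w1, h1) while preserving aspect ratio.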
pub fn scale_wh(&self, w0: f32, h0: f32, w1: f32, h1: f32) -> (f32, f32, f32) {
let r = (w1 / w0).min(h1 / h0);
(r, (w0 * r).round(), (h0 * r).round())
}
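/// Resize each image into an NCHW f32 array normalized to [0, 1]: exact resize for classification, aspect-preserving resize (padded with 144/255) for the other tasks.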
pub fn preprocess(&mut self, xs: &Vec<DynamicImage>) -> Result<Array<f32, IxDyn>> {
let mut ys =
Array::ones((xs.len(), 3, self.height() as usize, self.width() as usize)).into_dyn();
ys.fill(144.0 / 255.0);
for (idx, x) in xs.iter().enumerate() {
let img = match self.task() {
YOLOTask::Classify => x.resize_exact(
self.width(),
self.height(),
image::imageops::FilterType::Triangle,
),
_ => {
let (w0, h0) = x.dimensions();
let w0 = w0 as f32;
let h0 = h0 as f32;
let (_, w_new, h_new) =
self.scale_wh(w0, h0, self.width() as f32, self.height() as f32); // f32 round
x.resize_exact(
w_new as u32,
h_new as u32,
if let YOLOTask::Segment = self.task() {
image::imageops::FilterType::CatmullRom
} else {
image::imageops::FilterType::Triangle
},
)
}
};
for (x, y, rgb) in img.pixels() {
let x = x as usize;
let y = y as usize;
let [r, g, b, _] = rgb.0;
ys[[idx, 0, y, x]] = (r as f32) / 255.0;
ys[[idx, 1, y, x]] = (g as f32) / 255.0;
ys[[idx, 2, y, x]] = (b as f32) / 255.0;
}
}
Ok(ys)
}
pub fn run(&mut self, xs: &Vec<DynamicImage>) -> Result<Vec<YOLOResult>> {
// pre-process
let t_pre = std::time::Instant::now();
let xs_ = self.preprocess(xs)?;
if self.profile {
println!("[Model Preprocess]: {:?}", t_pre.elapsed());
}
// run
let t_run = std::time::Instant::now();
let ys = self.engine.run(xs_, self.profile)?;
if self.profile {
println!("[Model Inference]: {:?}", t_run.elapsed());
}
// post-process
let t_post = std::time::Instant::now();
let ys = self.postprocess(ys, xs)?;
if self.profile {
println!("[Model Postprocess]: {:?}", t_post.elapsed());
}
// plot and save
if self.plot {
self.plot_and_save(&ys, xs, Some(&SKELETON));
}
Ok(ys)
}
pub fn postprocess(
&self,
xs: Vec<Array<f32, IxDyn>>,
xs0: &[DynamicImage],
) -> Result<Vec<YOLOResult>> {
if let YOLOTask::Classify = self.task() {
let mut ys = Vec::new();
let preds = &xs[0];
for batch in preds.axis_iter(Axis(0)) {
ys.push(YOLOResult::new(
Some(Embedding::new(batch.into_owned())),
None,
None,
None,
));
}
Ok(ys)
} else {
const CXYWH_OFFSET: usize = 4; // cxcywh
const KPT_STEP: usize = 3; // xyconf
let preds = &xs[0];
let protos = {
if xs.len() > 1 {
Some(&xs[1])
} else {
None
}
};
let mut ys = Vec::new();
for (idx, anchor) in preds.axis_iter(Axis(0)).enumerate() {
// [bs, 4 + nc + nm, anchors]
// input image
let width_original = xs0[idx].width() as f32;
let height_original = xs0[idx].height() as f32;
let ratio = (self.width() as f32 / width_original)
.min(self.height() as f32 / height_original);
// save each result
let mut data: Vec<(Bbox, Option<Vec<Point2>>, Option<Vec<f32>>)> = Vec::new();
for pred in anchor.axis_iter(Axis(1)) {
// split preds for different tasks
let bbox = pred.slice(s![0..CXYWH_OFFSET]);
let clss = pred.slice(s![CXYWH_OFFSET..CXYWH_OFFSET + self.nc() as usize]);
let kpts = {
if let YOLOTask::Pose = self.task() {
Some(pred.slice(s![pred.len() - KPT_STEP * self.nk() as usize..]))
} else {
None
}
};
let coefs = {
if let YOLOTask::Segment = self.task() {
Some(pred.slice(s![pred.len() - self.nm() as usize..]).to_vec())
} else {
None
}
};
// confidence and id
let (id, &confidence) = clss
.into_iter()
.enumerate()
.reduce(|max, x| if x.1 > max.1 { x } else { max })
.unwrap(); // definitely will not panic!
// confidence filter
if confidence < self.conf {
continue;
}
// bbox re-scale
let cx = bbox[0] / ratio;
let cy = bbox[1] / ratio;
let w = bbox[2] / ratio;
let h = bbox[3] / ratio;
let x = cx - w / 2.;
let y = cy - h / 2.;
let y_bbox = Bbox::new(
x.max(0.0f32).min(width_original),
y.max(0.0f32).min(height_original),
w,
h,
id,
confidence,
);
// kpts
let y_kpts = {
if let Some(kpts) = kpts {
let mut kpts_ = Vec::new();
// rescale
for i in 0..self.nk() as usize {
let kx = kpts[KPT_STEP * i] / ratio;
let ky = kpts[KPT_STEP * i + 1] / ratio;
let kconf = kpts[KPT_STEP * i + 2];
if kconf < self.kconf {
kpts_.push(Point2::default());
} else {
kpts_.push(Point2::new_with_conf(
kx.max(0.0f32).min(width_original),
ky.max(0.0f32).min(height_original),
kconf,
));
}
}
Some(kpts_)
} else {
None
}
};
// data merged
data.push((y_bbox, y_kpts, coefs));
}
// nms
non_max_suppression(&mut data, self.iou);
// decode
let mut y_bboxes: Vec<Bbox> = Vec::new();
let mut y_kpts: Vec<Vec<Point2>> = Vec::new();
let mut y_masks: Vec<Vec<u8>> = Vec::new();
for elem in data.into_iter() {
if let Some(kpts) = elem.1 {
y_kpts.push(kpts)
}
// decode masks
if let Some(coefs) = elem.2 {
let proto = protos.unwrap().slice(s![idx, .., .., ..]);
let (nm, nh, nw) = proto.dim();
// coefs * proto -> mask
let coefs = Array::from_shape_vec((1, nm), coefs)?; // (n, nm)
let proto = proto.to_owned();
let proto = proto.to_shape((nm, nh * nw))?; // (nm, nh*nw)
let mask = coefs.dot(&proto); // (nh, nw, n)
let mask = mask.to_shape((nh, nw, 1))?;
// build image from ndarray
let mask_im: ImageBuffer<image::Luma<_>, Vec<f32>> =
match ImageBuffer::from_raw(
nw as u32,
nh as u32,
mask.to_owned().into_raw_vec_and_offset().0,
) {
Some(image) => image,
None => panic!("can not create image from ndarray"),
};
let mut mask_im = image::DynamicImage::from(mask_im); // -> dyn
// rescale masks
let (_, w_mask, h_mask) =
self.scale_wh(width_original, height_original, nw as f32, nh as f32);
let mask_cropped = mask_im.crop(0, 0, w_mask as u32, h_mask as u32);
let mask_original = mask_cropped.resize_exact(
// resize_to_fill
width_original as u32,
height_original as u32,
match self.task() {
YOLOTask::Segment => image::imageops::FilterType::CatmullRom,
_ => image::imageops::FilterType::Triangle,
},
);
// crop-mask with bbox
let mut mask_original_cropped = mask_original.into_luma8();
for y in 0..height_original as usize {
for x in 0..width_original as usize {
if x < elem.0.xmin() as usize
|| x > elem.0.xmax() as usize
|| y < elem.0.ymin() as usize
|| y > elem.0.ymax() as usize
{
mask_original_cropped.put_pixel(
x as u32,
y as u32,
image::Luma([0u8]),
);
}
}
}
y_masks.push(mask_original_cropped.into_raw());
}
y_bboxes.push(elem.0);
}
// save each result
let y = YOLOResult {
probs: None,
bboxes: if !y_bboxes.is_empty() {
Some(y_bboxes)
} else {
None
},
keypoints: if !y_kpts.is_empty() {
Some(y_kpts)
} else {
None
},
masks: if !y_masks.is_empty() {
Some(y_masks)
} else {
None
},
};
ys.push(y);
}
Ok(ys)
}
}
pub fn plot_and_save(
&self,
ys: &[YOLOResult],
xs0: &[DynamicImage],
skeletons: Option<&[(usize, usize)]>,
) {
// check font then load
let font: FontArc = load_font();
for (_idb, (img0, y)) in xs0.iter().zip(ys.iter()).enumerate() {
let mut img = img0.to_rgb8();
// draw for classifier
if let Some(probs) = y.probs() {
for (i, k) in probs.topk(5).iter().enumerate() {
let legend = format!("{} {:.2}%", self.names[k.0], k.1);
let scale = 32;
let legend_size = img.width().max(img.height()) / scale;
let x = img.width() / 20;
let y = img.height() / 20 + i as u32 * legend_size;
imageproc::drawing::draw_text_mut(
&mut img,
image::Rgb([0, 255, 0]),
x as i32,
y as i32,
legend_size as f32,
&font,
&legend,
);
}
}
// draw bboxes & keypoints
if let Some(bboxes) = y.bboxes() {
for (_idx, bbox) in bboxes.iter().enumerate() {
// rect
imageproc::drawing::draw_hollow_rect_mut(
&mut img,
imageproc::rect::Rect::at(bbox.xmin() as i32, bbox.ymin() as i32)
.of_size(bbox.width() as u32, bbox.height() as u32),
image::Rgb(self.color_palette[bbox.id()].into()),
);
// text
let legend = format!("{} {:.2}%", self.names[bbox.id()], bbox.confidence());
let scale = 40;
let legend_size = img.width().max(img.height()) / scale;
imageproc::drawing::draw_text_mut(
&mut img,
image::Rgb(self.color_palette[bbox.id()].into()),
bbox.xmin() as i32,
(bbox.ymin() - legend_size as f32) as i32,
legend_size as f32,
&font,
&legend,
);
}
}
// draw kpts
if let Some(keypoints) = y.keypoints() {
for kpts in keypoints.iter() {
for kpt in kpts.iter() {
// filter
if kpt.confidence() < self.kconf {
continue;
}
// draw point
imageproc::drawing::draw_filled_circle_mut(
&mut img,
(kpt.x() as i32, kpt.y() as i32),
2,
image::Rgb([0, 255, 0]),
);
}
// draw skeleton if provided
if let Some(skeletons) = skeletons {
for &(idx1, idx2) in skeletons.iter() {
let kpt1 = &kpts[idx1];
let kpt2 = &kpts[idx2];
if kpt1.confidence() < self.kconf || kpt2.confidence() < self.kconf {
continue;
}
imageproc::drawing::draw_line_segment_mut(
&mut img,
(kpt1.x(), kpt1.y()),
(kpt2.x(), kpt2.y()),
image::Rgb([233, 14, 57]),
);
}
}
}
}
// draw mask
if let Some(masks) = y.masks() {
for (mask, _bbox) in masks.iter().zip(y.bboxes().unwrap().iter()) {
let mask_nd: ImageBuffer<image::Luma<_>, Vec<u8>> =
match ImageBuffer::from_vec(img.width(), img.height(), mask.to_vec()) {
Some(image) => image,
None => panic!("can not crate image from ndarray"),
};
for _x in 0..img.width() {
for _y in 0..img.height() {
let mask_p = imageproc::drawing::Canvas::get_pixel(&mask_nd, _x, _y);
if mask_p.0[0] > 0 {
let mut img_p = imageproc::drawing::Canvas::get_pixel(&img, _x, _y);
// img_p.0[2] = self.color_palette[bbox.id()].2 / 2;
// img_p.0[1] = self.color_palette[bbox.id()].1 / 2;
// img_p.0[0] = self.color_palette[bbox.id()].0 / 2;
img_p.0[2] /= 2;
img_p.0[1] = 255 - (255 - img_p.0[2]) / 2;
img_p.0[0] /= 2;
imageproc::drawing::Canvas::draw_pixel(&mut img, _x, _y, img_p)
}
}
}
}
}
// mkdir and save
let mut runs = PathBuf::from("runs");
if !runs.exists() {
std::fs::create_dir_all(&runs).unwrap();
}
runs.push(gen_time_string("-"));
let saveout = format!("{}.jpg", runs.to_str().unwrap());
let _ = img.save(saveout);
}
}
pub fn summary(&self) {
println!(
"\nSummary:\n\
> Task: {:?}{}\n\
> EP: {:?} {}\n\
> Dtype: {:?}\n\
> Batch: {} ({}), Height: {} ({}), Width: {} ({})\n\
> nc: {}, nk: {}, nm: {}, conf: {}, kconf: {}, iou: {}\n\
",
self.task(),
match self.engine.author().zip(self.engine.version()) {
Some((author, ver)) => format!(" ({} {})", author, ver),
None => String::from(""),
},
self.engine.ep(),
if let OrtEP::CPU = self.engine.ep() {
""
} else {
"(May still fall back to CPU)"
},
self.engine.dtype(),
self.batch(),
if self.engine.is_batch_dynamic() {
"Dynamic"
} else {
"Const"
},
self.height(),
if self.engine.is_height_dynamic() {
"Dynamic"
} else {
"Const"
},
self.width(),
if self.engine.is_width_dynamic() {
"Dynamic"
} else {
"Const"
},
self.nc(),
self.nk(),
self.nm(),
self.conf,
self.kconf,
self.iou,
);
}
pub fn engine(&self) -> &OrtBackend {
&self.engine
}
pub fn conf(&self) -> f32 {
self.conf
}
pub fn set_conf(&mut self, val: f32) {
self.conf = val;
}
pub fn conf_mut(&mut self) -> &mut f32 {
&mut self.conf
}
pub fn kconf(&self) -> f32 {
self.kconf
}
pub fn iou(&self) -> f32 {
self.iou
}
pub fn task(&self) -> &YOLOTask {
&self.task
}
pub fn batch(&self) -> u32 {
self.batch
}
pub fn width(&self) -> u32 {
self.width
}
pub fn height(&self) -> u32 {
self.height
}
pub fn nc(&self) -> u32 {
self.nc
}
pub fn nk(&self) -> u32 {
self.nk
}
pub fn nm(&self) -> u32 {
self.nm
}
pub fn names(&self) -> &Vec<String> {
&self.names
}
}
use anyhow::Result;
use clap::ValueEnum;
use half::f16;
use ndarray::{Array, CowArray, IxDyn};
use ort::{
CPUExecutionProvider, CUDAExecutionProvider, ExecutionProvider, ExecutionProviderDispatch,
TensorRTExecutionProvider,
};
use ort::{Session, SessionBuilder};
use ort::{TensorElementType, ValueType};
use regex::Regex;
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, ValueEnum)]
pub enum YOLOTask {
// YOLO tasks
Classify,
Detect,
Pose,
Segment,
}
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)]
pub enum OrtEP {
// ONNXRuntime execution provider
CPU,
CUDA(i32),
Trt(i32),
}
#[derive(Debug)]
pub struct Batch {
pub opt: u32,
pub min: u32,
pub max: u32,
}
impl Default for Batch {
fn default() -> Self {
Self {
opt: 1,
min: 1,
max: 1,
}
}
}
#[derive(Debug, Default)]
pub struct OrtInputs {
// ONNX model inputs attrs
pub shapes: Vec<Vec<i64>>,
//pub dtypes: Vec<TensorElementDataType>,
pub dtypes: Vec<TensorElementType>,
pub names: Vec<String>,
pub sizes: Vec<Vec<u32>>,
}
impl OrtInputs {
pub fn new(session: &Session) -> Self {
let mut shapes = Vec::new();
let mut dtypes = Vec::new();
let mut names = Vec::new();
for i in session.inputs.iter() {
/* let shape: Vec<i32> = i
.dimensions()
.map(|x| if let Some(x) = x { x as i32 } else { -1i32 })
.collect();
shapes.push(shape); */
if let ort::ValueType::Tensor { ty, dimensions } = &i.input_type {
dtypes.push(ty.clone());
let shape = dimensions.clone();
shapes.push(shape);
} else {
panic!("不支持的数据格式, {} - {}", file!(), line!());
}
//dtypes.push(i.input_type);
names.push(i.name.clone());
}
Self {
shapes,
dtypes,
names,
..Default::default()
}
}
}
#[derive(Debug)]
pub struct OrtConfig {
// ORT config
pub f: String,
pub task: Option<YOLOTask>,
pub ep: OrtEP,
pub trt_fp16: bool,
pub batch: Batch,
pub image_size: (Option<u32>, Option<u32>),
}
#[derive(Debug)]
pub struct OrtBackend {
// ORT engine
session: Session,
task: YOLOTask,
ep: OrtEP,
batch: Batch,
inputs: OrtInputs,
}
impl OrtBackend {
pub fn build(args: OrtConfig) -> Result<Self> {
// build env & session
// in ort 2.x the Environment builder was removed
/* let env = ort::EnvironmentBuilder
::with_name("YOLOv8")
.build()?
.into_arc(); */
let sessionbuilder = SessionBuilder::new()?;
let session = sessionbuilder.commit_from_file(&args.f)?;
//let session = SessionBuilder::new(&env)?.with_model_from_file(&args.f)?;
// get inputs
let mut inputs = OrtInputs::new(&session);
// batch size
let mut batch = args.batch;
let batch = if inputs.shapes[0][0] == -1 {
batch
} else {
assert_eq!(
inputs.shapes[0][0] as u32, batch.opt,
"Expected batch size: {}, got {}. Try using `--batch {}`.",
inputs.shapes[0][0] as u32, batch.opt, inputs.shapes[0][0] as u32
);
batch.opt = inputs.shapes[0][0] as u32;
batch
};
// input size: height and width
let height = if inputs.shapes[0][2] == -1 {
match args.image_size.0 {
Some(height) => height,
None => panic!("Failed to get model height. Make it explicit with `--height`"),
}
} else {
inputs.shapes[0][2] as u32
};
let width = if inputs.shapes[0][3] == -1 {
match args.image_size.1 {
Some(width) => width,
None => panic!("Failed to get model width. Make it explicit with `--width`"),
}
} else {
inputs.shapes[0][3] as u32
};
inputs.sizes.push(vec![height, width]);
// build provider
let (ep, provider) = match args.ep {
OrtEP::CUDA(device_id) => Self::set_ep_cuda(device_id),
OrtEP::Trt(device_id) => Self::set_ep_trt(device_id, args.trt_fp16, &batch, &inputs),
_ => (
OrtEP::CPU,
ExecutionProviderDispatch::from(CPUExecutionProvider::default()),
),
};
// build session again with the new provider
let session = SessionBuilder::new()?
// .with_optimization_level(ort::GraphOptimizationLevel::Level3)?
.with_execution_providers([provider])?
.commit_from_file(args.f)?;
// task: using given one or guessing
let task = match args.task {
Some(task) => task,
None => match session.metadata() {
Err(_) => panic!("No metadata found. Try making it explicit by `--task`"),
Ok(metadata) => match metadata.custom("task") {
Err(_) => panic!("Can not get custom value. Try making it explicit by `--task`"),
Ok(value) => match value {
None => panic!("No corresponding value of `task` found in metadata. Make it explicit by `--task`"),
Some(task) => match task.as_str() {
"classify" => YOLOTask::Classify,
"detect" => YOLOTask::Detect,
"pose" => YOLOTask::Pose,
"segment" => YOLOTask::Segment,
x => todo!("{:?} is not supported for now!", x),
},
},
},
},
};
Ok(Self {
session,
task,
ep,
batch,
inputs,
})
}
pub fn fetch_inputs_from_session(
session: &Session,
) -> (Vec<Vec<i64>>, Vec<TensorElementType>, Vec<String>) {
// get inputs attrs from ONNX model
let mut shapes = Vec::new();
let mut dtypes = Vec::new();
let mut names = Vec::new();
for i in session.inputs.iter() {
if let ort::ValueType::Tensor { ty, dimensions } = &i.input_type {
dtypes.push(ty.clone());
let shape = dimensions.clone();
shapes.push(shape);
} else {
panic!("不支持的数据格式, {} - {}", file!(), line!());
}
names.push(i.name.clone());
}
(shapes, dtypes, names)
}
pub fn set_ep_cuda(device_id: i32) -> (OrtEP, ExecutionProviderDispatch) {
let cuda_provider = CUDAExecutionProvider::default().with_device_id(device_id);
if let Ok(true) = cuda_provider.is_available() {
(
OrtEP::CUDA(device_id),
ExecutionProviderDispatch::from(cuda_provider), //PlantForm::CUDA(cuda_provider)
)
} else {
println!("> CUDA is not available! Using CPU.");
(
OrtEP::CPU,
ExecutionProviderDispatch::from(CPUExecutionProvider::default()), //PlantForm::CPU(CPUExecutionProvider::default())
)
}
}
pub fn set_ep_trt(
device_id: i32,
fp16: bool,
batch: &Batch,
inputs: &OrtInputs,
) -> (OrtEP, ExecutionProviderDispatch) {
// set TensorRT
let trt_provider = TensorRTExecutionProvider::default().with_device_id(device_id);
//trt_provider.
if let Ok(true) = trt_provider.is_available() {
let (height, width) = (inputs.sizes[0][0], inputs.sizes[0][1]);
if inputs.dtypes[0] == TensorElementType::Float16 && !fp16 {
panic!(
"Dtype mismatch! Expected: Float32, got: {:?}. You should use `--fp16`",
inputs.dtypes[0]
);
}
// dynamic shape: input_tensor_1:dim_1xdim_2x...,input_tensor_2:dim_3xdim_4x...,...
let mut opt_string = String::new();
let mut min_string = String::new();
let mut max_string = String::new();
for name in inputs.names.iter() {
let s_opt = format!("{}:{}x3x{}x{},", name, batch.opt, height, width);
let s_min = format!("{}:{}x3x{}x{},", name, batch.min, height, width);
let s_max = format!("{}:{}x3x{}x{},", name, batch.max, height, width);
opt_string.push_str(s_opt.as_str());
min_string.push_str(s_min.as_str());
max_string.push_str(s_max.as_str());
}
let _ = opt_string.pop();
let _ = min_string.pop();
let _ = max_string.pop();
let trt_provider = trt_provider
.with_profile_opt_shapes(opt_string)
.with_profile_min_shapes(min_string)
.with_profile_max_shapes(max_string)
.with_fp16(fp16)
.with_timing_cache(true);
(
OrtEP::Trt(device_id),
ExecutionProviderDispatch::from(trt_provider),
)
} else {
println!("> TensorRT is not available! Try using CUDA...");
Self::set_ep_cuda(device_id)
}
}
pub fn fetch_from_metadata(&self, key: &str) -> Option<String> {
// fetch value from onnx model file by key
match self.session.metadata() {
Err(_) => None,
Ok(metadata) => match metadata.custom(key) {
Err(_) => None,
Ok(value) => value,
},
}
}
pub fn run(&self, xs: Array<f32, IxDyn>, profile: bool) -> Result<Vec<Array<f32, IxDyn>>> {
// ORT inference
match self.dtype() {
TensorElementType::Float16 => self.run_fp16(xs, profile),
TensorElementType::Float32 => self.run_fp32(xs, profile),
_ => todo!(),
}
}
pub fn run_fp16(&self, xs: Array<f32, IxDyn>, profile: bool) -> Result<Vec<Array<f32, IxDyn>>> {
// f32->f16
let t = std::time::Instant::now();
let xs = xs.mapv(f16::from_f32);
if profile {
println!("[ORT f32->f16]: {:?}", t.elapsed());
}
// h2d
let t = std::time::Instant::now();
let xs = CowArray::from(xs);
if profile {
println!("[ORT H2D]: {:?}", t.elapsed());
}
// run
let t = std::time::Instant::now();
let ys = self.session.run(ort::inputs![xs.view()]?)?;
if profile {
println!("[ORT Inference]: {:?}", t.elapsed());
}
// d2h
Ok(ys
.iter()
.map(|(_k, v)| {
// d2h
let t = std::time::Instant::now();
let v = v.try_extract_tensor().unwrap();
//let v = v.try_extract::<_>().unwrap().view().clone().into_owned();
if profile {
println!("[ORT D2H]: {:?}", t.elapsed());
}
// f16->f32
let t_ = std::time::Instant::now();
let v = v.mapv(f16::to_f32);
if profile {
println!("[ORT f16->f32]: {:?}", t_.elapsed());
}
v
})
.collect::<Vec<Array<_, _>>>())
}
pub fn run_fp32(&self, xs: Array<f32, IxDyn>, profile: bool) -> Result<Vec<Array<f32, IxDyn>>> {
// h2d
let t = std::time::Instant::now();
let xs = CowArray::from(xs);
if profile {
println!("[ORT H2D]: {:?}", t.elapsed());
}
// run
let t = std::time::Instant::now();
let ys = self.session.run(ort::inputs![xs.view()]?)?;
if profile {
println!("[ORT Inference]: {:?}", t.elapsed());
}
// d2h
Ok(ys
.iter()
.map(|(_k, v)| {
let t = std::time::Instant::now();
let v = v.try_extract_tensor::<f32>().unwrap().into_owned();
//let x = x.try_extract::<_>().unwrap().view().clone().into_owned();
if profile {
println!("[ORT D2H]: {:?}", t.elapsed());
}
v
})
.collect::<Vec<Array<_, _>>>())
}
pub fn output_shapes(&self) -> Vec<Vec<i64>> {
let mut shapes = Vec::new();
for output in &self.session.outputs {
if let ValueType::Tensor { ty: _, dimensions } = &output.output_type {
let shape = dimensions.clone();
shapes.push(shape);
} else {
panic!("not support data format, {} - {}", file!(), line!());
}
}
shapes
}
pub fn output_dtypes(&self) -> Vec<TensorElementType> {
let mut dtypes = Vec::new();
for output in &self.session.outputs {
if let ValueType::Tensor { ty, dimensions: _ } = &output.output_type {
dtypes.push(ty.clone());
} else {
panic!("not support data format, {} - {}", file!(), line!());
}
}
dtypes
}
pub fn input_shapes(&self) -> &Vec<Vec<i64>> {
&self.inputs.shapes
}
pub fn input_names(&self) -> &Vec<String> {
&self.inputs.names
}
pub fn input_dtypes(&self) -> &Vec<TensorElementType> {
&self.inputs.dtypes
}
pub fn dtype(&self) -> TensorElementType {
self.input_dtypes()[0]
}
pub fn height(&self) -> u32 {
self.inputs.sizes[0][0]
}
pub fn width(&self) -> u32 {
self.inputs.sizes[0][1]
}
pub fn is_height_dynamic(&self) -> bool {
self.input_shapes()[0][2] == -1
}
pub fn is_width_dynamic(&self) -> bool {
self.input_shapes()[0][3] == -1
}
pub fn batch(&self) -> u32 {
self.batch.opt
}
pub fn is_batch_dynamic(&self) -> bool {
self.input_shapes()[0][0] == -1
}
pub fn ep(&self) -> &OrtEP {
&self.ep
}
pub fn task(&self) -> YOLOTask {
self.task.clone()
}
pub fn names(&self) -> Option<Vec<String>> {
// class names, metadata parsing
// String format: `{0: 'person', 1: 'bicycle', 2: 'sports ball', ..., 27: "yellow_lady's_slipper"}`
match self.fetch_from_metadata("names") {
Some(names) => {
let re = Regex::new(r#"(['"])([-()\w '"]+)(['"])"#).unwrap();
let mut names_ = vec![];
for (_, [_, name, _]) in re.captures_iter(&names).map(|x| x.extract()) {
names_.push(name.to_string());
}
Some(names_)
}
None => None,
}
}
pub fn nk(&self) -> Option<u32> {
// num_keypoints, parsed from the `kpt_shape` metadata string in the ONNX model, e.g. `[17, 3]`
match self.fetch_from_metadata("kpt_shape") {
None => None,
Some(kpt_string) => {
let re = Regex::new(r"([0-9]+), ([0-9]+)").unwrap();
let caps = re.captures(&kpt_string).unwrap();
Some(caps.get(1).unwrap().as_str().parse::<u32>().unwrap())
}
}
}
pub fn nc(&self) -> Option<u32> {
// num_classes
match self.names() {
// by names
Some(names) => Some(names.len() as u32),
None => match self.task() {
// by task calculation
YOLOTask::Classify => Some(self.output_shapes()[0][1] as u32),
YOLOTask::Detect => {
if self.output_shapes()[0][1] == -1 {
None
} else {
// cx, cy, w, h + class scores
Some(self.output_shapes()[0][1] as u32 - 4)
}
}
YOLOTask::Pose => {
match self.nk() {
None => None,
Some(nk) => {
if self.output_shapes()[0][1] == -1 {
None
} else {
// cx, cy, w, h + class scores + 3 * nk keypoint values
Some(self.output_shapes()[0][1] as u32 - 4 - 3 * nk)
}
}
}
}
YOLOTask::Segment => {
if self.output_shapes()[0][1] == -1 {
None
} else {
// cx, cy, w, h + class scores + nm mask coefficients
Some((self.output_shapes()[0][1] - self.output_shapes()[1][1]) as u32 - 4)
}
}
},
}
}
pub fn nm(&self) -> Option<u32> {
// num_masks
match self.task() {
YOLOTask::Segment => Some(self.output_shapes()[1][1] as u32),
_ => None,
}
}
pub fn na(&self) -> Option<u32> {
// num_anchors
match self.task() {
YOLOTask::Segment | YOLOTask::Detect | YOLOTask::Pose => {
if self.output_shapes()[0][2] == -1 {
None
} else {
Some(self.output_shapes()[0][2] as u32)
}
}
_ => None,
}
}
pub fn author(&self) -> Option<String> {
self.fetch_from_metadata("author")
}
pub fn version(&self) -> Option<String> {
self.fetch_from_metadata("version")
}
}
use ndarray::{Array, Axis, IxDyn};
#[derive(Clone, PartialEq, Default)]
pub struct YOLOResult {
// YOLO tasks results of an image
pub probs: Option<Embedding>,
pub bboxes: Option<Vec<Bbox>>,
pub keypoints: Option<Vec<Vec<Point2>>>,
pub masks: Option<Vec<Vec<u8>>>,
}
impl std::fmt::Debug for YOLOResult {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.debug_struct("YOLOResult")
.field(
"Probs(top5)",
&format_args!("{:?}", self.probs().map(|probs| probs.topk(5))),
)
.field("Bboxes", &self.bboxes)
.field("Keypoints", &self.keypoints)
.field(
"Masks",
&format_args!("{:?}", self.masks().map(|masks| masks.len())),
)
.finish()
}
}
impl YOLOResult {
pub fn new(
probs: Option<Embedding>,
bboxes: Option<Vec<Bbox>>,
keypoints: Option<Vec<Vec<Point2>>>,
masks: Option<Vec<Vec<u8>>>,
) -> Self {
Self {
probs,
bboxes,
keypoints,
masks,
}
}
pub fn probs(&self) -> Option<&Embedding> {
self.probs.as_ref()
}
pub fn keypoints(&self) -> Option<&Vec<Vec<Point2>>> {
self.keypoints.as_ref()
}
pub fn masks(&self) -> Option<&Vec<Vec<u8>>> {
self.masks.as_ref()
}
pub fn bboxes(&self) -> Option<&Vec<Bbox>> {
self.bboxes.as_ref()
}
pub fn bboxes_mut(&mut self) -> Option<&mut Vec<Bbox>> {
self.bboxes.as_mut()
}
}
#[derive(Debug, PartialEq, Clone, Default)]
pub struct Point2 {
// A point2d with x, y, conf
x: f32,
y: f32,
confidence: f32,
}
impl Point2 {
pub fn new_with_conf(x: f32, y: f32, confidence: f32) -> Self {
Self { x, y, confidence }
}
pub fn new(x: f32, y: f32) -> Self {
Self {
x,
y,
..Default::default()
}
}
pub fn x(&self) -> f32 {
self.x
}
pub fn y(&self) -> f32 {
self.y
}
pub fn confidence(&self) -> f32 {
self.confidence
}
}
#[derive(Debug, Clone, PartialEq, Default)]
pub struct Embedding {
// A float32 n-dimensional tensor
data: Array<f32, IxDyn>,
}
impl Embedding {
pub fn new(data: Array<f32, IxDyn>) -> Self {
Self { data }
}
pub fn data(&self) -> &Array<f32, IxDyn> {
&self.data
}
pub fn topk(&self, k: usize) -> Vec<(usize, f32)> {
let mut probs = self
.data
.iter()
.enumerate()
.map(|(a, b)| (a, *b))
.collect::<Vec<_>>();
probs.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap());
let mut topk = Vec::new();
for &(id, confidence) in probs.iter().take(k) {
topk.push((id, confidence));
}
topk
}
pub fn norm(&self) -> Array<f32, IxDyn> {
let std_ = self.data.mapv(|x| x * x).sum_axis(Axis(0)).mapv(f32::sqrt);
self.data.clone() / std_
}
pub fn top1(&self) -> (usize, f32) {
self.topk(1)[0]
}
}
#[derive(Debug, Clone, PartialEq, Default)]
pub struct Bbox {
// a bounding box around an object
xmin: f32,
ymin: f32,
width: f32,
height: f32,
id: usize,
confidence: f32,
}
impl Bbox {
pub fn new_from_xywh(xmin: f32, ymin: f32, width: f32, height: f32) -> Self {
Self {
xmin,
ymin,
width,
height,
..Default::default()
}
}
pub fn new(xmin: f32, ymin: f32, width: f32, height: f32, id: usize, confidence: f32) -> Self {
Self {
xmin,
ymin,
width,
height,
id,
confidence,
}
}
pub fn width(&self) -> f32 {
self.width
}
pub fn height(&self) -> f32 {
self.height
}
pub fn xmin(&self) -> f32 {
self.xmin
}
pub fn ymin(&self) -> f32 {
self.ymin
}
pub fn xmax(&self) -> f32 {
self.xmin + self.width
}
pub fn ymax(&self) -> f32 {
self.ymin + self.height
}
pub fn tl(&self) -> Point2 {
Point2::new(self.xmin, self.ymin)
}
pub fn br(&self) -> Point2 {
Point2::new(self.xmax(), self.ymax())
}
pub fn cxcy(&self) -> Point2 {
Point2::new(self.xmin + self.width / 2., self.ymin + self.height / 2.)
}
pub fn id(&self) -> usize {
self.id
}
pub fn confidence(&self) -> f32 {
self.confidence
}
pub fn area(&self) -> f32 {
self.width * self.height
}
pub fn intersection_area(&self, another: &Bbox) -> f32 {
let l = self.xmin.max(another.xmin);
let r = (self.xmin + self.width).min(another.xmin + another.width);
let t = self.ymin.max(another.ymin);
let b = (self.ymin + self.height).min(another.ymin + another.height);
(r - l + 1.).max(0.) * (b - t + 1.).max(0.)
}
pub fn union(&self, another: &Bbox) -> f32 {
self.area() + another.area() - self.intersection_area(another)
}
pub fn iou(&self, another: &Bbox) -> f32 {
self.intersection_area(another) / self.union(another)
}
}
# YOLOv8 - ONNX Runtime
This project implements YOLOv8 using ONNX Runtime.
## Installation
To run this project, you need to install the required dependencies. The following instructions will guide you through the installation process.
### Installing Required Dependencies
You can install the required dependencies by running the following command:
```bash
pip install -r requirements.txt
```
### Installing `onnxruntime-gpu`
If you have an NVIDIA GPU and want to leverage GPU acceleration, you can install the `onnxruntime-gpu` package using the following command:
```bash
pip install onnxruntime-gpu
```
Note: Make sure you have the appropriate GPU drivers installed on your system.
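As an optional sanity check, you can ask ONNX Runtime which execution providers it actually sees on your machine; the provider names used here are the standard ONNX Runtime ones, and the same two providers are requested by the script in this example:

```python
# Optional check: list the execution providers available to ONNX Runtime.
# If "CUDAExecutionProvider" is missing, inference will silently fall back to the CPU.
import onnxruntime as ort

print(ort.get_available_providers())  # e.g. ['CUDAExecutionProvider', 'CPUExecutionProvider']
```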
### Installing `onnxruntime` (CPU version)
If you don't have an NVIDIA GPU or prefer to run on the CPU, you can install the `onnxruntime` package using the following command:
```bash
pip install onnxruntime
```
### Usage
After successfully installing the required packages, you can run the YOLOv8 implementation using the following command:
```bash
python main.py --model yolov8n.onnx --img image.jpg --conf-thres 0.5 --iou-thres 0.5
```
Make sure to replace `yolov8n.onnx` with the path to your YOLOv8 ONNX model file and `image.jpg` with the path to your input image, and adjust the confidence threshold (`--conf-thres`) and IoU threshold (`--iou-thres`) values as needed.
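You can also drive the `YOLOv8` class from `main.py` programmatically instead of using the CLI. A minimal sketch, assuming an exported `yolov8n.onnx` and an `image.jpg` sit next to `main.py` (both paths are placeholders):

```python
# Minimal programmatic usage of the YOLOv8 class defined in main.py.
# File paths and thresholds are placeholders; adjust them to your setup.
import cv2

from main import YOLOv8

detector = YOLOv8("yolov8n.onnx", "image.jpg", confidence_thres=0.5, iou_thres=0.5)
output_image = detector.main()  # preprocess -> ONNX Runtime inference -> postprocess (draws boxes)
cv2.imwrite("output.jpg", output_image)  # save the result instead of opening a display window
```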
# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
import argparse
import cv2
import numpy as np
import onnxruntime as ort
import torch
from ultralytics.utils import ASSETS, yaml_load
from ultralytics.utils.checks import check_requirements, check_yaml
class YOLOv8:
"""YOLOv8 object detection model class for handling inference and visualization."""
def __init__(self, onnx_model, input_image, confidence_thres, iou_thres):
"""
Initializes an instance of the YOLOv8 class.
Args:
onnx_model: Path to the ONNX model.
input_image: Path to the input image.
confidence_thres: Confidence threshold for filtering detections.
iou_thres: IoU (Intersection over Union) threshold for non-maximum suppression.
"""
self.onnx_model = onnx_model
self.input_image = input_image
self.confidence_thres = confidence_thres
self.iou_thres = iou_thres
# Load the class names from the COCO dataset
self.classes = yaml_load(check_yaml("coco8.yaml"))["names"]
# Generate a color palette for the classes
self.color_palette = np.random.uniform(0, 255, size=(len(self.classes), 3))
def draw_detections(self, img, box, score, class_id):
"""
Draws bounding boxes and labels on the input image based on the detected objects.
Args:
img: The input image to draw detections on.
box: Detected bounding box.
score: Corresponding detection score.
class_id: Class ID for the detected object.
Returns:
None
"""
# Extract the coordinates of the bounding box
x1, y1, w, h = box
# Retrieve the color for the class ID
color = self.color_palette[class_id]
# Draw the bounding box on the image
cv2.rectangle(img, (int(x1), int(y1)), (int(x1 + w), int(y1 + h)), color, 2)
# Create the label text with class name and score
label = f"{self.classes[class_id]}: {score:.2f}"
# Calculate the dimensions of the label text
(label_width, label_height), _ = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 1)
# Calculate the position of the label text
label_x = x1
label_y = y1 - 10 if y1 - 10 > label_height else y1 + 10
# Draw a filled rectangle as the background for the label text
cv2.rectangle(
img, (label_x, label_y - label_height), (label_x + label_width, label_y + label_height), color, cv2.FILLED
)
# Draw the label text on the image
cv2.putText(img, label, (label_x, label_y), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), 1, cv2.LINE_AA)
def preprocess(self):
"""
Preprocesses the input image before performing inference.
Returns:
image_data: Preprocessed image data ready for inference.
"""
# Read the input image using OpenCV
self.img = cv2.imread(self.input_image)
# Get the height and width of the input image
self.img_height, self.img_width = self.img.shape[:2]
# Convert the image color space from BGR to RGB
img = cv2.cvtColor(self.img, cv2.COLOR_BGR2RGB)
# Resize the image to match the input shape
img = cv2.resize(img, (self.input_width, self.input_height))
# Normalize the image data by dividing it by 255.0
image_data = np.array(img) / 255.0
# Transpose the image to have the channel dimension as the first dimension
image_data = np.transpose(image_data, (2, 0, 1)) # Channel first
# Expand the dimensions of the image data to match the expected input shape
image_data = np.expand_dims(image_data, axis=0).astype(np.float32)
# Return the preprocessed image data
return image_data
def postprocess(self, input_image, output):
"""
Performs post-processing on the model's output to extract bounding boxes, scores, and class IDs.
Args:
input_image (numpy.ndarray): The input image.
output (numpy.ndarray): The output of the model.
Returns:
numpy.ndarray: The input image with detections drawn on it.
"""
# Transpose and squeeze the output to match the expected shape
outputs = np.transpose(np.squeeze(output[0]))
# Get the number of rows in the outputs array
rows = outputs.shape[0]
# Lists to store the bounding boxes, scores, and class IDs of the detections
boxes = []
scores = []
class_ids = []
# Calculate the scaling factors for the bounding box coordinates
x_factor = self.img_width / self.input_width
y_factor = self.img_height / self.input_height
# Iterate over each row in the outputs array
for i in range(rows):
# Extract the class scores from the current row
classes_scores = outputs[i][4:]
# Find the maximum score among the class scores
max_score = np.amax(classes_scores)
# If the maximum score is above the confidence threshold
if max_score >= self.confidence_thres:
# Get the class ID with the highest score
class_id = np.argmax(classes_scores)
# Extract the bounding box coordinates from the current row
x, y, w, h = outputs[i][0], outputs[i][1], outputs[i][2], outputs[i][3]
# Calculate the scaled coordinates of the bounding box
left = int((x - w / 2) * x_factor)
top = int((y - h / 2) * y_factor)
width = int(w * x_factor)
height = int(h * y_factor)
# Add the class ID, score, and box coordinates to the respective lists
class_ids.append(class_id)
scores.append(max_score)
boxes.append([left, top, width, height])
# Apply non-maximum suppression to filter out overlapping bounding boxes
indices = cv2.dnn.NMSBoxes(boxes, scores, self.confidence_thres, self.iou_thres)
# Iterate over the selected indices after non-maximum suppression
for i in indices:
# Get the box, score, and class ID corresponding to the index
box = boxes[i]
score = scores[i]
class_id = class_ids[i]
# Draw the detection on the input image
self.draw_detections(input_image, box, score, class_id)
# Return the modified input image
return input_image
def main(self):
"""
Performs inference using an ONNX model and returns the output image with drawn detections.
Returns:
output_img: The output image with drawn detections.
"""
# Create an inference session using the ONNX model and specify execution providers
session = ort.InferenceSession(self.onnx_model, providers=["CUDAExecutionProvider", "CPUExecutionProvider"])
# Get the model inputs
model_inputs = session.get_inputs()
# Store the shape of the input for later use
input_shape = model_inputs[0].shape
self.input_height = input_shape[2]  # NCHW layout: index 2 is height
self.input_width = input_shape[3]  # NCHW layout: index 3 is width
# Preprocess the image data
img_data = self.preprocess()
# Run inference using the preprocessed image data
outputs = session.run(None, {model_inputs[0].name: img_data})
# Perform post-processing on the outputs to obtain output image.
return self.postprocess(self.img, outputs) # output image
if __name__ == "__main__":
# Create an argument parser to handle command-line arguments
parser = argparse.ArgumentParser()
parser.add_argument("--model", type=str, default="yolov8n.onnx", help="Input your ONNX model.")
parser.add_argument("--img", type=str, default=str(ASSETS / "bus.jpg"), help="Path to input image.")
parser.add_argument("--conf-thres", type=float, default=0.5, help="Confidence threshold")
parser.add_argument("--iou-thres", type=float, default=0.5, help="NMS IoU threshold")
args = parser.parse_args()
# Check the requirements and select the appropriate backend (CPU or GPU)
check_requirements("onnxruntime-gpu" if torch.cuda.is_available() else "onnxruntime")
# Create an instance of the YOLOv8 class with the specified arguments
detection = YOLOv8(args.model, args.img, args.conf_thres, args.iou_thres)
# Perform object detection and obtain the output image
output_image = detection.main()
# Display the output image in a window
cv2.namedWindow("Output", cv2.WINDOW_NORMAL)
cv2.imshow("Output", output_image)
# Wait for a key press to exit
cv2.waitKey(0)
# YOLOv8 - OpenCV
Implementation of YOLOv8 inference with OpenCV using the ONNX format.
Simply clone and run:
```bash
pip install -r requirements.txt
python main.py --model yolov8n.onnx --img image.jpg
```
If you are starting from scratch, export the model first:
```bash
pip install ultralytics
yolo export model=yolov8n.pt imgsz=640 format=onnx opset=12
```
_Make sure to include `opset=12`._
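If you prefer to call this example from Python rather than the command line, the `main()` function defined below returns the detections it draws. A minimal sketch (model and image names are placeholders):

```python
# Minimal programmatic usage of main() from this example's main.py.
# Note: main() also opens an OpenCV window and waits for a key press before returning.
from main import main

detections = main("yolov8n.onnx", "image.jpg")
for det in detections:
    print(det["class_name"], round(det["confidence"], 2), det["box"])
```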
# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
import argparse
import cv2.dnn
import numpy as np
from ultralytics.utils import ASSETS, yaml_load
from ultralytics.utils.checks import check_yaml
CLASSES = yaml_load(check_yaml("coco8.yaml"))["names"]
colors = np.random.uniform(0, 255, size=(len(CLASSES), 3))
def draw_bounding_box(img, class_id, confidence, x, y, x_plus_w, y_plus_h):
"""
Draws bounding boxes on the input image based on the provided arguments.
Args:
img (numpy.ndarray): The input image to draw the bounding box on.
class_id (int): Class ID of the detected object.
confidence (float): Confidence score of the detected object.
x (int): X-coordinate of the top-left corner of the bounding box.
y (int): Y-coordinate of the top-left corner of the bounding box.
x_plus_w (int): X-coordinate of the bottom-right corner of the bounding box.
y_plus_h (int): Y-coordinate of the bottom-right corner of the bounding box.
"""
label = f"{CLASSES[class_id]} ({confidence:.2f})"
color = colors[class_id]
cv2.rectangle(img, (x, y), (x_plus_w, y_plus_h), color, 2)
cv2.putText(img, label, (x - 10, y - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)
def main(onnx_model, input_image):
"""
Main function to load ONNX model, perform inference, draw bounding boxes, and display the output image.
Args:
onnx_model (str): Path to the ONNX model.
input_image (str): Path to the input image.
Returns:
list: List of dictionaries containing detection information such as class_id, class_name, confidence, etc.
"""
# Load the ONNX model
model: cv2.dnn.Net = cv2.dnn.readNetFromONNX(onnx_model)
# Read the input image
original_image: np.ndarray = cv2.imread(input_image)
[height, width, _] = original_image.shape
# Prepare a square image for inference
length = max((height, width))
image = np.zeros((length, length, 3), np.uint8)
image[0:height, 0:width] = original_image
# Calculate scale factor
scale = length / 640
# Preprocess the image and prepare blob for model
blob = cv2.dnn.blobFromImage(image, scalefactor=1 / 255, size=(640, 640), swapRB=True)
model.setInput(blob)
# Perform inference
outputs = model.forward()
# Prepare output array
outputs = np.array([cv2.transpose(outputs[0])])
rows = outputs.shape[1]
boxes = []
scores = []
class_ids = []
# Iterate through output to collect bounding boxes, confidence scores, and class IDs
for i in range(rows):
classes_scores = outputs[0][i][4:]
(minScore, maxScore, minClassLoc, (x, maxClassIndex)) = cv2.minMaxLoc(classes_scores)
if maxScore >= 0.25:
box = [
outputs[0][i][0] - (0.5 * outputs[0][i][2]),
outputs[0][i][1] - (0.5 * outputs[0][i][3]),
outputs[0][i][2],
outputs[0][i][3],
]
boxes.append(box)
scores.append(maxScore)
class_ids.append(maxClassIndex)
# Apply NMS (Non-maximum suppression)
result_boxes = cv2.dnn.NMSBoxes(boxes, scores, 0.25, 0.45, 0.5)
detections = []
# Iterate through NMS results to draw bounding boxes and labels
for i in range(len(result_boxes)):
index = result_boxes[i]
box = boxes[index]
detection = {
"class_id": class_ids[index],
"class_name": CLASSES[class_ids[index]],
"confidence": scores[index],
"box": box,
"scale": scale,
}
detections.append(detection)
draw_bounding_box(
original_image,
class_ids[index],
scores[index],
round(box[0] * scale),
round(box[1] * scale),
round((box[0] + box[2]) * scale),
round((box[1] + box[3]) * scale),
)
# Display the image with bounding boxes
cv2.imshow("image", original_image)
cv2.waitKey(0)
cv2.destroyAllWindows()
return detections
if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument("--model", default="yolov8n.onnx", help="Input your ONNX model.")
parser.add_argument("--img", default=str(ASSETS / "bus.jpg"), help="Path to input image.")
args = parser.parse_args()
main(args.model, args.img)
cmake_minimum_required(VERSION 3.12)
project(yolov8_openvino_example)
set(CMAKE_CXX_STANDARD 14)
find_package(OpenCV REQUIRED)
include_directories(
${OpenCV_INCLUDE_DIRS}
/path/to/intel/openvino/runtime/include
)
add_executable(detect
main.cc
inference.cc
)
target_link_libraries(detect
${OpenCV_LIBS}
/path/to/intel/openvino/runtime/lib/intel64/libopenvino.so
)