"src/vscode:/vscode.git/clone" did not exist on "bd0bd7ef2cdd81ad60022607af723b257ee4f7a4"
Commit 0b55404d authored by lijian6's avatar lijian6
Browse files

first init

parent e012f86c
#include "yolov8Predictor.h"
YOLOPredictor::YOLOPredictor(const std::string &modelPath,
const bool &isGPU,
float confThreshold,
float iouThreshold,
float maskThreshold)
{
this->confThreshold = confThreshold;
this->iouThreshold = iouThreshold;
this->maskThreshold = maskThreshold;
env = Ort::Env(OrtLoggingLevel::ORT_LOGGING_LEVEL_WARNING, "YOLOV8");
sessionOptions = Ort::SessionOptions();
// std::vector<std::string> availableProviders = Ort::GetAvailableProviders();
// auto cudaAvailable = std::find(availableProviders.begin(), availableProviders.end(), "CUDAExecutionProvider");
// OrtCUDAProviderOptions cudaOption;
OrtROCMProviderOptions rocmOption;
rocmOption.device_id = 0;
std::cout << "Inference device: DCU" << std::endl;
// sessionOptions.AppendExecutionProvider_CUDA(cudaOption);
sessionOptions.AppendExecutionProvider_ROCM(rocmOption);
session = Ort::Session(env, modelPath.c_str(), sessionOptions);
const size_t num_input_nodes = session.GetInputCount(); //==1
const size_t num_output_nodes = session.GetOutputCount(); //==1,2
if (num_output_nodes > 1)
{
this->hasMask = true;
std::cout << "Instance Segmentation" << std::endl;
}
else
std::cout << "Object Detection" << std::endl;
Ort::AllocatorWithDefaultOptions allocator;
for (int i = 0; i < num_input_nodes; i++)
{
auto input_name = session.GetInputNameAllocated(i, allocator);
this->inputNames.push_back(input_name.get());
input_names_ptr.push_back(std::move(input_name));
Ort::TypeInfo inputTypeInfo = session.GetInputTypeInfo(i);
std::vector<int64_t> inputTensorShape = inputTypeInfo.GetTensorTypeAndShapeInfo().GetShape();
this->inputShapes.push_back(inputTensorShape);
this->isDynamicInputShape = false;
// checking if width and height are dynamic
if (inputTensorShape[2] == -1 && inputTensorShape[3] == -1)
{
std::cout << "Dynamic input shape" << std::endl;
this->isDynamicInputShape = true;
}
}
for (int i = 0; i < num_output_nodes; i++)
{
auto output_name = session.GetOutputNameAllocated(i, allocator);
this->outputNames.push_back(output_name.get());
output_names_ptr.push_back(std::move(output_name));
Ort::TypeInfo outputTypeInfo = session.GetOutputTypeInfo(i);
std::vector<int64_t> outputTensorShape = outputTypeInfo.GetTensorTypeAndShapeInfo().GetShape();
this->outputShapes.push_back(outputTensorShape);
if (i == 0)
{
if (!this->hasMask)
classNums = outputTensorShape[1] - 4;
else
classNums = outputTensorShape[1] - 4 - 32;
}
}
}
void YOLOPredictor::getBestClassInfo(std::vector<float>::iterator it,
float &bestConf,
int &bestClassId,
const int _classNums)
{
// first 4 element are box
bestClassId = 4;
bestConf = 0;
for (int i = 4; i < _classNums + 4; i++)
{
if (it[i] > bestConf)
{
bestConf = it[i];
bestClassId = i - 4;
}
}
}
cv::Mat YOLOPredictor::getMask(const cv::Mat &maskProposals,
const cv::Mat &maskProtos)
{
cv::Mat protos = maskProtos.reshape(0, {(int)this->outputShapes[1][1], (int)this->outputShapes[1][2] * (int)this->outputShapes[1][3]});
cv::Mat matmul_res = (maskProposals * protos).t();
cv::Mat masks = matmul_res.reshape(1, {(int)this->outputShapes[1][2], (int)this->outputShapes[1][3]});
cv::Mat dest;
// sigmoid
cv::exp(-masks, dest);
dest = 1.0 / (1.0 + dest);
cv::resize(dest, dest, cv::Size((int)this->inputShapes[0][2], (int)this->inputShapes[0][3]), cv::INTER_LINEAR);
return dest;
}
void YOLOPredictor::preprocessing(std::vector<cv::Mat> &images, float *&blob, std::vector<int64_t> &inputTensorShape)
{
int batchSize = images.size();
if(batchSize == 0) return;
int targetWidth = (int)this->inputShapes[0][2];
int targetHeight = (int)this->inputShapes[0][3];
size_t totalElements = batchSize * 3 * targetWidth * targetHeight;
blob = new float[totalElements];
inputTensorShape = {batchSize, 3, targetHeight, targetWidth};
for (int b = 0; b < batchSize; ++b)
{
cv::Mat resizedImage, floatImage;
cv::cvtColor(images[b], resizedImage, cv::COLOR_BGR2RGB);
utils::letterbox(resizedImage, resizedImage, cv::Size((int)this->inputShapes[0][2], (int)this->inputShapes[0][3]),
cv::Scalar(114, 114, 114), this->isDynamicInputShape,
false, true, 32);
resizedImage.convertTo(floatImage, CV_32FC3, 1 / 255.0);
cv::Size floatImageSize{floatImage.cols, floatImage.rows};
// hwc -> chw
std::vector<cv::Mat> chw(floatImage.channels());
for (int i = 0; i < floatImage.channels(); ++i)
{
chw[i] = cv::Mat(floatImageSize, CV_32FC1, blob + b * 3 * floatImageSize.width * floatImageSize.height + i * floatImageSize.width * floatImageSize.height);
}
cv::split(floatImage, chw);
}
}
std::vector<std::vector<Yolov8Result>> YOLOPredictor::postprocessing(const cv::Size &resizedImageShape,
const std::vector<cv::Size> &originalImageShapes,
std::vector<Ort::Value> &outputTensors)
{
int batchsize = originalImageShapes.size();
if(batchsize == 0) return {};
// for box
std::vector<std::vector<cv::Rect>> boxes(batchsize);
std::vector<std::vector<float>> confs(batchsize);
std::vector<std::vector<int>> classIds(batchsize);
float *boxOutput = outputTensors[0].GetTensorMutableData<float>();
int rows = (int)this->outputShapes[0][2];
int cols = (int)this->outputShapes[0][1];
// if hasMask
std::vector<std::vector<std::vector<float>>> picked_proposals(batchsize);
std::vector<cv::Mat> mask_protos(batchsize);
//[1,4+n,8400]=>[1,8400,4+n] or [1,4+n+32,8400]=>[1,8400,4+n+32]
cv::Mat output0 = cv::Mat(cv::Size((int)this->outputShapes[0][2], (int)this->outputShapes[0][1]), CV_32F, boxOutput).t();
float *output0ptr = (float *)output0.data;
// std::cout << rows << cols << std::endl;
for (int b = 0; b < batchsize; ++b)
{
//[1,4+n,8400]=>[1,8400,4+n] or [1,4+n+32,8400]=>[1,8400,4+n+32]
cv::Mat output0 = cv::Mat(cv::Size((int)this->outputShapes[0][2], (int)this->outputShapes[0][1]), CV_32F, boxOutput + b * rows * cols).t();
float *output0ptr = (float *)output0.data;
for (int i = 0; i < rows; i++)
{
std::vector<float> it(output0ptr + i * cols, output0ptr + (i + 1) * cols);
float confidence;
int classId;
this->getBestClassInfo(it.begin(), confidence, classId, classNums);
if (confidence > this->confThreshold)
{
if (this->hasMask)
{
std::vector<float> temp(it.begin() + 4 + classNums, it.end());
picked_proposals[b].push_back(temp);
}
int centerX = (int)(it[0]);
int centerY = (int)(it[1]);
int width = (int)(it[2]);
int height = (int)(it[3]);
int left = centerX - width / 2;
int top = centerY - height / 2;
boxes[b].emplace_back(left, top, width, height);
confs[b].emplace_back(confidence);
classIds[b].emplace_back(classId);
}
}
if (this->hasMask)
{
float* maskOutput = outputTensors[1].GetTensorMutableData<float>() + b * this->outputShapes[1][1] * this->outputShapes[1][2] * this->outputShapes[1][3];
std::vector<int> mask_protos_shape = {1, (int)this->outputShapes[1][1], (int)this->outputShapes[1][2], (int)this->outputShapes[1][3]};
mask_protos[b] = cv::Mat(mask_protos_shape, CV_32F, maskOutput);
}
}
std::vector<std::vector<Yolov8Result>> results(batchsize);
for (int b = 0; b < batchsize; ++b) {
std::vector<int> indices;
cv::dnn::NMSBoxes(boxes[b], confs[b], this->confThreshold, this->iouThreshold, indices);
for (int idx : indices) {
Yolov8Result res;
res.box = cv::Rect(boxes[b][idx]);
if (this->hasMask)
res.boxMask = this->getMask(cv::Mat(picked_proposals[b][idx]).t(), mask_protos[b]);
else
res.boxMask = cv::Mat::zeros((int)this->inputShapes[0][2], (int)this->inputShapes[0][3], CV_8U);
utils::scaleCoords(res.box, res.boxMask, this->maskThreshold, resizedImageShape, originalImageShapes[b]);
res.conf = confs[b][idx];
res.classId = classIds[b][idx];
results[b].emplace_back(res);
}
}
return results;
}
std::vector<std::vector<Yolov8Result>> YOLOPredictor::predict(std::vector<cv::Mat>& images) {
if (images.empty()) return {};
float* blob = nullptr;
std::vector<int64_t> inputTensorShape{1, 3, -1, -1};
this->preprocessing(images, blob, inputTensorShape);
size_t inputTensorSize = utils::vectorProduct(inputTensorShape);
std::vector<float> inputTensorValues(blob, blob + inputTensorSize);
std::vector<Ort::Value> inputTensors;
Ort::MemoryInfo memoryInfo = Ort::MemoryInfo::CreateCpu(OrtAllocatorType::OrtArenaAllocator, OrtMemType::OrtMemTypeDefault);
inputTensors.push_back(Ort::Value::CreateTensor<float>(
memoryInfo, inputTensorValues.data(), inputTensorSize,
inputTensorShape.data(), inputTensorShape.size()));
std::vector<Ort::Value> outputTensors = this->session.Run(Ort::RunOptions{nullptr},
this->inputNames.data(),
inputTensors.data(),
1,
this->outputNames.data(),
this->outputNames.size());
cv::Size resizedShapes(cv::Size((int)inputTensorShape[3], (int)inputTensorShape[2]));
std::vector<cv::Size> originalShapes;
for (const auto& image : images) {
originalShapes.push_back(image.size());
}
std::vector<std::vector<Yolov8Result>> results = this->postprocessing(resizedShapes,
originalShapes,
outputTensors);
delete[] blob;
return results;
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment