#include "DetectorYOLOV3.h"

using namespace std;
using namespace cv::dnn;

namespace {
// Proposal is kept only if its objectness score exceeds this.
constexpr float kObjectnessThreshold = 0.5f;
// Final (objectness * class) confidence required to keep a detection.
constexpr float kConfidenceThreshold = 0.25f;
// Score / IoU thresholds handed to cv::dnn::NMSBoxes.
constexpr float kNmsScoreThreshold = 0.25f;
constexpr float kNmsIouThreshold = 0.5f;
}  // namespace

// Frees the ONNX Runtime session and drops the cached input/output name buffers.
DetectorYOLOV3::~DetectorYOLOV3() {
    delete session;
    inputNamesPtr.clear();
    outputNamesPtr.clear();
}

// Selects the execution provider: gpuIndex >= 0 appends the ROCm provider for
// that device, otherwise inference stays on CPU.
void DetectorYOLOV3::setGpuIndex(int gpuIndex) {
    if (gpuIndex >= 0) {
        OrtROCMProviderOptions rocm_options;
        rocm_options.device_id = gpuIndex;
        rocm_options.arena_extend_strategy = 0;           // default arena growth
        rocm_options.miopen_conv_exhaustive_search = 0;   // skip slow conv autotuning
        rocm_options.do_copy_in_default_stream = 1;
        sessionOptions.AppendExecutionProvider_ROCM(rocm_options);
    } else {
        printf("det use CPU\n");
    }
}

// Configures the session's thread count and graph-optimization level.
// NOTE(review): this sets the *inter*-op pool; intra-op parallelism is left at
// its default — confirm that is the intended knob.
void DetectorYOLOV3::setNumThread(int numOfThread) {
    numThread = numOfThread;
    sessionOptions.SetInterOpNumThreads(numThread);
    sessionOptions.SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_ENABLE_BASIC);
}

// Loads the ONNX model at pathStr, caches its input/output names and input
// dimensions, and reads one class label per line from pathOfClassNameFile.
// NOTE(review): if the class-name file cannot be opened, classNames stays
// empty and detections would index out of range — Detect() guards for this.
void DetectorYOLOV3::initModel(const std::string &pathStr, const std::string &pathOfClassNameFile) {
    fprintf(stdout, "Start init model for %s\n", pathStr.c_str());
    session = new Ort::Session(env, pathStr.c_str(), sessionOptions);
    inputNamesPtr = getInputNames(session);
    inputDim = getInputDim(session);  // NCHW input shape of the network
    outputNamesPtr = getOutputNames(session);

    std::ifstream classNameFile(pathOfClassNameFile);
    string line;
    while (getline(classNameFile, line)) {
        classNames.push_back(line);
    }
}

// Runs one forward pass on srcImage, decodes the YOLOv3 output
// (cx, cy, w, h, objectness, per-class scores), suppresses overlapping boxes
// with NMS, draws the surviving detections on srcImage, and prints them.
void DetectorYOLOV3::Detect(cv::Mat srcImage) {
    std::vector<ResultOfDetection> resultsOfDetection;

    // Resize to the network's input resolution (inputDim is NCHW: [3]=W, [2]=H).
    cv::Size inputSize = cv::Size(inputDim[3], inputDim[2]);
    cv::Mat srcResize;
    resize(srcImage, srcResize, inputSize, 0, 0, cv::INTER_LINEAR);

    // HWC image -> normalized CHW float blob (mean/scale applied per channel).
    std::vector<float> inputTensorValues = substractMeanNormalize(srcResize, meanValues, normValues);
    std::array<int64_t, 4> inputShape{1, srcResize.channels(), srcResize.rows, srcResize.cols};
    auto memoryInfo = Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU);
    Ort::Value inputTensor = Ort::Value::CreateTensor<float>(
        memoryInfo, inputTensorValues.data(), inputTensorValues.size(),
        inputShape.data(), inputShape.size());

    std::vector<const char *> inputNames = {inputNamesPtr.data()->get()};
    std::vector<const char *> outputNames = {outputNamesPtr.data()->get()};

    // Inference.
    auto outputTensor = session->Run(Ort::RunOptions{nullptr},
                                     inputNames.data(), &inputTensor, inputNames.size(),
                                     outputNames.data(), outputNames.size());

    // Wrap the raw output buffer in a cv::Mat without copying. The Mat only
    // borrows the data, which stays alive in outputTensor for this scope.
    std::vector<int64_t> outputShape = outputTensor[0].GetTensorTypeAndShapeInfo().GetShape();
    float *floatArray = outputTensor.front().GetTensorMutableData<float>();
    int shape[] = {static_cast<int>(outputShape[0]),
                   static_cast<int>(outputShape[1]),
                   static_cast<int>(outputShape[2])};
    cv::Mat out(3, shape, CV_32F, floatArray);

    int numProposal = out.size[1];      // number of prior boxes
    int numOut = out.size[2];           // 4 coords + objectness + class scores
    out = out.reshape(0, numProposal);  // one proposal per row

    // Decode proposals: collect candidate boxes, their confidences and classes.
    std::vector<float> confidences;
    std::vector<cv::Rect> boxes;
    std::vector<int> classIds;
    // Scale factors from network-input pixels back to the original image.
    float ratioh = (float)srcImage.rows / inputSize.height;
    float ratiow = (float)srcImage.cols / inputSize.width;

    const float *pdata = (const float *)out.data;
    for (int n = 0; n < numProposal; n++, pdata += numOut) {
        float boxScore = pdata[4];  // objectness
        if (boxScore > kObjectnessThreshold) {
            cv::Mat scores = out.row(n).colRange(5, numOut);
            cv::Point classIdPoint;
            double maxClassScore;
            cv::minMaxLoc(scores, 0, &maxClassScore, 0, &classIdPoint);
            maxClassScore *= boxScore;  // final confidence = objectness * class score
            if (maxClassScore > kConfidenceThreshold) {
                const int classIdx = classIdPoint.x;
                // cx,cy,w,h are center-format in network-input pixels.
                float cx = pdata[0] * ratiow;
                float cy = pdata[1] * ratioh;
                float w = pdata[2] * ratiow;
                float h = pdata[3] * ratioh;
                int left = int(cx - 0.5 * w);
                int top = int(cy - 0.5 * h);
                confidences.push_back((float)maxClassScore);
                boxes.push_back(cv::Rect(left, top, (int)(w), (int)(h)));
                classIds.push_back(classIdx);
            }
        }
    }

    // Non-maximum suppression to drop redundant overlapping boxes.
    std::vector<int> indices;
    cv::dnn::NMSBoxes(boxes, confidences, kNmsScoreThreshold, kNmsIouThreshold, indices);
    for (size_t i = 0; i < indices.size(); ++i) {
        int idx = indices[i];
        int classID = classIds[idx];
        ResultOfDetection result;
        result.boundingBox = boxes[idx];
        result.confidence = confidences[idx];
        result.classID = classID;
        // Guard against a class-name file with fewer entries than the model's
        // class count (would otherwise index out of range).
        result.className = (classID >= 0 && classID < (int)classNames.size())
                               ? classNames[classID]
                               : string("unknown");
        resultsOfDetection.push_back(result);
    }

    fprintf(stdout, "//////////////Detection Results//////////////\n");
    for (size_t i = 0; i < resultsOfDetection.size(); ++i) {
        ResultOfDetection result = resultsOfDetection[i];
        cv::rectangle(srcImage, result.boundingBox, cv::Scalar(0, 255, 255), 2);
        cv::putText(srcImage, result.className,
                    cv::Point(result.boundingBox.x, result.boundingBox.y - 20),
                    cv::FONT_HERSHEY_PLAIN, 2.0, cv::Scalar(0, 0, 255), 2);
        fprintf(stdout, "box:%d %d %d %d,label:%d,confidence:%.3f\n",
                result.boundingBox.x, result.boundingBox.y,
                result.boundingBox.width, result.boundingBox.height,
                result.classID, result.confidence);
    }

    // X11 display can`t support in docker.
    /*
    namedWindow("video", WINDOW_NORMAL | WINDOW_KEEPRATIO);
    imshow("video", srcImage);
    if (waitKey(5) == 'q') {
        return;
    }
    */
}