DetectorYOLOV7.cpp

#include "DetectorYOLOV7.h"

using namespace std;
using namespace cv::dnn;

DetectorYOLOV7::~DetectorYOLOV7() {
    delete session;
    inputNamesPtr.clear();
    outputNamesPtr.clear();
}

void DetectorYOLOV7::setGpuIndex(int gpuIndex)
{
    if (gpuIndex >= 0)
    {
        OrtROCMProviderOptions rocm_options;
        rocm_options.device_id = gpuIndex;
        rocm_options.arena_extend_strategy = 0;
        rocm_options.miopen_conv_exhaustive_search = 0;
        rocm_options.do_copy_in_default_stream = 1;

        sessionOptions.AppendExecutionProvider_ROCM(rocm_options);
    }
    else
    {
        printf("det use CPU\n");
    }
}

void DetectorYOLOV7::setNumThread(int numOfThread)
{
    numThread = numOfThread;
    sessionOptions.SetInterOpNumThreads(numThread);
    sessionOptions.SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_ENABLE_BASIC);
}

void DetectorYOLOV7::initModel(const std::string &pathStr, const std::string &pathOfClassNameFile)
{
    fprintf(stdout, "Start init model for %s\n", pathStr.c_str());
    session = new Ort::Session(env, pathStr.c_str(), sessionOptions);
    inputDim = getInputDim(session);
    inputNamesPtr = getInputNames(session);
    outputNamesPtr = getOutputNames(session);
    std::ifstream classNameFile(pathOfClassNameFile);
    string line;
    while (getline(classNameFile, line))
    {
        classNames.push_back(line);
    }
}

void DetectorYOLOV7::Detect(cv::Mat srcImage)
{
    std::vector<ResultOfDetection> resultsOfDetection;
    cv::Size inputSize = cv::Size(inputDim[3], inputDim[2]);

    cv::Mat srcResize;
    resize(srcImage, srcResize, inputSize, 0, 0, cv::INTER_LINEAR);
    std::vector<float> inputTensorValues = substractMeanNormalize(srcResize, meanValues, normValues);
    std::array<int64_t, 4> inputShape{1, srcResize.channels(), srcResize.rows, srcResize.cols};
    auto memoryInfo = Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU);
    Ort::Value inputTensor = Ort::Value::CreateTensor<float>(memoryInfo, inputTensorValues.data(), inputTensorValues.size(), inputShape.data(), inputShape.size());

    std::vector<const char *> inputNames = {inputNamesPtr.data()->get()};
    std::vector<const char *> outputNames = {outputNamesPtr.data()->get()};

    // 推理
    auto outputTensor = session->Run(Ort::RunOptions{nullptr}, inputNames.data(), &inputTensor, inputNames.size(), outputNames.data(), outputNames.size());

    // 获取推理结果
    std::vector<cv::Mat> outs;
    std::vector<int64_t> outputShape = outputTensor[0].GetTensorTypeAndShapeInfo().GetShape();
    float *floatArray = outputTensor.front().GetTensorMutableData<float>();
    //int64_t outputCount = std::accumulate(outputShape.begin(), outputShape.end(), 1, std::multiplies<int64_t>());
    //std::vector<float> outputData(floatArray, floatArray + outputCount);

    // 转换为cv::Mat
    int shape[]={static_cast<int>(outputShape[0]), static_cast<int>(outputShape[1]), static_cast<int>(outputShape[2])};
    cv::Mat out(3, shape, CV_32F, floatArray);
    //memcpy(out.data, floatArray, sizeof(float)*outputCount);
    outs.push_back(out);

    //获取先验框的个数
    int numProposal = outs[0].size[1];
    int numOut = outs[0].size[2];
    //变换输出的维度
    outs[0] = outs[0].reshape(0, numProposal);
    
    //生成先验框
    std::vector<float> confidences;
    std::vector<cv::Rect> boxes;
    std::vector<int> classIds;
    float ratioh = (float)srcImage.rows / inputSize.height, ratiow = (float)srcImage.cols / inputSize.width;

    //计算cx,cy,w,h,box_sore,class_sore
    int n = 0, rowInd = 0;
    float* pdata = (float*)outs[0].data;
    for (n = 0; n < numProposal; n++)
    {
        float boxScores = pdata[4];
        if (boxScores > 0.5)
        {
            cv::Mat scores = outs[0].row(rowInd).colRange(5, numOut);
            cv::Point classIdPoint;
            double maxClassScore;
            cv::minMaxLoc(scores, 0, &maxClassScore, 0, &classIdPoint);
            maxClassScore *= boxScores;
            if (maxClassScore > 0.25)
            {
                const int classIdx = classIdPoint.x;
                float cx = pdata[0] * ratiow;
                float cy = pdata[1] * ratioh;
                float w = pdata[2] * ratiow;
                float h = pdata[3] * ratioh;

                int left = int(cx - 0.5 * w);
                int top = int(cy - 0.5 * h);

                confidences.push_back((float)maxClassScore);
                boxes.push_back(cv::Rect(left, top, (int)(w), (int)(h)));
                classIds.push_back(classIdx);
            }
        }
        rowInd++;
        pdata += numOut;
    }

    //执行non maximum suppression消除冗余重叠boxes
    std::vector<int> indices;
    dnn::NMSBoxes(boxes, confidences, 0.25, 0.5, indices);
    for (size_t i = 0; i < indices.size(); ++i)
    {
        int idx = indices[i];
        int classID=classIds[idx];
        string className=classNames[classID];
        float confidence=confidences[idx];
        cv::Rect box = boxes[idx];

        ResultOfDetection result;
        result.boundingBox=box;
        result.confidence=confidence;// confidence
        result.classID=classID; // label
        result.className=className;
        resultsOfDetection.push_back(result);
    }

    fprintf(stdout,"//////////////Detection Results//////////////\n");
    for( size_t i = 0; i < resultsOfDetection.size(); ++i)
    {
        ResultOfDetection result = resultsOfDetection[i];
        cv::rectangle(srcImage, result.boundingBox, Scalar(0,255,255),2);
        cv::putText(srcImage, result.className, cv::Point(result.boundingBox.x, result.boundingBox.y-20), cv::FONT_HERSHEY_PLAIN, 2.0, Scalar(0, 0, 255), 2);

        fprintf(stdout,"box:%d %d %d %d,label:%d,confidence:%.3f\n",result.boundingBox.x,
        result.boundingBox.y,result.boundingBox.width,result.boundingBox.height,result.classID,result.confidence);
    }
    // X11 display can`t support in docker.
    /* namedWindow("video", WINDOW_NORMAL | WINDOW_KEEPRATIO);
    imshow("video", srcImage);
    if (waitKey(10) == 'q') {
        exit(0);
    } */
}