DetectorYOLOV7.cpp 5.98 KB
Newer Older
lijian6's avatar
lijian6 committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
#include "DetectorYOLOV7.h"

using namespace std;
using namespace cv::dnn;

DetectorYOLOV7::~DetectorYOLOV7() {
    delete session;
    inputNamesPtr.clear();
    outputNamesPtr.clear();
}

void DetectorYOLOV7::setGpuIndex(int gpuIndex)
{
    if (gpuIndex >= 0)
    {
        OrtROCMProviderOptions rocm_options;
        rocm_options.device_id = gpuIndex;
        rocm_options.arena_extend_strategy = 0;
        rocm_options.miopen_conv_exhaustive_search = 0;
        rocm_options.do_copy_in_default_stream = 1;

        sessionOptions.AppendExecutionProvider_ROCM(rocm_options);
    }
    else
    {
        printf("det use CPU\n");
    }
}

void DetectorYOLOV7::setNumThread(int numOfThread)
{
    numThread = numOfThread;
    sessionOptions.SetInterOpNumThreads(numThread);
    sessionOptions.SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_ENABLE_BASIC);
}

void DetectorYOLOV7::initModel(const std::string &pathStr, const std::string &pathOfClassNameFile)
{
    fprintf(stdout, "Start init model for %s\n", pathStr.c_str());
    session = new Ort::Session(env, pathStr.c_str(), sessionOptions);
    inputDim = getInputDim(session);
    inputNamesPtr = getInputNames(session);
    outputNamesPtr = getOutputNames(session);
    std::ifstream classNameFile(pathOfClassNameFile);
    string line;
    while (getline(classNameFile, line))
    {
        classNames.push_back(line);
    }
}

void DetectorYOLOV7::Detect(cv::Mat srcImage)
{
    std::vector<ResultOfDetection> resultsOfDetection;
    cv::Size inputSize = cv::Size(inputDim[3], inputDim[2]);

    cv::Mat srcResize;
    resize(srcImage, srcResize, inputSize, 0, 0, cv::INTER_LINEAR);
    std::vector<float> inputTensorValues = substractMeanNormalize(srcResize, meanValues, normValues);
    std::array<int64_t, 4> inputShape{1, srcResize.channels(), srcResize.rows, srcResize.cols};
    auto memoryInfo = Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU);
    Ort::Value inputTensor = Ort::Value::CreateTensor<float>(memoryInfo, inputTensorValues.data(), inputTensorValues.size(), inputShape.data(), inputShape.size());

    std::vector<const char *> inputNames = {inputNamesPtr.data()->get()};
    std::vector<const char *> outputNames = {outputNamesPtr.data()->get()};

    // 推理
    auto outputTensor = session->Run(Ort::RunOptions{nullptr}, inputNames.data(), &inputTensor, inputNames.size(), outputNames.data(), outputNames.size());

    // 获取推理结果
    std::vector<cv::Mat> outs;
    std::vector<int64_t> outputShape = outputTensor[0].GetTensorTypeAndShapeInfo().GetShape();
    float *floatArray = outputTensor.front().GetTensorMutableData<float>();
    //int64_t outputCount = std::accumulate(outputShape.begin(), outputShape.end(), 1, std::multiplies<int64_t>());
    //std::vector<float> outputData(floatArray, floatArray + outputCount);

    // 转换为cv::Mat
    int shape[]={static_cast<int>(outputShape[0]), static_cast<int>(outputShape[1]), static_cast<int>(outputShape[2])};
    cv::Mat out(3, shape, CV_32F, floatArray);
    //memcpy(out.data, floatArray, sizeof(float)*outputCount);
    outs.push_back(out);

    //获取先验框的个数
    int numProposal = outs[0].size[1];
    int numOut = outs[0].size[2];
    //变换输出的维度
    outs[0] = outs[0].reshape(0, numProposal);
    
    //生成先验框
    std::vector<float> confidences;
    std::vector<cv::Rect> boxes;
    std::vector<int> classIds;
    float ratioh = (float)srcImage.rows / inputSize.height, ratiow = (float)srcImage.cols / inputSize.width;

    //计算cx,cy,w,h,box_sore,class_sore
    int n = 0, rowInd = 0;
    float* pdata = (float*)outs[0].data;
    for (n = 0; n < numProposal; n++)
    {
        float boxScores = pdata[4];
        if (boxScores > 0.5)
        {
            cv::Mat scores = outs[0].row(rowInd).colRange(5, numOut);
            cv::Point classIdPoint;
            double maxClassScore;
            cv::minMaxLoc(scores, 0, &maxClassScore, 0, &classIdPoint);
            maxClassScore *= boxScores;
            if (maxClassScore > 0.25)
            {
                const int classIdx = classIdPoint.x;
                float cx = pdata[0] * ratiow;
                float cy = pdata[1] * ratioh;
                float w = pdata[2] * ratiow;
                float h = pdata[3] * ratioh;

                int left = int(cx - 0.5 * w);
                int top = int(cy - 0.5 * h);

                confidences.push_back((float)maxClassScore);
                boxes.push_back(cv::Rect(left, top, (int)(w), (int)(h)));
                classIds.push_back(classIdx);
            }
        }
        rowInd++;
        pdata += numOut;
    }

    //执行non maximum suppression消除冗余重叠boxes
    std::vector<int> indices;
    dnn::NMSBoxes(boxes, confidences, 0.25, 0.5, indices);
    for (size_t i = 0; i < indices.size(); ++i)
    {
        int idx = indices[i];
        int classID=classIds[idx];
        string className=classNames[classID];
        float confidence=confidences[idx];
        cv::Rect box = boxes[idx];

        ResultOfDetection result;
        result.boundingBox=box;
        result.confidence=confidence;// confidence
        result.classID=classID; // label
        result.className=className;
        resultsOfDetection.push_back(result);
    }

    fprintf(stdout,"//////////////Detection Results//////////////\n");
    for( size_t i = 0; i < resultsOfDetection.size(); ++i)
    {
        ResultOfDetection result = resultsOfDetection[i];
        cv::rectangle(srcImage, result.boundingBox, Scalar(0,255,255),2);
        cv::putText(srcImage, result.className, cv::Point(result.boundingBox.x, result.boundingBox.y-20), cv::FONT_HERSHEY_PLAIN, 2.0, Scalar(0, 0, 255), 2);

        fprintf(stdout,"box:%d %d %d %d,label:%d,confidence:%.3f\n",result.boundingBox.x,
        result.boundingBox.y,result.boundingBox.width,result.boundingBox.height,result.classID,result.confidence);
    }
    // X11 display can`t support in docker.
    /* namedWindow("video", WINDOW_NORMAL | WINDOW_KEEPRATIO);
    imshow("video", srcImage);
    if (waitKey(10) == 'q') {
        exit(0);
    } */
}