Commit 17a25564 authored by lijian6

First commit

Signed-off-by: lijian <lijian6@sugon.com>
# Model name
modelName=Video_Ort
# Model description
modelDescription=Video Ort performs video inference, using ffmpeg CPU decoding and the OnnxRunTime inference framework
# Application scenarios
appScenario=inference,OnnxRunTime,object detection,video decoding,video
# Framework type
frameType=OnnxRunTime
#include <opencv2/imgproc.hpp>
#include <opencv2/imgcodecs.hpp>
#include <numeric>
#include "CommonUtils.h"
std::vector<float> substractMeanNormalize(cv::Mat &src, const float *meanVals, const float *normVals) {
auto inputTensorSize = src.cols * src.rows * src.channels();
std::vector<float> inputTensorValues(inputTensorSize);
size_t numChannels = src.channels();
size_t imageSize = src.cols * src.rows;
for (size_t pid = 0; pid < imageSize; pid++) {
for (size_t ch = 0; ch < numChannels; ++ch) {
float data = (float) (src.data[pid * numChannels + ch] * normVals[ch] - meanVals[ch] * normVals[ch]);
inputTensorValues[ch * imageSize + pid] = data;
}
}
return inputTensorValues;
}
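// A minimal usage sketch for the converter above. The mean/norm values here
// are illustrative assumptions, not values taken from this repo: with zero
// means and 1/255 scales it yields the common [0,1]-scaled, CHW-ordered
// float input for an ONNX model.
//
//   cv::Mat img = cv::imread("frame.jpg");                  // HWC, 8-bit BGR
//   const float mean[3] = {0.f, 0.f, 0.f};
//   const float norm[3] = {1.f / 255.f, 1.f / 255.f, 1.f / 255.f};
//   std::vector<float> chw = substractMeanNormalize(img, mean, norm);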
std::vector<Ort::AllocatedStringPtr> getInputNames(Ort::Session *session) {
Ort::AllocatorWithDefaultOptions allocator;
const size_t numInputNodes = session->GetInputCount();
std::vector<Ort::AllocatedStringPtr> inputNamesPtr;
inputNamesPtr.reserve(numInputNodes);
std::vector<int64_t> input_node_dims;
// iterate over all input nodes
for (size_t i = 0; i < numInputNodes; i++) {
auto inputName = session->GetInputNameAllocated(i, allocator);
inputNamesPtr.push_back(std::move(inputName));
/*printf("inputName[%zu] = %s\n", i, inputName.get());
// print input node types
auto typeInfo = session->GetInputTypeInfo(i);
auto tensorInfo = typeInfo.GetTensorTypeAndShapeInfo();
ONNXTensorElementDataType type = tensorInfo.GetElementType();
printf("inputType[%zu] = %u\n", i, type);
// print input shapes/dims
input_node_dims = tensorInfo.GetShape();
printf("Input num_dims = %zu\n", input_node_dims.size());
for (size_t j = 0; j < input_node_dims.size(); j++) {
printf("Input dim[%zu] = %llu\n",j, input_node_dims[j]);
}*/
}
return inputNamesPtr;
}
std::vector<int64_t> getInputDim(Ort::Session *session)
{
std::vector<int64_t> input_node_dims;
auto typeInfo = session->GetInputTypeInfo(0);
auto tensorInfo = typeInfo.GetTensorTypeAndShapeInfo();
input_node_dims = tensorInfo.GetShape();
return input_node_dims;
}
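// Note: getInputDim reads only input 0 and assumes a fixed NCHW shape; models
// with dynamic dimensions report those entries as -1 in the returned vector.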
std::vector<Ort::AllocatedStringPtr> getOutputNames(Ort::Session *session) {
Ort::AllocatorWithDefaultOptions allocator;
const size_t numOutputNodes = session->GetOutputCount();
std::vector<Ort::AllocatedStringPtr> outputNamesPtr;
outputNamesPtr.reserve(numOutputNodes);
std::vector<int64_t> output_node_dims;
for (size_t i = 0; i < numOutputNodes; i++) {
auto outputName = session->GetOutputNameAllocated(i, allocator);
outputNamesPtr.push_back(std::move(outputName));
/*printf("outputName[%zu] = %s\n", i, outputName.get());
// print input node types
auto type_info = session->GetOutputTypeInfo(i);
auto tensor_info = type_info.GetTensorTypeAndShapeInfo();
ONNXTensorElementDataType type = tensor_info.GetElementType();
printf("outputType[%zu] = %u\n", i, type);
// print input shapes/dims
output_node_dims = tensor_info.GetShape();
printf("output num_dims = %zu\n", output_node_dims.size());
for (size_t j = 0; j < output_node_dims.size(); j++) {
printf("output dim[%zu] = %llu\n",j, output_node_dims[j]);
}*/
}
return outputNamesPtr;
}
std::string getSrcImgFilePath(const char *path, const char *imgName) {
std::string filePath;
filePath.append(path).append(imgName);
return filePath;
}
#include "DetectorYOLOV3.h"
using namespace std;
using namespace cv::dnn;
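// DetectorYOLOV3.h and ResultOfDetection are not part of this excerpt. From
// the usage in this file they presumably look roughly like the sketch below
// (member names come from this file; types and defaults are inferred guesses).
// DetectorYOLOV5 and DetectorYOLOV7 follow the same shape.
//
//   struct ResultOfDetection {
//       cv::Rect boundingBox;
//       float confidence;
//       int classID;
//       std::string className;
//   };
//
//   class DetectorYOLOV3 {
//   public:
//       ~DetectorYOLOV3();
//       void setGpuIndex(int gpuIndex);
//       void setNumThread(int numOfThread);
//       void initModel(const std::string &pathStr, const std::string &pathOfClassNameFile);
//       void Detect(cv::Mat srcImage);
//   private:
//       Ort::Env env;
//       Ort::SessionOptions sessionOptions;
//       Ort::Session *session = nullptr;
//       int numThread = 0;
//       std::vector<Ort::AllocatedStringPtr> inputNamesPtr, outputNamesPtr;
//       std::vector<int64_t> inputDim;
//       std::vector<std::string> classNames;
//       float meanValues[3];   // actual values not shown in this excerpt
//       float normValues[3];
//   };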
DetectorYOLOV3::~DetectorYOLOV3()
{
delete session;
inputNamesPtr.clear();
outputNamesPtr.clear();
}
void DetectorYOLOV3::setGpuIndex(int gpuIndex)
{
if (gpuIndex >= 0) {
OrtROCMProviderOptions rocm_options;
rocm_options.device_id = gpuIndex;
rocm_options.arena_extend_strategy = 0;
rocm_options.miopen_conv_exhaustive_search = 0;
rocm_options.do_copy_in_default_stream = 1;
sessionOptions.AppendExecutionProvider_ROCM(rocm_options);
}
else {
printf("det use CPU\n");
}
}
void DetectorYOLOV3::setNumThread(int numOfThread)
{
numThread = numOfThread;
sessionOptions.SetInterOpNumThreads(numThread);
sessionOptions.SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_ENABLE_BASIC);
}
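// Note: SetInterOpNumThreads sets parallelism between independent graph nodes;
// per-operator parallelism (SetIntraOpNumThreads) is left at its default here.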
void DetectorYOLOV3::initModel(const std::string &pathStr, const std::string &pathOfClassNameFile)
{
fprintf(stdout, "Start init model for %s\n", pathStr.c_str());
session = new Ort::Session(env, pathStr.c_str(), sessionOptions);
inputNamesPtr = getInputNames(session);
inputDim = getInputDim(session);
outputNamesPtr = getOutputNames(session);
std::ifstream classNameFile(pathOfClassNameFile);
string line;
while (getline(classNameFile, line))
{
classNames.push_back(line);
}
}
void DetectorYOLOV3::Detect(cv::Mat srcImage)
{
std::vector<ResultOfDetection> resultsOfDetection;
cv::Size inputSize = cv::Size(inputDim[3], inputDim[2]);
cv::Mat srcResize;
resize(srcImage, srcResize, inputSize, 0, 0, cv::INTER_LINEAR);
std::vector<float> inputTensorValues = substractMeanNormalize(srcResize, meanValues, normValues);
std::array<int64_t, 4> inputShape{1, srcResize.channels(), srcResize.rows, srcResize.cols};
auto memoryInfo = Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU);
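// The tensor created below wraps inputTensorValues without copying, so the
// vector must stay alive until session->Run() returns (both are locals here).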
Ort::Value inputTensor = Ort::Value::CreateTensor<float>(memoryInfo, inputTensorValues.data(), inputTensorValues.size(), inputShape.data(), inputShape.size());
std::vector<const char *> inputNames = {inputNamesPtr.data()->get()};
std::vector<const char *> outputNames = {outputNamesPtr.data()->get()};
// Run inference
auto outputTensor = session->Run(Ort::RunOptions{nullptr}, inputNames.data(), &inputTensor, inputNames.size(), outputNames.data(), outputNames.size());
// Fetch the inference result
std::vector<cv::Mat> outs;
std::vector<int64_t> outputShape = outputTensor[0].GetTensorTypeAndShapeInfo().GetShape();
float *floatArray = outputTensor.front().GetTensorMutableData<float>();
//int64_t outputCount = std::accumulate(outputShape.begin(), outputShape.end(), 1, std::multiplies<int64_t>());
//std::vector<float> outputData(floatArray, floatArray + outputCount);
// Convert the raw output to a cv::Mat
int shape[]={static_cast<int>(outputShape[0]), static_cast<int>(outputShape[1]), static_cast<int>(outputShape[2])};
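// The Mat below wraps floatArray in place (no copy), so it stays valid only
// while outputTensor, which owns the buffer, remains in scope.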
cv::Mat out(3, shape, CV_32F, floatArray);
//memcpy(out.data, result.data(), sizeof(float)*outputCount);
outs.push_back(out);
// Number of candidate boxes
int numProposal = outs[0].size[1];
int numOut = outs[0].size[2];
// Reshape the output to numProposal rows
outs[0] = outs[0].reshape(0, numProposal);
// Collect candidate boxes
std::vector<float> confidences;
std::vector<cv::Rect> boxes;
std::vector<int> classIds;
float ratioh = (float)srcImage.rows / inputSize.height, ratiow = (float)srcImage.cols / inputSize.width;
// Each row holds cx, cy, w, h, box_score and the per-class scores
int n = 0, rowInd = 0;
float* pdata = (float*)outs[0].data;
for (n = 0; n < numProposal; n++)
{
float boxScores = pdata[4];
if (boxScores > 0.5)
{
cv::Mat scores = outs[0].row(rowInd).colRange(5, numOut);
cv::Point classIdPoint;
double maxClassScore;
cv::minMaxLoc(scores, 0, &maxClassScore, 0, &classIdPoint);
maxClassScore *= boxScores;
if (maxClassScore > 0.25)
{
const int classIdx = classIdPoint.x;
float cx = pdata[0] * ratiow;
float cy = pdata[1] * ratioh;
float w = pdata[2] * ratiow;
float h = pdata[3] * ratioh;
int left = int(cx - 0.5 * w);
int top = int(cy - 0.5 * h);
confidences.push_back((float)maxClassScore);
boxes.push_back(cv::Rect(left, top, (int)(w), (int)(h)));
classIds.push_back(classIdx);
}
}
rowInd++;
pdata += numOut;
}
// Run non-maximum suppression to remove redundant overlapping boxes
std::vector<int> indices;
dnn::NMSBoxes(boxes, confidences, 0.25, 0.5, indices);
for (size_t i = 0; i < indices.size(); ++i)
{
int idx = indices[i];
int classID=classIds[idx];
string className=classNames[classID];
float confidence=confidences[idx];
cv::Rect box = boxes[idx];
ResultOfDetection result;
result.boundingBox=box;
result.confidence=confidence;
result.classID=classID;
result.className=className;
resultsOfDetection.push_back(result);
}
fprintf(stdout,"//////////////Detection Results//////////////\n");
for( size_t i = 0; i < resultsOfDetection.size(); ++i)
{
ResultOfDetection result = resultsOfDetection[i];
cv::rectangle(srcImage, result.boundingBox, Scalar(0,255,255),2);
cv::putText(srcImage, result.className, cv::Point(result.boundingBox.x, result.boundingBox.y-20), cv::FONT_HERSHEY_PLAIN, 2.0, Scalar(0, 0, 255), 2);
fprintf(stdout,"box:%d %d %d %d,label:%d,confidence:%.3f\n",result.boundingBox.x,
result.boundingBox.y,result.boundingBox.width,result.boundingBox.height,result.classID,result.confidence);
}
// X11 display is not supported inside docker.
/* namedWindow("video", WINDOW_NORMAL | WINDOW_KEEPRATIO);
imshow("video", srcImage);
if (waitKey(5) == 'q') {
return;
} */
}
#include "DetectorYOLOV5.h"
using namespace std;
using namespace cv::dnn;
DetectorYOLOV5::~DetectorYOLOV5()
{
delete session;
inputNamesPtr.clear();
outputNamesPtr.clear();
}
void DetectorYOLOV5::setGpuIndex(int gpuIndex)
{
if (gpuIndex >= 0) {
OrtROCMProviderOptions rocm_options;
rocm_options.device_id = gpuIndex;
rocm_options.arena_extend_strategy = 0;
rocm_options.miopen_conv_exhaustive_search = 0;
rocm_options.do_copy_in_default_stream = 1;
sessionOptions.AppendExecutionProvider_ROCM(rocm_options);
}
else {
printf("det use CPU\n");
}
}
void DetectorYOLOV5::setNumThread(int numOfThread)
{
numThread = numOfThread;
sessionOptions.SetInterOpNumThreads(numThread);
sessionOptions.SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_ENABLE_BASIC);
}
void DetectorYOLOV5::initModel(const std::string &pathStr, const std::string &pathOfClassNameFile)
{
fprintf(stdout, "Start init model for %s\n", pathStr.c_str());
session = new Ort::Session(env, pathStr.c_str(), sessionOptions);
inputDim = getInputDim(session);
inputNamesPtr = getInputNames(session);
outputNamesPtr = getOutputNames(session);
std::ifstream classNameFile(pathOfClassNameFile);
string line;
while (getline(classNameFile, line))
{
classNames.push_back(line);
}
}
void DetectorYOLOV5::Detect(cv::Mat originSrc)
{
std::vector<ResultOfDetection> resultsOfDetection;
cv::Size inputSize = cv::Size(inputDim[3], inputDim[2]);
cv::Mat srcResize;
resize(originSrc, srcResize, inputSize, 0, 0, cv::INTER_LINEAR);
std::vector<float> inputTensorValues = substractMeanNormalize(srcResize, meanValues, normValues);
std::array<int64_t, 4> inputShape{1, srcResize.channels(), srcResize.rows, srcResize.cols};
auto memoryInfo = Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU);
Ort::Value inputTensor = Ort::Value::CreateTensor<float>(memoryInfo, inputTensorValues.data(), inputTensorValues.size(), inputShape.data(), inputShape.size());
std::vector<const char *> inputNames = {inputNamesPtr.data()->get()};
std::vector<const char *> outputNames = {outputNamesPtr.data()->get()};
// Run inference
auto outputTensor = session->Run(Ort::RunOptions{nullptr}, inputNames.data(), &inputTensor, inputNames.size(), outputNames.data(), outputNames.size());
// Fetch the inference result
std::vector<cv::Mat> outs;
std::vector<int64_t> outputShape = outputTensor[0].GetTensorTypeAndShapeInfo().GetShape();
float *floatArray = outputTensor.front().GetTensorMutableData<float>();
//int64_t outputCount = std::accumulate(outputShape.begin(), outputShape.end(), 1, std::multiplies<int64_t>());
//std::vector<float> outputData(floatArray, floatArray + outputCount);
// Convert the raw output to a cv::Mat
int shape[]={static_cast<int>(outputShape[0]), static_cast<int>(outputShape[1]), static_cast<int>(outputShape[2])};
cv::Mat out(3, shape, CV_32F, floatArray);
//memcpy(out.data, result.data(), sizeof(float)*outputCount);
outs.push_back(out);
// Number of candidate boxes
int numProposal = outs[0].size[1];
int numOut = outs[0].size[2];
// Reshape the output to numProposal rows
outs[0] = outs[0].reshape(0, numProposal);
// Collect candidate boxes
std::vector<float> confidences;
std::vector<cv::Rect> boxes;
std::vector<int> classIds;
float ratioh = (float)originSrc.rows / inputSize.height, ratiow = (float)originSrc.cols / inputSize.width;
// Each row holds cx, cy, w, h, box_score and the per-class scores
int n = 0, rowInd = 0;
float* pdata = (float*)outs[0].data;
for (n = 0; n < numProposal; n++)
{
float boxScores = pdata[4];
if (boxScores > 0.5)
{
cv::Mat scores = outs[0].row(rowInd).colRange(5, numOut);
cv::Point classIdPoint;
double maxClassScore;
cv::minMaxLoc(scores, 0, &maxClassScore, 0, &classIdPoint);
maxClassScore *= boxScores;
if (maxClassScore > 0.25)
{
const int classIdx = classIdPoint.x;
float cx = pdata[0] * ratiow;
float cy = pdata[1] * ratioh;
float w = pdata[2] * ratiow;
float h = pdata[3] * ratioh;
int left = int(cx - 0.5 * w);
int top = int(cy - 0.5 * h);
confidences.push_back((float)maxClassScore);
boxes.push_back(cv::Rect(left, top, (int)(w), (int)(h)));
classIds.push_back(classIdx);
}
}
rowInd++;
pdata += numOut;
}
// Run non-maximum suppression to remove redundant overlapping boxes
std::vector<int> indices;
dnn::NMSBoxes(boxes, confidences, 0.25, 0.5, indices);
for (size_t i = 0; i < indices.size(); ++i)
{
int idx = indices[i];
int classID=classIds[idx];
string className=classNames[classID];
float confidence=confidences[idx];
cv::Rect box = boxes[idx];
ResultOfDetection result;
result.boundingBox=box;
result.confidence=confidence;// confidence
result.classID=classID; // label
result.className=className;
resultsOfDetection.push_back(result);
}
fprintf(stdout,"//////////////Detection Results//////////////\n");
for( size_t i = 0; i < resultsOfDetection.size(); ++i)
{
ResultOfDetection result = resultsOfDetection[i];
cv::rectangle(originSrc, result.boundingBox, Scalar(0,255,255),2);
cv::putText(originSrc, result.className, cv::Point(result.boundingBox.x, result.boundingBox.y-20), cv::FONT_HERSHEY_PLAIN, 2.0, Scalar(0, 0, 255), 2);
fprintf(stdout,"box:%d %d %d %d,label:%d,confidence:%.3f\n",result.boundingBox.x,
result.boundingBox.y,result.boundingBox.width,result.boundingBox.height,result.classID,result.confidence);
}
// X11 display is not supported inside docker.
/* namedWindow("video", WINDOW_NORMAL | WINDOW_KEEPRATIO);
imshow("video", originSrc);
if (waitKey(5) == 'q') {
return;
} */
}
#include "DetectorYOLOV7.h"
using namespace std;
using namespace cv::dnn;
DetectorYOLOV7::~DetectorYOLOV7() {
delete session;
inputNamesPtr.clear();
outputNamesPtr.clear();
}
void DetectorYOLOV7::setGpuIndex(int gpuIndex)
{
if (gpuIndex >= 0)
{
OrtROCMProviderOptions rocm_options;
rocm_options.device_id = gpuIndex;
rocm_options.arena_extend_strategy = 0;
rocm_options.miopen_conv_exhaustive_search = 0;
rocm_options.do_copy_in_default_stream = 1;
sessionOptions.AppendExecutionProvider_ROCM(rocm_options);
}
else
{
printf("det use CPU\n");
}
}
void DetectorYOLOV7::setNumThread(int numOfThread)
{
numThread = numOfThread;
sessionOptions.SetInterOpNumThreads(numThread);
sessionOptions.SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_ENABLE_BASIC);
}
void DetectorYOLOV7::initModel(const std::string &pathStr, const std::string &pathOfClassNameFile)
{
fprintf(stdout, "Start init model for %s\n", pathStr.c_str());
session = new Ort::Session(env, pathStr.c_str(), sessionOptions);
inputDim = getInputDim(session);
inputNamesPtr = getInputNames(session);
outputNamesPtr = getOutputNames(session);
std::ifstream classNameFile(pathOfClassNameFile);
string line;
while (getline(classNameFile, line))
{
classNames.push_back(line);
}
}
void DetectorYOLOV7::Detect(cv::Mat srcImage)
{
std::vector<ResultOfDetection> resultsOfDetection;
cv::Size inputSize = cv::Size(inputDim[3], inputDim[2]);
cv::Mat srcResize;
resize(srcImage, srcResize, inputSize, 0, 0, cv::INTER_LINEAR);
std::vector<float> inputTensorValues = substractMeanNormalize(srcResize, meanValues, normValues);
std::array<int64_t, 4> inputShape{1, srcResize.channels(), srcResize.rows, srcResize.cols};
auto memoryInfo = Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU);
Ort::Value inputTensor = Ort::Value::CreateTensor<float>(memoryInfo, inputTensorValues.data(), inputTensorValues.size(), inputShape.data(), inputShape.size());
std::vector<const char *> inputNames = {inputNamesPtr.data()->get()};
std::vector<const char *> outputNames = {outputNamesPtr.data()->get()};
// Run inference
auto outputTensor = session->Run(Ort::RunOptions{nullptr}, inputNames.data(), &inputTensor, inputNames.size(), outputNames.data(), outputNames.size());
// Fetch the inference result
std::vector<cv::Mat> outs;
std::vector<int64_t> outputShape = outputTensor[0].GetTensorTypeAndShapeInfo().GetShape();
float *floatArray = outputTensor.front().GetTensorMutableData<float>();
//int64_t outputCount = std::accumulate(outputShape.begin(), outputShape.end(), 1, std::multiplies<int64_t>());
//std::vector<float> outputData(floatArray, floatArray + outputCount);
// Convert the raw output to a cv::Mat
int shape[]={static_cast<int>(outputShape[0]), static_cast<int>(outputShape[1]), static_cast<int>(outputShape[2])};
cv::Mat out(3, shape, CV_32F, floatArray);
//memcpy(out.data, floatArray, sizeof(float)*outputCount);
outs.push_back(out);
// Number of candidate boxes
int numProposal = outs[0].size[1];
int numOut = outs[0].size[2];
// Reshape the output to numProposal rows
outs[0] = outs[0].reshape(0, numProposal);
// Collect candidate boxes
std::vector<float> confidences;
std::vector<cv::Rect> boxes;
std::vector<int> classIds;
float ratioh = (float)srcImage.rows / inputSize.height, ratiow = (float)srcImage.cols / inputSize.width;
// Each row holds cx, cy, w, h, box_score and the per-class scores
int n = 0, rowInd = 0;
float* pdata = (float*)outs[0].data;
for (n = 0; n < numProposal; n++)
{
float boxScores = pdata[4];
if (boxScores > 0.5)
{
cv::Mat scores = outs[0].row(rowInd).colRange(5, numOut);
cv::Point classIdPoint;
double maxClassScore;
cv::minMaxLoc(scores, 0, &maxClassScore, 0, &classIdPoint);
maxClassScore *= boxScores;
if (maxClassScore > 0.25)
{
const int classIdx = classIdPoint.x;
float cx = pdata[0] * ratiow;
float cy = pdata[1] * ratioh;
float w = pdata[2] * ratiow;
float h = pdata[3] * ratioh;
int left = int(cx - 0.5 * w);
int top = int(cy - 0.5 * h);
confidences.push_back((float)maxClassScore);
boxes.push_back(cv::Rect(left, top, (int)(w), (int)(h)));
classIds.push_back(classIdx);
}
}
rowInd++;
pdata += numOut;
}
// Run non-maximum suppression to remove redundant overlapping boxes
std::vector<int> indices;
dnn::NMSBoxes(boxes, confidences, 0.25, 0.5, indices);
for (size_t i = 0; i < indices.size(); ++i)
{
int idx = indices[i];
int classID=classIds[idx];
string className=classNames[classID];
float confidence=confidences[idx];
cv::Rect box = boxes[idx];
ResultOfDetection result;
result.boundingBox=box;
result.confidence=confidence;// confidence
result.classID=classID; // label
result.className=className;
resultsOfDetection.push_back(result);
}
fprintf(stdout,"//////////////Detection Results//////////////\n");
for( size_t i = 0; i < resultsOfDetection.size(); ++i)
{
ResultOfDetection result = resultsOfDetection[i];
cv::rectangle(srcImage, result.boundingBox, Scalar(0,255,255),2);
cv::putText(srcImage, result.className, cv::Point(result.boundingBox.x, result.boundingBox.y-20), cv::FONT_HERSHEY_PLAIN, 2.0, Scalar(0, 0, 255), 2);
fprintf(stdout,"box:%d %d %d %d,label:%d,confidence:%.3f\n",result.boundingBox.x,
result.boundingBox.y,result.boundingBox.width,result.boundingBox.height,result.classID,result.confidence);
}
// X11 display is not supported inside docker.
/* namedWindow("video", WINDOW_NORMAL | WINDOW_KEEPRATIO);
imshow("video", srcImage);
if (waitKey(10) == 'q') {
exit(0);
} */
}
#include "main.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
void OrtSamplesUsage(char* programName)
{
printf("Usage : %s <index> \n", programName);
printf("index:\n");
printf("\t 0) YOLOV3 sample.\n");
printf("\t 1) YOLOV5 sample.\n");
printf("\t 2) YOLOV7 sample.\n");
}
int main(int argc, char *argv[])
{
if (argc != 2)
{
OrtSamplesUsage(argv[0]);
return -1;
}
if (!strncmp(argv[1], "-h", 2))
{
OrtSamplesUsage(argv[0]);
return 0;
}
switch (*argv[1])
{
case '0':
{
Ort_DetectorYOLOV3();
}
break;
case '1':
{
Ort_DetectorYOLOV5();
}
break;
case '2':
{
Ort_DetectorYOLOV7();
}
break;
default :
{
OrtSamplesUsage(argv[0]);
}
break;
}
return 0;
}
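// Example invocation (the binary name "Video_Ort" is an assumption taken from
// modelName in the model description file):
//   ./Video_Ort 1    # run the YOLOv5 sample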
#include "main.h"
#include <sys/time.h>
#include "DetectorYOLOV3.h"
static void DecoderThreadFunc(Queue* queue)
{
int ret, end = 0;
int decode_num = 0;
Queue* que = queue;
Decoder decoder;
ret = decoder.DecoderInit();
if (ret == -1)
{
av_log(NULL, AV_LOG_ERROR, "decoder init failed.\n");
que->finish();
return;
}
while(true)
{
if (av_read_frame(decoder.fmt_ctx, decoder.pkt) < 0)
{
if(end == 2)
{
que->DecodeEnd = true;
break;
}
end = 1;
}
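// Note: once av_read_frame() fails, pkt has been reset by the previous
// av_packet_unref(), leaving pkt->stream_index at 0; the flush branch below
// therefore only triggers if the video stream is stream 0.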
if (decoder.pkt->stream_index == decoder.video_stream_idx) {
if(!end) {
ret = avcodec_send_packet(decoder.video_dec_ctx, decoder.pkt);
} else {
ret = avcodec_send_packet(decoder.video_dec_ctx, NULL);
}
if (ret < 0 && ret != AVERROR_EOF) {
fprintf(stderr, "Error submitting a packet for decoding\n");
que->DecodeEnd = true;
break;
}
while (ret >= 0 || end == 1)
{
ret = avcodec_receive_frame(decoder.video_dec_ctx, decoder.frame);
if (ret == AVERROR(EAGAIN)) {
break;
} else if (ret == AVERROR_EOF ) {
end = 2;
break;
} else if (ret < 0) {
av_log(NULL, AV_LOG_ERROR, "Error while receiving a frame from the decoder\n");
que->finish();
return;
}
decode_num++;
if (decoder.frame->format == AV_PIX_FMT_YUV420P)
{
cv::Mat srcImage;
srcImage = cv::Mat::zeros(decoder.frame->height*3/2, decoder.frame->width, CV_8UC1);
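// Note: the three plane copies below assume tightly packed rows
// (frame->linesize[i] equal to the plane width); FFmpeg may pad rows, in
// which case a per-row copy (or av_image_copy_to_buffer) is needed instead.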
memcpy(srcImage.data, (unsigned char*)decoder.frame->data[0], decoder.frame->width * decoder.frame->height);
memcpy(srcImage.data + decoder.frame->width * decoder.frame->height, (unsigned char*)decoder.frame->data[1], decoder.frame->width * decoder.frame->height/4);
memcpy(srcImage.data + decoder.frame->width * decoder.frame->height*5/4, (unsigned char*)decoder.frame->data[2], decoder.frame->width * decoder.frame->height/4);
que->enQueue(srcImage);
av_frame_unref(decoder.frame);
}
else
{
av_log(NULL, AV_LOG_ERROR, "video format not yuv420p, fix the code first.\n");
que->finish();
return;
}
}
}
av_packet_unref(decoder.pkt);
}
fprintf(stdout, "Decoder: ####### decode finish, frame count: %d\n", decode_num);
que->finish();
}
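// Queue and Decoder come from main.h, which is not in this excerpt. From the
// calls in this file they presumably look roughly like the sketch below
// (hedged: names come from this file, signatures are inferred guesses).
//
//   class Queue {                              // bounded frame queue
//   public:
//       explicit Queue(int capacity);
//       void enQueue(const cv::Mat &frame);    // blocks while full
//       void deQueue(cv::Mat *frame);          // blocks while empty
//       void finish();                         // mark the stream finished
//       bool DecodeEnd = false;                // set when decoding is done
//   };
//
//   struct Decoder {                           // thin FFmpeg wrapper
//       int DecoderInit();                     // open input and decoder; -1 on failure
//       AVFormatContext *fmt_ctx;
//       AVCodecContext *video_dec_ctx;
//       AVPacket *pkt;
//       AVFrame *frame;
//       int video_stream_idx;
//   };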
void Ort_DetectorYOLOV3()
{
const std::string modelPath = "../Resource/Models/Yolov3/yolov3-tiny.onnx";
const std::string keysPath = "../Resource/Models/Yolov3/coco.names";
Queue* queue = new Queue(1);
DetectorYOLOV3 detector;
detector.setNumThread(1);
detector.setGpuIndex(0);
detector.initModel(modelPath, keysPath);
std::thread ThreadDecoder(DecoderThreadFunc, queue);
while (!queue->DecodeEnd) {
cv::Mat InferImage;
queue->deQueue(&InferImage);
if (InferImage.empty()) {
continue;
}
cvtColor(InferImage, InferImage, COLOR_YUV420p2RGB);
double time1 = getTickCount();
detector.Detect(InferImage);
double time2 = getTickCount();
double elapsedTime = (time2 - time1)*1000 / getTickFrequency();
fprintf(stdout, "inference time: %.3f ms\n", elapsedTime);
}
ThreadDecoder.join();
delete queue;
queue = NULL;
return;
}
#include "main.h"
#include <sys/time.h>
#include "DetectorYOLOV5.h"
static void DecoderThreadFunc(Queue* queue)
{
int ret, end = 0;
int decode_num = 0;
Queue* que = queue;
Decoder decoder;
ret = decoder.DecoderInit();
if (ret == -1)
{
av_log(NULL, AV_LOG_ERROR, "decoder init failed.\n");
que->finish();
return;
}
while(true)
{
if (av_read_frame(decoder.fmt_ctx, decoder.pkt) < 0)
{
if(end == 2)
{
que->DecodeEnd = true;
break;
}
end = 1;
}
if (decoder.pkt->stream_index == decoder.video_stream_idx) {
if(!end) {
ret = avcodec_send_packet(decoder.video_dec_ctx, decoder.pkt);
} else {
ret = avcodec_send_packet(decoder.video_dec_ctx, NULL);
}
if (ret < 0 && ret != AVERROR_EOF) {
fprintf(stderr, "Error submitting a packet for decoding\n");
que->DecodeEnd = true;
break;
}
while (ret >= 0 || end == 1)
{
ret = avcodec_receive_frame(decoder.video_dec_ctx, decoder.frame);
if (ret == AVERROR(EAGAIN)) {
break;
} else if (ret == AVERROR_EOF ) {
end = 2;
break;
} else if (ret < 0) {
av_log(NULL, AV_LOG_ERROR, "Error while receiving a frame from the decoder\n");
que->finish();
return;
}
decode_num++;
if (decoder.frame->format == AV_PIX_FMT_YUV420P)
{
cv::Mat srcImage;
srcImage = cv::Mat::zeros(decoder.frame->height*3/2, decoder.frame->width, CV_8UC1);
memcpy(srcImage.data, (unsigned char*)decoder.frame->data[0], decoder.frame->width * decoder.frame->height);
memcpy(srcImage.data + decoder.frame->width * decoder.frame->height, (unsigned char*)decoder.frame->data[1], decoder.frame->width * decoder.frame->height/4);
memcpy(srcImage.data + decoder.frame->width * decoder.frame->height*5/4, (unsigned char*)decoder.frame->data[2], decoder.frame->width * decoder.frame->height/4);
que->enQueue(srcImage);
av_frame_unref(decoder.frame);
}
else
{
av_log(NULL, AV_LOG_ERROR, "video format not yuv420p, fix the code first.\n");
que->finish();
return;
}
}
}
av_packet_unref(decoder.pkt);
}
fprintf(stdout, "Decoder: ####### decode finish, frame count: %d\n", decode_num);
que->finish();
}
void Ort_DetectorYOLOV5()
{
const std::string modelPath = "../Resource/Models/Yolov5/yolov5s.onnx";
const std::string keysPath = "../Resource/Models/Yolov5/coco.names";
Queue* queue = new Queue(1);
DetectorYOLOV5 detector;
detector.setNumThread(1);
detector.setGpuIndex(0);
detector.initModel(modelPath, keysPath);
std::thread ThreadDecoder(DecoderThreadFunc, queue);
while (!queue->DecodeEnd) {
cv::Mat InferImage;
queue->deQueue(&InferImage);
if (InferImage.empty()) {
continue;
}
cvtColor(InferImage, InferImage, COLOR_YUV420p2RGB);
double time1 = getTickCount();
detector.Detect(InferImage);
double time2 = getTickCount();
double elapsedTime = (time2 - time1)*1000 / getTickFrequency();
fprintf(stdout, "inference time: %.3f ms\n", elapsedTime);
}
ThreadDecoder.join();
delete queue;
queue = NULL;
return;
}
#include "main.h"
#include <sys/time.h>
#include "DetectorYOLOV7.h"
static void DecoderThreadFunc(Queue* queue)
{
int ret, end = 0;
int decode_num = 0;
Queue* que = queue;
Decoder decoder;
ret = decoder.DecoderInit();
if (ret == -1)
{
av_log(NULL, AV_LOG_ERROR, "decoder init failed.\n");
que->finish();
return;
}
while(true)
{
if (av_read_frame(decoder.fmt_ctx, decoder.pkt) < 0)
{
if(end == 2)
{
que->DecodeEnd = true;
break;
}
end = 1;
}
if (decoder.pkt->stream_index == decoder.video_stream_idx) {
if(!end) {
ret = avcodec_send_packet(decoder.video_dec_ctx, decoder.pkt);
} else {
ret = avcodec_send_packet(decoder.video_dec_ctx, NULL);
}
if (ret < 0 && ret != AVERROR_EOF) {
fprintf(stderr, "Error submitting a packet for decoding\n");
que->DecodeEnd = true;
break;
}
while (ret >= 0 || end == 1)
{
ret = avcodec_receive_frame(decoder.video_dec_ctx, decoder.frame);
if (ret == AVERROR(EAGAIN)) {
break;
} else if (ret == AVERROR_EOF ) {
end = 2;
break;
} else if (ret < 0) {
av_log(NULL, AV_LOG_ERROR, "Error while receiving a frame from the decoder\n");
que->finish();
return;
}
decode_num++;
if (decoder.frame->format == AV_PIX_FMT_YUV420P)
{
cv::Mat srcImage;
srcImage = cv::Mat::zeros(decoder.frame->height*3/2, decoder.frame->width, CV_8UC1);
memcpy(srcImage.data, (unsigned char*)decoder.frame->data[0], decoder.frame->width * decoder.frame->height);
memcpy(srcImage.data + decoder.frame->width * decoder.frame->height, (unsigned char*)decoder.frame->data[1], decoder.frame->width * decoder.frame->height/4);
memcpy(srcImage.data + decoder.frame->width * decoder.frame->height*5/4, (unsigned char*)decoder.frame->data[2], decoder.frame->width * decoder.frame->height/4);
que->enQueue(srcImage);
av_frame_unref(decoder.frame);
}
else
{
av_log(NULL, AV_LOG_ERROR, "video format not yuv420p, fix the code first.\n");
que->finish();
return;
}
}
}
av_packet_unref(decoder.pkt);
}
fprintf(stdout, "Decoder: ####### decode finish, frame count: %d\n", decode_num);
que->finish();
}
void Ort_DetectorYOLOV7()
{
const std::string modelPath = "../Resource/Models/Yolov7/yolov7-tiny.onnx";
const std::string keysPath = "../Resource/Models/Yolov7/coco.names";
Queue* queue = new Queue(1);
DetectorYOLOV7 detector;
detector.setNumThread(1);
detector.setGpuIndex(0);
detector.initModel(modelPath, keysPath);
std::thread ThreadDecoder(DecoderThreadFunc, queue);
while (!queue->DecodeEnd) {
cv::Mat InferImage;
queue->deQueue(&InferImage);
if (InferImage.empty()) {
continue;
}
cvtColor(InferImage, InferImage, COLOR_YUV420p2RGB);
double time1 = getTickCount();
detector.Detect(InferImage);
double time2 = getTickCount();
double elapsedTime = (time2 - time1)*1000 / getTickFrequency();
fprintf(stdout, "inference time: %.3f ms\n", elapsedTime);
}
ThreadDecoder.join();
delete queue;
queue = NULL;
return;
}