Commit 0332ddcb authored by wangsen's avatar wangsen
Browse files

add yolov8

parent b10952cf
...@@ -35,11 +35,13 @@ set(SOURCE_FILES ${CMAKE_CURRENT_SOURCE_DIR}/src/main.cpp ...@@ -35,11 +35,13 @@ set(SOURCE_FILES ${CMAKE_CURRENT_SOURCE_DIR}/src/main.cpp
${CMAKE_CURRENT_SOURCE_DIR}/src/YOLOV3.cpp ${CMAKE_CURRENT_SOURCE_DIR}/src/YOLOV3.cpp
${CMAKE_CURRENT_SOURCE_DIR}/src/YOLOV5.cpp ${CMAKE_CURRENT_SOURCE_DIR}/src/YOLOV5.cpp
${CMAKE_CURRENT_SOURCE_DIR}/src/YOLOV7.cpp ${CMAKE_CURRENT_SOURCE_DIR}/src/YOLOV7.cpp
${CMAKE_CURRENT_SOURCE_DIR}/src/YOLOV8.cpp
${CMAKE_CURRENT_SOURCE_DIR}/src/RetinaFace.cpp ${CMAKE_CURRENT_SOURCE_DIR}/src/RetinaFace.cpp
${CMAKE_CURRENT_SOURCE_DIR}/src/Inference/DetectorSSD.cpp ${CMAKE_CURRENT_SOURCE_DIR}/src/Inference/DetectorSSD.cpp
${CMAKE_CURRENT_SOURCE_DIR}/src/Inference/DetectorYOLOV3.cpp ${CMAKE_CURRENT_SOURCE_DIR}/src/Inference/DetectorYOLOV3.cpp
${CMAKE_CURRENT_SOURCE_DIR}/src/Inference/DetectorYOLOV5.cpp ${CMAKE_CURRENT_SOURCE_DIR}/src/Inference/DetectorYOLOV5.cpp
${CMAKE_CURRENT_SOURCE_DIR}/src/Inference/DetectorYOLOV7.cpp ${CMAKE_CURRENT_SOURCE_DIR}/src/Inference/DetectorYOLOV7.cpp
${CMAKE_CURRENT_SOURCE_DIR}/src/Inference/DetectorYOLOV8.cpp
${CMAKE_CURRENT_SOURCE_DIR}/src/Inference/DetectorRetinaFace.cpp ${CMAKE_CURRENT_SOURCE_DIR}/src/Inference/DetectorRetinaFace.cpp
${CMAKE_CURRENT_SOURCE_DIR}/src/Utility/CommonUtility.cpp ${CMAKE_CURRENT_SOURCE_DIR}/src/Utility/CommonUtility.cpp
${CMAKE_CURRENT_SOURCE_DIR}/src/Utility/Filesystem.cpp) ${CMAKE_CURRENT_SOURCE_DIR}/src/Utility/Filesystem.cpp)
......
...@@ -201,4 +201,15 @@ ...@@ -201,4 +201,15 @@
<NMSThreshold>0.5</NMSThreshold> <NMSThreshold>0.5</NMSThreshold>
<ObjectThreshold>0.5</ObjectThreshold> <ObjectThreshold>0.5</ObjectThreshold>
</DetectorYOLOV7> </DetectorYOLOV7>
<!--YOLOV8检测器 -->
<DetectorYOLOV8>
<ModelPath>"../Resource/Models/Detector/YOLOV8/yolov8n_static.onnx"</ModelPath>
<ClassNameFile>"../Resource/Models/Detector/YOLOV8/coco.names"</ClassNameFile>
<UseFP16>0</UseFP16><!--是否使用FP16-->
<NumberOfClasses>80</NumberOfClasses><!--类别数(不包括背景类),COCO:80,VOC:20-->
<ConfidenceThreshold>0.25</ConfidenceThreshold>
<NMSThreshold>0.5</NMSThreshold>
<!-- <ObjectThreshold>0.5</ObjectThreshold> -->
</DetectorYOLOV8>
</opencv_storage> </opencv_storage>
person
bicycle
car
motorbike
aeroplane
bus
train
truck
boat
traffic light
fire hydrant
stop sign
parking meter
bench
bird
cat
dog
horse
sheep
cow
elephant
bear
zebra
giraffe
backpack
umbrella
handbag
tie
suitcase
frisbee
skis
snowboard
sports ball
kite
baseball bat
baseball glove
skateboard
surfboard
tennis racket
bottle
wine glass
cup
fork
knife
spoon
bowl
banana
apple
sandwich
orange
broccoli
carrot
hot dog
pizza
donut
cake
chair
sofa
pottedplant
bed
diningtable
toilet
tvmonitor
laptop
mouse
remote
keyboard
cell phone
microwave
oven
toaster
sink
refrigerator
book
clock
vase
scissors
teddy bear
hair drier
toothbrush
// YOLOV8 detector
#ifndef __DETECTOR_YOLOV8_H__
#define __DETECTOR_YOLOV8_H__
#include <string>
#include <migraphx/program.hpp>
#include <opencv2/opencv.hpp>
#include <CommonDefinition.h>
#include "Decoder.h"
using namespace std;
using namespace cv;
using namespace migraphx;
namespace migraphxSamples
{
    // YOLOV8 post-processing parameters (read from the XML configuration file).
    typedef struct _YOLOV8Parameter
    {
        int numberOfClasses;// number of classes (background excluded), e.g. COCO:80, VOC:20
        float confidenceThreshold;// per-class score threshold for keeping a proposal
        float nmsThreshold;// IoU threshold used by non-maximum suppression
        // float objectThreshold;// objectness threshold (not used by the YOLOv8 head)
    }YOLOV8Parameter;
    // MIGraphX-based YOLOv8 object detector: loads an ONNX model, compiles it
    // for the GPU target, and decodes detections from the model output.
    class DetectorYOLOV8
    {
    public:
        DetectorYOLOV8();
        ~DetectorYOLOV8();
        // Reads the "DetectorYOLOV8" configuration section, parses/compiles the
        // ONNX model, warms it up, and loads class names. Must be called first.
        ErrorCode Initialize(InitializationParameterOfDetector initializationParameterOfDetector);
        // Detects objects in a CV_8UC3 BGR host image (CPU preprocessing path).
        ErrorCode Detect(const cv::Mat &srcImage, std::vector<ResultOfDetection> &resultsOfDetection);
        // Detects objects in a decoded device-resident frame (DMA/P2P path).
        ErrorCode Detect(DCU_Frame &srcImage, std::vector<ResultOfDetection> &resultsOfDetection);
        // Device-side preprocessing buffer, allocated in Initialize when DMA is
        // enabled. NOTE(review): freed by the sample code, not by this class.
        float* preprocess_Image = NULL;
    private:
        // Shared setup: log file, configuration file, parent-path normalization.
        ErrorCode DoCommonInitialization(InitializationParameterOfDetector initializationParameterOfDetector);
    private:
        cv::FileStorage configurationFile;// opened XML configuration storage
        InitializationParameterOfDetector initializationParameter;
        FILE *logFile;// shared log handle (not owned by this class)
        // net
        migraphx::program net;// parsed and compiled MIGraphX program
        cv::Size inputSize;// network input size (width, height) from the model
        string inputName;// name of the model's input tensor
        migraphx::shape inputShape;// NCHW input shape
        bool useFP16;// quantize the program to FP16 before compiling
        vector<string> classNames;// class-id -> human-readable label
        YOLOV8Parameter yolov8Parameter;// thresholds and class count
        migraphx::parameter_map ParameterMap;// pre-built parameter map (DMA path)
    };
}
#endif
\ No newline at end of file
...@@ -15,7 +15,14 @@ void Sample_DetectorYOLOV5(int device); ...@@ -15,7 +15,14 @@ void Sample_DetectorYOLOV5(int device);
// YOLOV7 sample // YOLOV7 sample
void Sample_DetectorYOLOV7(int device); void Sample_DetectorYOLOV7(int device);
// YOLOV8 sample
void Sample_DetectorYOLOV8(int device);
// RetinaFace sample // RetinaFace sample
void Sample_DetectorRetinaFace(int device); void Sample_DetectorRetinaFace(int device);
#endif #endif
#include <DetectorYOLOV8.h>
#include <migraphx/onnx.hpp>
#include <migraphx/gpu/target.hpp>
#include <migraphx/gpu/hip.hpp>
#include <migraphx/generate.hpp>
#include <migraphx/quantization.hpp>
#include <opencv2/dnn.hpp>
#include <CommonUtility.h>
#include <Filesystem.h>
#include <SimpleLog.h>
#include <fstream>
#include <hip/hip_runtime.h>
#include "hip/hip_runtime.h"
using namespace cv::dnn;
namespace migraphxSamples
{
// Constructs the detector with no log file attached; all real setup (model
// loading, compilation, configuration) happens in Initialize().
DetectorYOLOV8::DetectorYOLOV8():logFile(NULL)
{
}
// Releases the configuration file storage.
// NOTE(review): preprocess_Image (device memory in the DMA build) is NOT freed
// here — the sample code calls hipFree on it; confirm ownership before adding
// cleanup to avoid a double free.
DetectorYOLOV8::~DetectorYOLOV8()
{
    configurationFile.release();
}
// Loads the DetectorYOLOV8 section of the XML configuration, parses and
// compiles the ONNX model for the GPU target, warms it up with one inference,
// and loads the class-name list.
// Returns SUCCESS, or an error code if the configuration or model is missing.
ErrorCode DetectorYOLOV8::Initialize(InitializationParameterOfDetector initializationParameterOfDetector)
{
    // Common setup (log file, configuration file, parent path, etc.)
    ErrorCode errorCode=DoCommonInitialization(initializationParameterOfDetector);
    if(errorCode!=SUCCESS)
    {
        LOG_ERROR(logFile,"fail to DoCommonInitialization\n");
        return errorCode;
    }
    LOG_INFO(logFile,"succeed to DoCommonInitialization\n");
    // Read detector parameters from the configuration file.
    FileNode netNode = configurationFile["DetectorYOLOV8"];
    string modelPath=initializationParameter.parentPath+(string)netNode["ModelPath"];
    string pathOfClassNameFile=(string)netNode["ClassNameFile"];
    yolov8Parameter.confidenceThreshold = (float)netNode["ConfidenceThreshold"];
    yolov8Parameter.nmsThreshold = (float)netNode["NMSThreshold"];
    // yolov8Parameter.objectThreshold = (float)netNode["ObjectThreshold"];
    yolov8Parameter.numberOfClasses=(int)netNode["NumberOfClasses"];
    useFP16=(bool)(int)netNode["UseFP16"];
    // Load the ONNX model.
    if(Exists(modelPath)==false)
    {
        LOG_ERROR(logFile,"%s not exist!\n",modelPath.c_str());
        return MODEL_NOT_EXIST;
    }
    net = migraphx::parse_onnx(modelPath);
    LOG_INFO(logFile,"succeed to load model: %s\n",GetFileName(modelPath).c_str());
    // Query the model's (single) input: its parameter name and NCHW shape.
    std::pair<std::string, migraphx::shape> inputAttribute=*(net.get_parameter_shapes().begin());
    inputName=inputAttribute.first;
    inputShape=inputAttribute.second;
    inputSize=cv::Size(inputShape.lens()[3],inputShape.lens()[2]);// NCHW
    // Select the GPU target for compilation.
    migraphx::target gpuTarget = migraphx::gpu::target{};
    // Optional FP16 quantization (must happen before compile).
    if(useFP16)
    {
        migraphx::quantize_fp16(net);
    }
    // Compile the program for the GPU.
    migraphx::compile_options options;
    options.device_id=0; // GPU device id, default device 0 (supported in >=1.2)
    #ifdef DMA
    options.offload_copy=false; // caller manages device buffers (P2P/DMA build)
    #else
    options.offload_copy=true; // MIGraphX copies host data to/from the device
    #endif
    net.compile(gpuTarget,options);
    LOG_INFO(logFile,"succeed to compile model: %s\n",GetFileName(modelPath).c_str());
    // Run once by itself (warm-up so the first real Detect is not slow)
    #ifdef DMA
    ParameterMap=CreateParameterMap(net);
    net.eval(ParameterMap);
    #else
    migraphx::parameter_map inputData;
    inputData[inputName]=migraphx::generate_argument(inputShape);
    net.eval(inputData);
    #endif
    // Load class names (one label per line); fall back to empty labels.
    if(!pathOfClassNameFile.empty())
    {
        ifstream classNameFile(pathOfClassNameFile);
        string line;
        while (getline(classNameFile, line))
        {
            classNames.push_back(line);
        }
    }
    else
    {
        classNames.resize(yolov8Parameter.numberOfClasses);
    }
    #ifdef DMA
    // Device buffer for the preprocessed planar-RGB float input.
    hipMalloc((void**)&preprocess_Image, inputSize.height * inputSize.width * 3 * sizeof(float));
    #endif
    // Log the effective configuration.
    LOG_INFO(logFile,"InputSize:%dx%d\n",inputSize.width,inputSize.height);
    LOG_INFO(logFile,"InputName:%s\n",inputName.c_str());
    LOG_INFO(logFile,"ConfidenceThreshold:%f\n",yolov8Parameter.confidenceThreshold);
    // LOG_INFO(logFile,"objectThreshold:%f\n",yolov8Parameter.objectThreshold);
    LOG_INFO(logFile,"NMSThreshold:%f\n",yolov8Parameter.nmsThreshold);
    LOG_INFO(logFile,"NumberOfClasses:%d\n",yolov8Parameter.numberOfClasses);
    return SUCCESS;
}
#ifdef DMA
// Repacks one pixel of a planar BGR frame into planar RGB floats scaled to
// [0,1], the layout the network input expects. One thread per pixel.
// NOTE(review): plane order assumed B, G, R — matches the DMA merge path in
// the sample code; confirm against the actual decoder output.
__global__ void convert_bgrp_to_rgb_and_normalization_yolov8(unsigned char* srcImage, float* outImage, int width, int height)
{
    const int pixel = blockIdx.x * blockDim.x + threadIdx.x;
    const int planeSize = width * height;
    if (pixel >= planeSize)
        return;
    const float scale = 255.0f;
    const unsigned char b = srcImage[pixel + planeSize * 0];
    const unsigned char g = srcImage[pixel + planeSize * 1];
    const unsigned char r = srcImage[pixel + planeSize * 2];
    outImage[pixel + planeSize * 0] = r / scale;
    outImage[pixel + planeSize * 1] = g / scale;
    outImage[pixel + planeSize * 2] = b / scale;
}
// Converts one YUV420P pixel to planar RGB floats in [0,1].
// Fixes vs. previous version:
//  - The kernel is launched with a 1-D grid (see Detect), but the old code
//    used 2-D indexing (blockIdx.y/threadIdx.y), so y was always 0 and only
//    the first image row was ever converted. Coordinates are now derived
//    from the linear thread index.
//  - The converted RGB values are clamped to [0,255] and normalized by 255,
//    matching the other preprocessing kernels and the CPU path's 1/255 scale.
__global__ void convert_yuv420p_to_rgb_and_normalization_yolov8(unsigned char* srcImage, float* outImage, int width, int height)
{
    int idx = blockIdx.x * blockDim.x + threadIdx.x;
    if (idx >= width * height)
        return;
    int x = idx % width;
    int y = idx / width;
    // Plane offsets: full-resolution Y plane, then quarter-size U and V planes.
    int uIndex = (y / 2) * (width / 2) + (x / 2) + width * height;
    int vIndex = (y / 2) * (width / 2) + (x / 2) + width * height * 5 / 4;
    float yValue = srcImage[idx];
    float uValue = srcImage[uIndex] - 128.0f;
    float vValue = srcImage[vIndex] - 128.0f;
    // BT.601 YUV -> RGB conversion (same coefficients as the original code).
    float r = yValue + 1.370705f * vValue;
    float g = yValue - 0.698001f * vValue - 0.337633f * uValue;
    float b = yValue + 1.732446f * uValue;
    // Clamp to the valid 8-bit range before normalizing.
    r = fminf(fmaxf(r, 0.0f), 255.0f);
    g = fminf(fmaxf(g, 0.0f), 255.0f);
    b = fminf(fmaxf(b, 0.0f), 255.0f);
    outImage[idx + width * height * 0] = r / 255.0f;
    outImage[idx + width * height * 1] = g / 255.0f;
    outImage[idx + width * height * 2] = b / 255.0f;
}
// Converts one pixel of an interleaved RGBA frame to planar RGB floats scaled
// to [0,1]; the alpha channel is dropped. One thread per pixel.
__global__ void convert_rgba_to_rgb_and_normalization_yolov8(unsigned char* srcImage, float* outImage, int width, int height)
{
    const int pixel = blockIdx.x * blockDim.x + threadIdx.x;
    const int planeSize = width * height;
    if (pixel >= planeSize)
        return;
    const float scale = 255.0f;
    const unsigned char r = srcImage[pixel * 4 + 0];
    const unsigned char g = srcImage[pixel * 4 + 1];
    const unsigned char b = srcImage[pixel * 4 + 2];
    outImage[pixel + planeSize * 0] = r / scale;
    outImage[pixel + planeSize * 1] = g / scale;
    outImage[pixel + planeSize * 2] = b / scale;
}
#endif
// Runs YOLOv8 inference on a BGR 8-bit host image and appends detections
// (class id/name, confidence, box in source-image coordinates) to
// resultsOfDetection. Returns IMAGE_ERROR for an empty or non-CV_8UC3 image.
// Changes vs. previous version: explicit double->float cast for the score,
// renamed the raw-output variable so it no longer shadows the per-detection
// `result`, removed the single-element `outs` vector, and cast the size_t
// class count when building the scores Mat.
ErrorCode DetectorYOLOV8::Detect(const cv::Mat &srcImage,std::vector<ResultOfDetection> &resultsOfDetection)
{
    if(srcImage.empty()||srcImage.type()!=CV_8UC3)
    {
        LOG_ERROR(logFile, "image error!\n");
        return IMAGE_ERROR;
    }
    // Preprocess: resize to the network input size, scale to [0,1],
    // swap BGR->RGB and pack as NCHW.
    cv::Mat inputBlob;
    blobFromImage(srcImage,
                  inputBlob,
                  1 / 255.0,
                  inputSize,
                  Scalar(0, 0, 0),
                  true,
                  false);
    // Feed the input tensor and run inference.
    migraphx::parameter_map inputData;
    inputData[inputName]= migraphx::argument{inputShape, (float*)inputBlob.data};
    std::vector<migraphx::argument> inferenceResults = net.eval(inputData);
    migraphx::argument rawOutput = inferenceResults[0];
    // Copy the raw output into an owned cv::Mat of shape (1, numOut, numProposal).
    migraphx::shape outputShape = rawOutput.get_shape();
    int shape[]={static_cast<int>(outputShape.lens()[0]),
                 static_cast<int>(outputShape.lens()[1]),
                 static_cast<int>(outputShape.lens()[2])};
    cv::Mat out(3,shape,CV_32F);
    memcpy(out.data,rawOutput.data(),sizeof(float)*outputShape.elements());
    // YOLOv8 output layout is (1, 4+numClasses, numProposal); transpose so each
    // row holds one proposal: [cx, cy, w, h, class scores...].
    int numProposal = out.size[2];
    int numOut = out.size[1];
    cv::Mat proposals = out.reshape(1, numOut);
    cv::transpose(proposals, proposals);
    float *data = (float *)proposals.data;
    // Candidate detections collected before NMS.
    std::vector<float> confidences;
    std::vector<cv::Rect> boxes;
    std::vector<int> classIds;
    // Scale factors mapping network-input coordinates back to the source image.
    float ratioh = (float)srcImage.rows / inputSize.height, ratiow = (float)srcImage.cols / inputSize.width;
    for (int n = 0; n < numProposal; n++)
    {
        // Find the highest-scoring class for this proposal.
        float *classes_scores = data + 4;
        cv::Mat scores(1, static_cast<int>(classNames.size()), CV_32FC1, classes_scores);
        cv::Point class_id;
        double maxClassScore;
        cv::minMaxLoc(scores, 0, &maxClassScore, 0, &class_id);
        if (maxClassScore > yolov8Parameter.confidenceThreshold)
        {
            confidences.push_back(static_cast<float>(maxClassScore));
            classIds.push_back(class_id.x);
            // The box is encoded as center/size in network-input pixels.
            float x = data[0];
            float y = data[1];
            float w = data[2];
            float h = data[3];
            int left = int((x - 0.5 * w) * ratiow);
            int top = int((y - 0.5 * h) * ratioh);
            int width = int(w * ratiow);
            int height = int(h * ratioh);
            boxes.push_back(cv::Rect(left, top, width, height));
        }
        data += numOut;
    }
    // Non-maximum suppression to drop redundant overlapping boxes.
    std::vector<int> indices;
    dnn::NMSBoxes(boxes, confidences, yolov8Parameter.confidenceThreshold, yolov8Parameter.nmsThreshold, indices);
    for (size_t i = 0; i < indices.size(); ++i)
    {
        int idx = indices[i];
        ResultOfDetection result;
        result.boundingBox=boxes[idx];
        result.confidence=confidences[idx];// confidence
        result.classID=classIds[idx]; // label
        result.className=classNames[classIds[idx]];
        resultsOfDetection.push_back(result);
    }
    return SUCCESS;
}
#ifdef DMA
// Detects objects in a decoded device-resident frame (DMA/P2P path): converts
// the frame to normalized planar RGB on the device, runs inference, and decodes
// the YOLOv8 output the same way as the CPU overload.
// Fixes vs. previous version: the `data` pointer was used but never declared
// (did not compile); the output was read without the transpose the CPU path
// applies, with numProposal/numOut taken from the wrong axes; and the scale
// ratios read `srcImage.rows/.cols`, which DCU_Frame does not provide — its
// dimensions are `.width`/`.height`.
ErrorCode DetectorYOLOV8::Detect(DCU_Frame &srcImage, std::vector<ResultOfDetection> &resultsOfDetection)
{
    // Device-side preprocessing into preprocess_Image (planar RGB in [0,1]).
    int block_size = 256;
    int num_blocks = (srcImage.width * srcImage.height + block_size - 1) / block_size;
    if(srcImage.format == AV_PIX_FMT_BGRP)
    {
        convert_bgrp_to_rgb_and_normalization_yolov8<<<num_blocks, block_size>>>(srcImage.dcu_data, preprocess_Image, srcImage.width, srcImage.height);
    }
    if(srcImage.format == AV_PIX_FMT_YUV420P)
    {
        convert_yuv420p_to_rgb_and_normalization_yolov8<<<num_blocks, block_size>>>(srcImage.dcu_data, preprocess_Image, srcImage.width, srcImage.height);
    }
    if(srcImage.format == AV_PIX_FMT_RGBA)
    {
        convert_rgba_to_rgb_and_normalization_yolov8<<<num_blocks, block_size>>>(srcImage.dcu_data, preprocess_Image, srcImage.width, srcImage.height);
    }
    // Run inference directly on the preprocessed device buffer.
    ParameterMap[inputName] = migraphx::argument{inputShape, preprocess_Image};
    std::vector<migraphx::argument> inferenceResults = net.eval(ParameterMap);
    migraphx::argument rawOutput = inferenceResults[0];
    // Wrap the raw output (1, numOut, numProposal) without copying.
    // NOTE(review): with offload_copy=false this assumes rawOutput.data() is
    // host-accessible — confirm against the MIGraphX version in use.
    migraphx::shape outputShape = rawOutput.get_shape();
    int shape[]={static_cast<int>(outputShape.lens()[0]), static_cast<int>(outputShape.lens()[1]), static_cast<int>(outputShape.lens()[2])};
    cv::Mat out(3, shape, CV_32F, (unsigned char*)rawOutput.data());
    // YOLOv8 output layout is (1, 4+numClasses, numProposal); transpose so each
    // row holds one proposal [cx, cy, w, h, class scores...] — mirrors the CPU
    // Detect() overload.
    int numProposal = out.size[2];
    int numOut = out.size[1];
    cv::Mat proposals = out.reshape(1, numOut);
    cv::transpose(proposals, proposals);
    float *data = (float *)proposals.data;
    // Candidate detections collected before NMS.
    std::vector<float> confidences;
    std::vector<cv::Rect> boxes;
    std::vector<int> classIds;
    // Scale factors mapping network-input coordinates back to the source frame.
    float ratioh = (float)srcImage.height / inputSize.height;
    float ratiow = (float)srcImage.width / inputSize.width;
    for (int n = 0; n < numProposal; n++)
    {
        // Find the highest-scoring class for this proposal.
        float *classes_scores = data + 4;
        cv::Mat scores(1, static_cast<int>(classNames.size()), CV_32FC1, classes_scores);
        cv::Point class_id;
        double maxClassScore;
        cv::minMaxLoc(scores, 0, &maxClassScore, 0, &class_id);
        if (maxClassScore > yolov8Parameter.confidenceThreshold)
        {
            confidences.push_back(static_cast<float>(maxClassScore));
            classIds.push_back(class_id.x);
            // The box is encoded as center/size in network-input pixels.
            float x = data[0];
            float y = data[1];
            float w = data[2];
            float h = data[3];
            int left = int((x - 0.5 * w) * ratiow);
            int top = int((y - 0.5 * h) * ratioh);
            int width = int(w * ratiow);
            int height = int(h * ratioh);
            boxes.push_back(cv::Rect(left, top, width, height));
        }
        data += numOut;
    }
    // Non-maximum suppression to drop redundant overlapping boxes.
    std::vector<int> indices;
    dnn::NMSBoxes(boxes, confidences, yolov8Parameter.confidenceThreshold, yolov8Parameter.nmsThreshold, indices);
    for (size_t i = 0; i < indices.size(); ++i)
    {
        int idx = indices[i];
        ResultOfDetection result;
        result.boundingBox=boxes[idx];
        result.confidence=confidences[idx];
        result.classID=classIds[idx];
        result.className=classNames[classIds[idx]];
        resultsOfDetection.push_back(result);
    }
    return SUCCESS;
}
#endif
// Shared detector setup: resolves the log file, opens the XML configuration,
// and normalizes the parent path so it ends with a path separator.
// Returns SUCCESS, or an error code when the configuration file is missing or
// cannot be opened.
ErrorCode DetectorYOLOV8::DoCommonInitialization(InitializationParameterOfDetector initializationParameterOfDetector)
{
    initializationParameter=initializationParameterOfDetector;
    // Acquire the shared log handle for this detector instance.
    logFile=LogManager::GetInstance()->GetLogFile(initializationParameter.logName);
    // Open the configuration file; bail out early if missing or unreadable.
    const std::string &cfgPath=initializationParameter.configFilePath;
    if(!Exists(cfgPath))
    {
        LOG_ERROR(logFile, "no configuration file!\n");
        return CONFIG_FILE_NOT_EXIST;
    }
    if(!configurationFile.open(cfgPath, FileStorage::READ))
    {
        LOG_ERROR(logFile, "fail to open configuration file\n");
        return FAIL_TO_OPEN_CONFIG_FILE;
    }
    LOG_INFO(logFile, "succeed to open configuration file\n");
    // Ensure a non-empty parent path ends with a separator so it can be
    // prepended directly to relative model paths.
    std::string &parentPath = initializationParameter.parentPath;
    if (!parentPath.empty() && !IsPathSeparator(parentPath[parentPath.size() - 1]))
    {
        parentPath+=PATH_SEPARATOR;
    }
    return SUCCESS;
}
}
#include <sys/time.h>
#include <memory>
#include <thread>
#include <Sample.h>
#include <SimpleLog.h>
#include <Filesystem.h>
#include <DetectorYOLOV8.h>
#include <Decoder.h>
#include <Queuethread.h>
using namespace cv;
using namespace std;
using namespace cv::dnn;
using namespace migraphx;
using namespace migraphxSamples;
// Decoder thread: reads packets from the input file, decodes video frames
// (optionally through the NETINT hardware decoder plus a filter graph),
// converts each frame to the queue's expected representation and enqueues it
// for the detector thread. Signals completion via que->finish() and/or
// que->DecodeEnd on every exit path.
static void DecoderThreadFunc(Queue* queue)
{
    // end: 0 = normal decoding, 1 = input EOF reached (flushing the decoder),
    //      2 = decoder fully drained.
    int ret, end = 0;
    int frame_cnt = 0;
    Queue* que = queue;
    Decoder decoder(que->device);
    InitializationParameterOfDecoder initParamOfDecoderYOLOV8;
#ifndef DMA
    initParamOfDecoderYOLOV8.src_filename = "../Resource/Images/Mean.mp4";
    if( que->device == _HW)
    {
        // Hardware decode, scale to 640x640 planar BGR, download to host memory.
        initParamOfDecoderYOLOV8.str_devid[0] = {0};
        initParamOfDecoderYOLOV8.xcoder_params = "out=hw";
        initParamOfDecoderYOLOV8.dec_name = "h264_ni_quadra_dec";
        initParamOfDecoderYOLOV8.filters_descr = "ni_quadra_scale=640:640:format=bgrp,hwdownload,format=bgrp";
    } else if (que->device == _HW_DMA) {
        // DMA mode needs the USE_P2P build; reject the flag combination here.
        LOG_ERROR(stdout, "Error program param or cmake param, not USE_P2P can`t set '--dma'!\n");
        que->finish();
        return;
    }
#else
    if( que->device == _HW_DMA) {
        // P2P build: keep frames device-resident (RGBA) for DMA transfer.
        initParamOfDecoderYOLOV8.str_devid[0] = {0};
        initParamOfDecoderYOLOV8.xcoder_params = "out=hw";
        initParamOfDecoderYOLOV8.dec_name = "h264_ni_quadra_dec";
        initParamOfDecoderYOLOV8.filters_descr = "ni_quadra_scale=640:640:format=rgba:is_p2p=1";
        initParamOfDecoderYOLOV8.src_filename = "../Resource/Images/cr_1920x1080.h264";
    } else {
        LOG_ERROR(stdout, "Error program param or cmake param, USE_P2P need set '--dma'!\n");
        que->finish();
        return;
    }
#endif
    ret = decoder.DecoderInit(initParamOfDecoderYOLOV8);
    if (ret == -1)
    {
        que->finish();
        return;
    }
    while(true)
    {
        // Read the next packet; on input EOF switch to draining the decoder.
        if (av_read_frame(decoder.fmt_ctx, decoder.pkt) < 0)
        {
            if(end == 2)
            {
                // Decoder fully drained: tell the detector no more frames come.
                que->DecodeEnd = true;
                break;
            }
            end = 1;
        }
        if (decoder.pkt->stream_index == decoder.video_stream_idx) {
            // Send the packet, or NULL to flush once the input is exhausted.
            if(!end) {
                ret = avcodec_send_packet(decoder.video_dec_ctx, decoder.pkt);
            } else {
                ret = avcodec_send_packet(decoder.video_dec_ctx, NULL);
            }
            if (ret < 0 && ret != AVERROR_EOF) {
                fprintf(stderr, "Error submitting a packet for decoding\n");
                que->DecodeEnd = true;
                break;
            }
            // Receive every frame available for this packet (or during flush).
            while (ret >= 0 || end == 1)
            {
                ret = avcodec_receive_frame(decoder.video_dec_ctx, decoder.frame);
                if (ret == AVERROR(EAGAIN)) {
                    break;
                } else if (ret == AVERROR_EOF ) {
                    end = 2;
                    break;
                } else if (ret < 0) {
                    av_log(NULL, AV_LOG_ERROR, "Error while receiving a frame from the decoder\n");
                    que->finish();
                    return;
                }
                decoder.frame->pts = decoder.frame->best_effort_timestamp;
                frame_cnt++;
                if (que->device == CPU)
                {
                    // Software path: repack the three YUV420P planes into one
                    // contiguous buffer, then convert to RGB on the CPU.
                    cv::Mat srcImage = cv::Mat::zeros(decoder.frame->height*3/2, decoder.frame->width, CV_8UC1);
                    memcpy(srcImage.data, (unsigned char*)decoder.frame->data[0], decoder.frame->width * decoder.frame->height);
                    memcpy(srcImage.data + decoder.frame->width * decoder.frame->height, (unsigned char*)decoder.frame->data[1], decoder.frame->width * decoder.frame->height/4);
                    memcpy(srcImage.data + decoder.frame->width * decoder.frame->height*5/4, (unsigned char*)decoder.frame->data[2], decoder.frame->width * decoder.frame->height/4);
                    cvtColor(srcImage, srcImage, COLOR_YUV420p2RGB);
                    que->enQueue(srcImage);
                }
                if (que->device == _HW || que->device == _HW_DMA)
                {
                    // Hardware path: run the frame through the filter graph.
                    if (av_buffersrc_add_frame_flags(decoder.buffersrc_ctx, decoder.frame, AV_BUFFERSRC_FLAG_KEEP_REF) < 0) {
                        av_log(NULL, AV_LOG_ERROR, "Error while feeding the filtergraph\n");
                        break;
                    }
                    while (1)
                    {
                        ret = av_buffersink_get_frame(decoder.buffersink_ctx, decoder.filt_frame);
                        if (ret == AVERROR(EAGAIN))
                        {
                            break;
                        }
                        else if(ret == AVERROR_EOF)
                        {
                            end = 2;
                            break;
                        }
                        if (ret < 0)
                        {
                            que->finish();
                            return;
                        }
#ifndef DMA
                        if (que->device == _HW)
                        {
                            // Convert the filtered host frame into a cv::Mat.
                            cv::Mat srcImage;
                            switch (decoder.filt_frame->format)
                            {
                                case AV_PIX_FMT_BGRP:
                                {
                                    // NOTE(review): planes data[0..2] are named r,g,b here
                                    // but b,g,r in the DMA path below — confirm the actual
                                    // plane order produced by the decoder.
                                    srcImage = cv::Mat::zeros(decoder.filt_frame->height, decoder.filt_frame->width, CV_8UC3);
                                    cv::Mat mat_r = cv::Mat(decoder.filt_frame->height, decoder.filt_frame->width, CV_8UC1, (unsigned char*)decoder.filt_frame->data[0]);
                                    cv::Mat mat_g = cv::Mat(decoder.filt_frame->height, decoder.filt_frame->width, CV_8UC1, (unsigned char*)decoder.filt_frame->data[1]);
                                    cv::Mat mat_b = cv::Mat(decoder.filt_frame->height, decoder.filt_frame->width, CV_8UC1, (unsigned char*)decoder.filt_frame->data[2]);
                                    cv::Mat Channels[3]{mat_r, mat_g, mat_b};
                                    cv::merge(Channels, 3, srcImage);
                                    break;
                                }
                                case AV_PIX_FMT_YUV420P:
                                {
                                    // Repack Y/U/V planes and convert to RGB.
                                    srcImage = cv::Mat::zeros(decoder.filt_frame->height*3/2, decoder.filt_frame->width, CV_8UC1);
                                    memcpy(srcImage.data, (unsigned char*)decoder.filt_frame->data[0], decoder.filt_frame->width * decoder.filt_frame->height);
                                    memcpy(srcImage.data + decoder.filt_frame->width * decoder.filt_frame->height, (unsigned char*)decoder.filt_frame->data[1], decoder.filt_frame->width * decoder.filt_frame->height/4);
                                    memcpy(srcImage.data + decoder.filt_frame->width * decoder.filt_frame->height*5/4, (unsigned char*)decoder.filt_frame->data[2], decoder.filt_frame->width * decoder.filt_frame->height/4);
                                    cvtColor(srcImage, srcImage, COLOR_YUV420p2RGB);
                                    break;
                                }
                                case AV_PIX_FMT_RGBA:
                                {
                                    // Interleaved RGBA: copy then drop alpha.
                                    srcImage = cv::Mat::zeros(decoder.filt_frame->height, decoder.filt_frame->width, CV_8UC4);
                                    memcpy(srcImage.data, (unsigned char*)decoder.filt_frame->data[0], decoder.filt_frame->width * decoder.filt_frame->height * 4);
                                    cvtColor(srcImage, srcImage, COLOR_BGRA2RGB);
                                    break;
                                }
                                default:
                                    break;
                            }
                            que->enQueue(srcImage);
                            av_frame_unref(decoder.filt_frame);
                        }
#else
                        if (que->device == _HW_DMA)
                        {
                            // P2P path: size a device buffer for the frame
                            // format, then DMA the frame into it.
                            DCU_Frame dcu_frame;
                            AVHWFramesContext *hwfc = (AVHWFramesContext *)decoder.filt_frame->hw_frames_ctx->data;
                            switch (hwfc->sw_format)
                            {
                                case AV_PIX_FMT_BGRP:
                                {
                                    dcu_frame.format = AV_PIX_FMT_BGRP;
                                    dcu_frame.data_len = decoder.filt_frame->width * decoder.filt_frame->height * 3;
                                    dcu_frame.srcImage = cv::Mat::zeros(decoder.filt_frame->height, decoder.filt_frame->width, CV_8UC3);
                                    break;
                                }
                                case AV_PIX_FMT_YUV420P:
                                {
                                    dcu_frame.format = AV_PIX_FMT_YUV420P;
                                    dcu_frame.data_len = decoder.filt_frame->width * decoder.filt_frame->height * 3 / 2;
                                    dcu_frame.srcImage = cv::Mat::zeros(decoder.filt_frame->height*3/2, decoder.filt_frame->width, CV_8UC1);
                                    break;
                                }
                                case AV_PIX_FMT_RGBA:
                                {
                                    dcu_frame.format = AV_PIX_FMT_RGBA;
                                    dcu_frame.data_len = decoder.filt_frame->width * decoder.filt_frame->height * 4;
                                    dcu_frame.srcImage = cv::Mat::zeros(decoder.filt_frame->height, decoder.filt_frame->width, CV_8UC4);
                                    break;
                                }
                                default:
                                    break;
                            }
                            dcu_frame.width = decoder.filt_frame->width;
                            dcu_frame.height = decoder.filt_frame->height;
                            // Per-frame device buffer; freed by the detector thread.
                            hipMalloc((void**)&(dcu_frame.dcu_data), dcu_frame.data_len * sizeof(unsigned char));
                            ret = decoder.retrieve_filter_frame(dcu_frame, decoder.filt_frame);
                            if (ret)
                                av_log(NULL, AV_LOG_ERROR, "Error while retrieve_filter_frame with p2p.\n");
                            // Also build a host-side RGB copy used for drawing results.
                            if(dcu_frame.format == AV_PIX_FMT_BGRP)
                            {
                                cv::Mat mat_b = cv::Mat(decoder.filt_frame->height, decoder.filt_frame->width, CV_8UC1, (unsigned char*)dcu_frame.srcImage.data);
                                cv::Mat mat_g = cv::Mat(decoder.filt_frame->height, decoder.filt_frame->width, CV_8UC1, (unsigned char*)(dcu_frame.srcImage.data + decoder.filt_frame->height * decoder.filt_frame->width));
                                cv::Mat mat_r = cv::Mat(decoder.filt_frame->height, decoder.filt_frame->width, CV_8UC1, (unsigned char*)(dcu_frame.srcImage.data + decoder.filt_frame->height * decoder.filt_frame->width * 2));
                                cv::Mat Channels[3]{mat_r, mat_g, mat_b};
                                cv::merge(Channels, 3, dcu_frame.srcImage);
                            }
                            if(dcu_frame.format == AV_PIX_FMT_YUV420P)
                                cvtColor(dcu_frame.srcImage, dcu_frame.srcImage, COLOR_YUV420p2RGB);
                            if(dcu_frame.format == AV_PIX_FMT_RGBA)
                                cvtColor(dcu_frame.srcImage, dcu_frame.srcImage, COLOR_BGRA2RGB);
                            queue->enQueue(dcu_frame);
                            av_frame_unref(decoder.filt_frame);
                        }
#endif
                    }
                }
                av_frame_unref(decoder.frame);
            }
        }
        av_packet_unref(decoder.pkt);
    }
    LOG_INFO(stdout, "Decoder: ####### frame count: %d\n", frame_cnt);
    que->finish();
}
// Detector thread: initializes the YOLOv8 detector, then pops frames from the
// queue, runs detection on each, logs per-frame timing plus the detected
// boxes, and reports an overall FPS summary. Runs until the decoder thread
// sets queue->DecodeEnd.
static void DetectorThreadFunc(Queue* que)
{
    Queue* queue = que;
    // DetectorYOLOV8 Init
    DetectorYOLOV8 detector;
    InitializationParameterOfDetector initParamOfDetectorYOLOV8;
    initParamOfDetectorYOLOV8.parentPath = "";
    initParamOfDetectorYOLOV8.configFilePath = CONFIG_FILE;
    initParamOfDetectorYOLOV8.logName = "";
    ErrorCode errorCode=detector.Initialize(initParamOfDetectorYOLOV8);
    if(errorCode!=SUCCESS)
    {
        LOG_ERROR(stdout, "fail to initialize detector!\n");
        exit(-1);
    }
    LOG_INFO(stdout, "succeed to initialize detector\n");
    int frame_cnt = 0;
    double start_time = getTickCount();
    while (!queue->DecodeEnd) {
#ifdef DMA
        // DMA path: the frame's pixel data is device-resident; srcImage is the
        // host copy used only for drawing.
        DCU_Frame dcu_frame;
        queue->deQueue(&dcu_frame);
        if(dcu_frame.srcImage.empty()) {
            continue;
        }
#else
        cv::Mat InferImage;
        queue->deQueue(&InferImage);
        if (InferImage.empty()) {
            continue;
        }
#endif
        // Run detection and time the single inference call.
        std::vector<ResultOfDetection> predictions;
        double time1 = getTickCount();
#ifdef DMA
        detector.Detect(dcu_frame, predictions);
#else
        detector.Detect(InferImage, predictions);
#endif
        double time2 = getTickCount();
        double elapsedTime = (time2 - time1)*1000 / getTickFrequency();
        LOG_INFO(stdout, "inference time:%f ms\n", elapsedTime);
        frame_cnt++;
#ifdef DMA
        // Free the per-frame device buffer allocated by the decoder thread.
        hipFree(dcu_frame.dcu_data);
#endif
        // Draw and log the surviving detections.
        LOG_INFO(stdout,"////////////////Detection Results////////////////\n");
        for( int i = 0; i < predictions.size(); ++i)
        {
            ResultOfDetection result = predictions[i];
#ifdef DMA
            cv::rectangle(dcu_frame.srcImage, result.boundingBox, Scalar(0,255,255),2);
            cv::putText(dcu_frame.srcImage, result.className, cv::Point(result.boundingBox.x, result.boundingBox.y-20), cv::FONT_HERSHEY_PLAIN, 2.0, Scalar(0, 0, 255), 2);
#else
            cv::rectangle(InferImage, result.boundingBox, Scalar(0,255,255),2);
            cv::putText(InferImage, result.className, cv::Point(result.boundingBox.x, result.boundingBox.y-20), cv::FONT_HERSHEY_PLAIN, 2.0, Scalar(0, 0, 255), 2);
#endif
            LOG_INFO(stdout,"box:%d %d %d %d,label:%d,confidence:%f\n",result.boundingBox.x,
            result.boundingBox.y,result.boundingBox.width,result.boundingBox.height,result.classID,result.confidence);
        }
        // X11 display can`t support in docker.
        /*namedWindow("video", WINDOW_NORMAL | WINDOW_KEEPRATIO);
        imshow("video", InferImage);
        if (waitKey(10) == 'q') {
            break;
        }*/
    }
#ifdef DMA
    // Release the detector's device-side preprocessing buffer.
    // NOTE(review): this frees a buffer owned by DetectorYOLOV8 from outside
    // the class — confirm the intended ownership.
    hipFree(detector.preprocess_Image);
#endif
    double end_time = getTickCount();
    fprintf(stdout, "Finish ####### frame_cnt: %d, Inference fps: %.2f, all time: %.2f ms\n", frame_cnt, float(frame_cnt/((end_time - start_time)/getTickFrequency())), (end_time - start_time)/getTickFrequency()*1000);
    queue->finish();
}
void Sample_DetectorYOLOV8(int device)
{
Queue* queue = new Queue(1);
queue->device = device;
std::thread ThreadDecoder(DecoderThreadFunc, queue);
std::thread ThreadDetector(DetectorThreadFunc, queue);
ThreadDecoder.join();
ThreadDetector.join();
delete queue;
queue = NULL;
return;
}
...@@ -36,7 +36,7 @@ int main(int argc, char *argv[]) ...@@ -36,7 +36,7 @@ int main(int argc, char *argv[])
} }
int opt, index; int opt, index;
int device = -1; int device = -1;
const char* Nets[] = {"YOLOV3", "YOLOV5", "YOLOV7", "SSD", "RetinaFace"}; const char* Nets[] = {"YOLOV3", "YOLOV5", "YOLOV7", "SSD", "RetinaFace", "YOLOV8"};
while ((opt = getopt_long(argc, argv, "chdn:", long_options, NULL)) != -1) while ((opt = getopt_long(argc, argv, "chdn:", long_options, NULL)) != -1)
{ {
switch (opt) switch (opt)
...@@ -91,6 +91,10 @@ int main(int argc, char *argv[]) ...@@ -91,6 +91,10 @@ int main(int argc, char *argv[])
{ {
Sample_DetectorRetinaFace(device); Sample_DetectorRetinaFace(device);
} }
case 5:
{
Sample_DetectorYOLOV8(device);
}
break; break;
} }
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment