// NOTE: the original header names were lost; the includes below are reconstructed
// from usage. The project-local header name is an assumption, and the remaining
// project headers (log manager, file utilities, error codes) are expected to be
// pulled in through it.
#include <fstream>
#include <string>
#include <utility>
#include <vector>

#include <opencv2/core.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/dnn.hpp>

#include <migraphx/onnx.hpp>
#include <migraphx/generate.hpp>
#include <migraphx/quantization.hpp>
#include <migraphx/gpu/target.hpp>

#include "DetectorYOLOV3.h" // assumed project header declaring DetectorYOLOV3

#include "hip/hip_runtime.h"

using namespace std;
using namespace cv;
using namespace cv::dnn;

namespace migraphxSamples
{

DetectorYOLOV3::DetectorYOLOV3() : logFile(NULL)
{
}

DetectorYOLOV3::~DetectorYOLOV3()
{
    configurationFile.release();
}

ErrorCode DetectorYOLOV3::Initialize(InitializationParameterOfDetector initializationParameterOfDetector)
{
    // Common initialization (get the log file, load the configuration file, etc.)
    ErrorCode errorCode = DoCommonInitialization(initializationParameterOfDetector);
    if (errorCode != SUCCESS)
    {
        LOG_ERROR(logFile, "fail to DoCommonInitialization\n");
        return errorCode;
    }
    LOG_INFO(logFile, "succeed to DoCommonInitialization\n");

    // Read parameters from the configuration file
    FileNode netNode = configurationFile["DetectorYOLOV3"];
    string modelPath = initializationParameter.parentPath + (string)netNode["ModelPath"];
    string pathOfClassNameFile = (string)netNode["ClassNameFile"];
    yolov3Parameter.confidenceThreshold = (float)netNode["ConfidenceThreshold"];
    yolov3Parameter.nmsThreshold = (float)netNode["NMSThreshold"];
    yolov3Parameter.objectThreshold = (float)netNode["ObjectThreshold"];
    yolov3Parameter.numberOfClasses = (int)netNode["NumberOfClasses"];
    useFP16 = (bool)(int)netNode["UseFP16"];

    // Load the model
    if (Exists(modelPath) == false)
    {
        LOG_ERROR(logFile, "%s not exist!\n", modelPath.c_str());
        return MODEL_NOT_EXIST;
    }
    net = migraphx::parse_onnx(modelPath);
    LOG_INFO(logFile, "succeed to load model: %s\n", GetFileName(modelPath).c_str());

    // Query the model input attributes (parameter name and shape)
    std::pair<std::string, migraphx::shape> inputAttribute = *(net.get_parameter_shapes().begin());
    inputName = inputAttribute.first;
    inputShape = inputAttribute.second;
    inputSize = cv::Size(inputShape.lens()[3], inputShape.lens()[2]); // NCHW

    // Select the GPU target
    migraphx::target gpuTarget = migraphx::gpu::target{};

    // Optional FP16 quantization
    if (useFP16)
    {
        migraphx::quantize_fp16(net);
    }

    // Compile the model
    migraphx::compile_options options;
    options.device_id = 0; // GPU device id, defaults to device 0 (supported in MIGraphX >= 1.2)
#ifdef DMA
    options.offload_copy = false;
#else
    options.offload_copy = true; // let MIGraphX copy data between host and device
#endif
    net.compile(gpuTarget, options);
    LOG_INFO(logFile, "succeed to compile model: %s\n", GetFileName(modelPath).c_str());

    // Run once by itself (warm-up)
#ifdef DMA
    ParameterMap = CreateParameterMap(net);
    net.eval(ParameterMap);
#else
    migraphx::parameter_map inputData;
    inputData[inputName] = migraphx::generate_argument(inputShape);
    net.eval(inputData);
#endif

    // Read the class names
    if (!pathOfClassNameFile.empty())
    {
        ifstream classNameFile(pathOfClassNameFile);
        string line;
        while (getline(classNameFile, line))
        {
            classNames.push_back(line);
        }
    }
    else
    {
        classNames.resize(yolov3Parameter.numberOfClasses);
    }

#ifdef DMA
    // Device buffer that receives the preprocessed (planar RGB, float) image
    hipMalloc((void**)&preprocess_Image, inputSize.height * inputSize.width * 3 * sizeof(float));
#endif

    // log
    LOG_INFO(logFile, "InputSize:%dx%d\n", inputSize.width, inputSize.height);
    LOG_INFO(logFile, "InputName:%s\n", inputName.c_str());
    LOG_INFO(logFile, "ConfidenceThreshold:%f\n", yolov3Parameter.confidenceThreshold);
    LOG_INFO(logFile, "NMSThreshold:%f\n", yolov3Parameter.nmsThreshold);
    LOG_INFO(logFile, "ObjectThreshold:%f\n", yolov3Parameter.objectThreshold);
    LOG_INFO(logFile, "NumberOfClasses:%d\n", yolov3Parameter.numberOfClasses);

    return SUCCESS;
}
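// The HIP kernels below are compiled only for the DMA build. They run on a frame
// that already resides in device memory (DCU_Frame) and convert one pixel format
// each (planar BGR, YUV420P, interleaved RGBA) into the planar RGB float layout
// the network input expects, writing into preprocess_Image so the frame never has
// to be copied back to the host for preprocessing.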
#ifdef DMA
__global__ void convert_bgrp_to_rgb_and_normalization_yolov3(unsigned char* srcImage, float* outImage, int width, int height)
{
    int x = blockIdx.x * blockDim.x + threadIdx.x;
    if (x < width * height)
    {
        unsigned char r = srcImage[x + width * height * 2];
        unsigned char g = srcImage[x + width * height * 1];
        unsigned char b = srcImage[x + width * height * 0];
        float sum = 255.0;
        outImage[x + width * height * 0] = r / sum;
        outImage[x + width * height * 1] = g / sum;
        outImage[x + width * height * 2] = b / sum;
    }
}

__global__ void convert_yuv420p_to_rgb_and_normalization_yolov3(unsigned char* srcImage, float* outImage, int width, int height)
{
    int x = blockIdx.x * blockDim.x + threadIdx.x;
    int y = blockIdx.y * blockDim.y + threadIdx.y;
    if (x >= width || y >= height) return;

    int index = y * width + x;
    int yIndex = index;
    int uIndex = (y / 2) * (width / 2) + (x / 2) + width * height;
    int vIndex = (y / 2) * (width / 2) + (x / 2) + width * height * 5 / 4;

    unsigned char yValue = srcImage[yIndex];
    unsigned char uValue = srcImage[uIndex];
    unsigned char vValue = srcImage[vIndex];

    int r = yValue + 1.370705 * (vValue - 128);
    int g = yValue - 0.698001 * (vValue - 128) - 0.337633 * (uValue - 128);
    int b = yValue + 1.732446 * (uValue - 128);

    // Write planar RGB at the full pixel index (row offset included); note that,
    // unlike the other two kernels, the values are stored without the 1/255 scaling.
    outImage[index + width * height * 0] = (float)r;
    outImage[index + width * height * 1] = (float)g;
    outImage[index + width * height * 2] = (float)b;
}

__global__ void convert_rgba_to_rgb_and_normalization_yolov3(unsigned char* srcImage, float* outImage, int width, int height)
{
    int x = blockIdx.x * blockDim.x + threadIdx.x;
    if (x < width * height)
    {
        unsigned char r = srcImage[x * 4 + 0];
        unsigned char g = srcImage[x * 4 + 1];
        unsigned char b = srcImage[x * 4 + 2];
        float sum = 255.0;
        outImage[x + width * height * 0] = r / sum;
        outImage[x + width * height * 1] = g / sum;
        outImage[x + width * height * 2] = b / sum;
    }
}
#endif

ErrorCode DetectorYOLOV3::Detect(const cv::Mat &srcImage, std::vector<ResultOfDetection> &resultsOfDetection, double* dprep_time, double* deval_time, double* dpostp_time)
{
    if (srcImage.empty() || srcImage.type() != CV_8UC3)
    {
        LOG_ERROR(logFile, "image error!\n");
        return IMAGE_ERROR;
    }

    // Preprocess and convert to NCHW
    double prep_time0 = getTickCount();
    cv::Mat inputBlob;
    blobFromImage(srcImage, inputBlob, 1 / 255.0, inputSize, Scalar(0, 0, 0), true, false);

    // Input data
    migraphx::parameter_map inputData;
    inputData[inputName] = migraphx::argument{inputShape, (float*)inputBlob.data};
    double prep_time1 = getTickCount();

    // Inference
    std::vector<migraphx::argument> inferenceResults;
    double eval_time0 = getTickCount();
    inferenceResults = net.eval(inputData);
    double eval_time1 = getTickCount();

    // Fetch the inference result
    double postp_time0 = getTickCount();
    std::vector<cv::Mat> outs;
    migraphx::argument result = inferenceResults[0];

    // Wrap the result as cv::Mat
    migraphx::shape outputShape = result.get_shape();
    int shape[] = {static_cast<int>(outputShape.lens()[0]), static_cast<int>(outputShape.lens()[1]), static_cast<int>(outputShape.lens()[2])};
    cv::Mat out(3, shape, CV_32F, (unsigned char*)result.data());
    outs.push_back(out);

    // Number of proposals and size of each output row
    int numProposal = outs[0].size[1];
    int numOut = outs[0].size[2];

    // Flatten the output to numProposal rows
    outs[0] = outs[0].reshape(0, numProposal);

    // Decode the proposals
    std::vector<float> confidences;
    std::vector<cv::Rect> boxes;
    std::vector<int> classIds;
    float ratioh = (float)srcImage.rows / inputSize.height, ratiow = (float)srcImage.cols / inputSize.width;

    // Each row is (cx, cy, w, h, box_score, class_scores...)
    int n = 0, rowInd = 0;
    float* pdata = (float*)outs[0].data;
    for (n = 0; n < numProposal; n++)
    {
        float boxScores = pdata[4];
        if (boxScores > yolov3Parameter.objectThreshold)
        {
            cv::Mat scores = outs[0].row(rowInd).colRange(5, numOut);
            cv::Point classIdPoint;
            double maxClassScore;
            cv::minMaxLoc(scores, 0, &maxClassScore, 0, &classIdPoint);
            maxClassScore *= boxScores;
            if (maxClassScore > yolov3Parameter.confidenceThreshold)
            {
                const int classIdx = classIdPoint.x;
                float cx = pdata[0] * ratiow;
                float cy = pdata[1] * ratioh;
                float w = pdata[2] * ratiow;
                float h = pdata[3] * ratioh;
                int left = int(cx - 0.5 * w);
                int top = int(cy - 0.5 * h);

                confidences.push_back((float)maxClassScore);
                boxes.push_back(cv::Rect(left, top, (int)(w), (int)(h)));
                classIds.push_back(classIdx);
            }
        }
        rowInd++;
        pdata += numOut;
    }

    // Run non-maximum suppression to remove redundant overlapping boxes
    std::vector<int> indices;
    dnn::NMSBoxes(boxes, confidences, yolov3Parameter.confidenceThreshold, yolov3Parameter.nmsThreshold, indices);
    for (size_t i = 0; i < indices.size(); ++i)
    {
        int idx = indices[i];
        int classID = classIds[idx];
        string className = classNames[classID];
        float confidence = confidences[idx];
        cv::Rect box = boxes[idx];

        ResultOfDetection result;
        result.boundingBox = box;
        result.confidence = confidence; // confidence
        result.classID = classID;       // label
        result.className = className;
        resultsOfDetection.push_back(result);
    }
    double postp_time1 = getTickCount();

    *dprep_time += (prep_time1 - prep_time0) * 1000 / getTickFrequency();
    *deval_time += (eval_time1 - eval_time0) * 1000 / getTickFrequency();
    *dpostp_time += (postp_time1 - postp_time0) * 1000 / getTickFrequency();

    return SUCCESS;
}
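// DMA variant of Detect(): the frame (DCU_Frame) already resides in device memory,
// so preprocessing is done by the HIP kernels above, writing directly into
// preprocess_Image; the output decoding below mirrors the cv::Mat overload.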
#ifdef DMA
ErrorCode DetectorYOLOV3::Detect(DCU_Frame &srcImage, std::vector<ResultOfDetection> &resultsOfDetection)
{
    // Launch configuration for the 1D per-pixel kernels
    int block_size = 256;
    int num_blocks = (srcImage.width * srcImage.height + block_size - 1) / block_size;

    // Launch configurations are assumptions: a 1D grid for the BGRP/RGBA kernels
    // and a 2D grid for the YUV420P kernel, which indexes by (x, y).
    if (srcImage.format == AV_PIX_FMT_BGRP)
    {
        convert_bgrp_to_rgb_and_normalization_yolov3<<<num_blocks, block_size>>>(srcImage.dcu_data, preprocess_Image, srcImage.width, srcImage.height);
    }
    if (srcImage.format == AV_PIX_FMT_YUV420P)
    {
        dim3 block2d(16, 16);
        dim3 grid2d((srcImage.width + block2d.x - 1) / block2d.x, (srcImage.height + block2d.y - 1) / block2d.y);
        convert_yuv420p_to_rgb_and_normalization_yolov3<<<grid2d, block2d>>>(srcImage.dcu_data, preprocess_Image, srcImage.width, srcImage.height);
    }
    if (srcImage.format == AV_PIX_FMT_RGBA)
    {
        convert_rgba_to_rgb_and_normalization_yolov3<<<num_blocks, block_size>>>(srcImage.dcu_data, preprocess_Image, srcImage.width, srcImage.height);
    }

    // Input data (already on the device)
    ParameterMap[inputName] = migraphx::argument{inputShape, preprocess_Image};

    // Inference
    std::vector<migraphx::argument> inferenceResults = net.eval(ParameterMap);

    // Fetch the inference result
    std::vector<cv::Mat> outs;
    migraphx::argument result = inferenceResults[0];

    // Wrap the result as cv::Mat
    migraphx::shape outputShape = result.get_shape();
    int shape[] = {static_cast<int>(outputShape.lens()[0]), static_cast<int>(outputShape.lens()[1]), static_cast<int>(outputShape.lens()[2])};
    cv::Mat out(3, shape, CV_32F, (unsigned char*)result.data());
    outs.push_back(out);

    // Number of proposals and size of each output row
    int numProposal = outs[0].size[1];
    int numOut = outs[0].size[2];

    // Flatten the output to numProposal rows
    outs[0] = outs[0].reshape(0, numProposal);

    // Decode the proposals
    std::vector<float> confidences;
    std::vector<cv::Rect> boxes;
    std::vector<int> classIds;
    float ratioh = (float)srcImage.height / inputSize.height, ratiow = (float)srcImage.width / inputSize.width;

    int n = 0, rowInd = 0;
    float* pdata = (float*)outs[0].data;
    for (n = 0; n < numProposal; n++)
    {
        float boxScores = pdata[4];
        if (boxScores > yolov3Parameter.objectThreshold)
        {
            cv::Mat scores = outs[0].row(rowInd).colRange(5, numOut);
            cv::Point classIdPoint;
            double maxClassScore;
            cv::minMaxLoc(scores, 0, &maxClassScore, 0, &classIdPoint);
            maxClassScore *= boxScores;
            if (maxClassScore > yolov3Parameter.confidenceThreshold)
            {
                const int classIdx = classIdPoint.x;
                float cx = pdata[0] * ratiow;
                float cy = pdata[1] * ratioh;
                float w = pdata[2] * ratiow;
                float h = pdata[3] * ratioh;
                int left = int(cx - 0.5 * w);
                int top = int(cy - 0.5 * h);

                confidences.push_back((float)maxClassScore);
                boxes.push_back(cv::Rect(left, top, (int)(w), (int)(h)));
                classIds.push_back(classIdx);
            }
        }
        rowInd++;
        pdata += numOut;
    }

    // Run non-maximum suppression to remove redundant overlapping boxes
    std::vector<int> indices;
    dnn::NMSBoxes(boxes, confidences, yolov3Parameter.confidenceThreshold, yolov3Parameter.nmsThreshold, indices);
    for (size_t i = 0; i < indices.size(); ++i)
    {
        int idx = indices[i];
        int classID = classIds[idx];
        string className = classNames[classID];
        float confidence = confidences[idx];
        cv::Rect box = boxes[idx];

        ResultOfDetection result;
        result.boundingBox = box;
        result.confidence = confidence;
        result.classID = classID;
        result.className = className;
        resultsOfDetection.push_back(result);
    }

    return SUCCESS;
}
#endif
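// Shared by both detector paths: stores the initialization parameters, fetches the
// named log file from LogManager, opens the cv::FileStorage configuration file,
// and makes sure parentPath ends with a path separator.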
ErrorCode DetectorYOLOV3::DoCommonInitialization(InitializationParameterOfDetector initializationParameterOfDetector)
{
    initializationParameter = initializationParameterOfDetector;

    // Get the log file
    logFile = LogManager::GetInstance()->GetLogFile(initializationParameter.logName);

    // Load the configuration file
    std::string configFilePath = initializationParameter.configFilePath;
    if (!Exists(configFilePath))
    {
        LOG_ERROR(logFile, "no configuration file!\n");
        return CONFIG_FILE_NOT_EXIST;
    }
    if (!configurationFile.open(configFilePath, FileStorage::READ))
    {
        LOG_ERROR(logFile, "fail to open configuration file\n");
        return FAIL_TO_OPEN_CONFIG_FILE;
    }
    LOG_INFO(logFile, "succeed to open configuration file\n");

    // Normalize the parent path (append a trailing separator if missing)
    std::string &parentPath = initializationParameter.parentPath;
    if (!parentPath.empty())
    {
        if (!IsPathSeparator(parentPath[parentPath.size() - 1]))
        {
            parentPath += PATH_SEPARATOR;
        }
    }

    return SUCCESS;
}

} // namespace migraphxSamples
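/*
Usage sketch (hypothetical caller, not part of this sample): initialize the detector
from a configuration file, run Detect() on a BGR frame, and read the
ResultOfDetection fields filled in above. Members of types declared outside this
file (InitializationParameterOfDetector, ErrorCode values) are used as they appear
in the code above.

    InitializationParameterOfDetector initParam;
    initParam.configFilePath = "config.yaml";   // hypothetical path
    initParam.parentPath     = "./models";      // prepended to ModelPath
    initParam.logName        = "detector";      // log file looked up via LogManager

    DetectorYOLOV3 detector;
    if (detector.Initialize(initParam) != SUCCESS)
        return -1;

    cv::Mat frame = cv::imread("test.jpg");     // CV_8UC3 BGR image
    std::vector<ResultOfDetection> results;
    double prep = 0, eval = 0, post = 0;
    if (detector.Detect(frame, results, &prep, &eval, &post) == SUCCESS)
    {
        for (const ResultOfDetection &r : results)
            printf("%s %.2f [%d %d %d %d]\n", r.className.c_str(), r.confidence,
                   r.boundingBox.x, r.boundingBox.y, r.boundingBox.width, r.boundingBox.height);
    }
*/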