// NOTE(review): this copy of the file appears to have lost the text that sat between a
// '<' (with no following space) and the next '>' — include targets, template arguments,
// kernel launch configurations, several loop conditions and some whole function bodies.
// Every place where that is visible below is marked with NOTE(review); the tokens
// themselves are left exactly as found. Restore the missing text from the original source.

// NOTE(review): the targets of the following eleven #include directives are missing.
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include "hip/hip_runtime.h"

using namespace cv::dnn;

namespace migraphxSamples
{

#define SSD_QUANT_BASE 4096 // base
#define SSD_COORDI_NUM 4 // number of coordinates (x1,y1,x2,y2)
#define SSD_PROPOSAL_WIDTH 6
#define SSD_HALF 0.5
#define SSD_ASPECT_RATIO_NUM 6 // default maximum number of aspect ratios
#define SSD_MAX(a,b) (((a) > (b)) ? (a) : (b))
#define SSD_MIN(a,b) (((a) < (b)) ? (a) : (b))

// 16-byte alignment
// NOTE(review): `number` is not parenthesised inside SSD_ALIGN16, so an argument that is
// an expression with low-precedence operators would be mis-grouped.
#define SSD_ALIGN_16 16
#define SSD_ALIGN16(number) ((number + SSD_ALIGN_16-1) / SSD_ALIGN_16*SSD_ALIGN_16)

// Constructs the detector with a null log file pointer.
DetectorRetinaFace::DetectorRetinaFace():logFile(NULL)
{
}

// Releases the configuration file and the SSD parameter buffer.
DetectorRetinaFace::~DetectorRetinaFace()
{
    configurationFile.release();

    // Release the memory of the SSD parameters
    delete[] ssdParameter.buffer;
}

// Initializes the detector: runs the common initialization, reads the "DetectorRetinaFace"
// node of the configuration file, parses the ONNX model, optionally quantizes it
// (INT8 / FP16), compiles it for the GPU target, runs one evaluation, logs the settings
// and finally calls GetSSDParameter().
//
// Returns SUCCESS, the error code of DoCommonInitialization, or MODEL_NOT_EXIST when the
// model file is missing.
ErrorCode DetectorRetinaFace::Initialize(InitializationParameterOfDetector initializationParameterOfDetector)
{
    // Common initialization (get the log file, load the configuration file, etc.)
    ErrorCode errorCode=DoCommonInitialization(initializationParameterOfDetector);
    if(errorCode!=SUCCESS)
    {
        LOG_ERROR(logFile,"fail to DoCommonInitialization\n");
        return errorCode;
    }
    LOG_INFO(logFile,"succeed to DoCommonInitialization\n");

    // Read the parameters from the configuration file
    FileNode netNode = configurationFile["DetectorRetinaFace"];
    string modelPath=initializationParameter.parentPath+(string)netNode["ModelPath"];
    scale=(float)netNode["Scale"];
    meanValue.val[0]=(float)netNode["MeanValue1"];
    meanValue.val[1]=(float)netNode["MeanValue2"];
    meanValue.val[2]=(float)netNode["MeanValue3"];
    swapRB=(bool)(int)netNode["SwapRB"];
    crop=(bool)(int)netNode["Crop"];
    useInt8=(bool)(int)netNode["UseInt8"];
    useFP16=(bool)(int)netNode["UseFP16"];

    // Load the model
    if(Exists(modelPath)==false)
    {
        LOG_ERROR(logFile,"%s not exist!\n",modelPath.c_str());
        return MODEL_NOT_EXIST;
    }
    net = migraphx::parse_onnx(modelPath);
    LOG_INFO(logFile,"succeed to load model: %s\n",GetFileName(modelPath).c_str());

    // Get the model's input attributes (the first entry of the parameter shapes)
    // NOTE(review): the template arguments of std::pair are missing here.
    std::pair inputAttribute=*(net.get_parameter_shapes().begin());
    inputName=inputAttribute.first;
    inputShape=inputAttribute.second;
    inputSize=Size(inputShape.lens()[3],inputShape.lens()[2]);// NCHW

    // Target the GPU
    migraphx::target gpuTarget = migraphx::gpu::target{};

    // Quantization
    if(useInt8)
    {
        // Create the quantization calibration data; using several typical images from
        // the test set is recommended
        cv::Mat srcImage=imread("../Resource/Images/FaceDetect_2.jpg",1);
        // NOTE(review): the element type of srcImages, the loop that fills it and the
        // construction of inputData are missing between `i` and `calibrationData`.
        std::vector srcImages;
        for(int i=0;i calibrationData = {inputData};

        // INT8 quantization
        migraphx::quantize_int8(net, gpuTarget, calibrationData);
    }
    if(useFP16)
    {
        migraphx::quantize_fp16(net);
    }

    // Compile the model
    migraphx::compile_options options;
    options.device_id=0; // select the GPU device, device 0 by default (supported in versions >= 1.2)
#ifdef DMA
    options.offload_copy=false; // set offload_copy
#else
    options.offload_copy=true; // set offload_copy
#endif
    net.compile(gpuTarget,options);
    LOG_INFO(logFile,"succeed to compile model: %s\n",GetFileName(modelPath).c_str());

    // Run once by itself
#ifdef DMA
    ParameterMap=CreateParameterMap(net);
    net.eval(ParameterMap);
#else
    migraphx::parameter_map inputData;
    inputData[inputName]=migraphx::generate_argument(inputShape);
    net.eval(inputData);
#endif

#ifdef DMA
    // Device buffer that receives the planar float image written by the conversion kernels.
    // NOTE(review): the return value of hipMalloc is not checked, and no matching hipFree
    // is visible in this file.
    hipMalloc((void**)&preprocess_Image, inputSize.height * inputSize.width * 3 * sizeof(float));
#endif

    // log
    LOG_INFO(logFile,"InputSize:%dx%d\n",inputSize.width,inputSize.height);
    LOG_INFO(logFile,"InputName:%s\n",inputName.c_str());
    LOG_INFO(logFile,"Scale:%.6f\n",scale);
    LOG_INFO(logFile,"Mean:%.2f,%.2f,%.2f\n",meanValue.val[0],meanValue.val[1],meanValue.val[2]);
    LOG_INFO(logFile,"SwapRB:%d\n",(int)swapRB);
    LOG_INFO(logFile,"Crop:%d\n",(int)crop);
    LOG_INFO(logFile,"UseInt8:%d\n",(int)useInt8);
    LOG_INFO(logFile,"UseFP16:%d\n",(int)useFP16);

    // Read the SSD parameters
    GetSSDParameter();

    return SUCCESS;
}

// Stores the initialization parameters, fetches the log file, opens the configuration
// file for reading and makes sure a non-empty parent path ends with a path separator.
//
// Returns SUCCESS, CONFIG_FILE_NOT_EXIST or FAIL_TO_OPEN_CONFIG_FILE.
ErrorCode DetectorRetinaFace::DoCommonInitialization(InitializationParameterOfDetector initializationParameterOfDetector)
{
    initializationParameter=initializationParameterOfDetector;

    // Get the log file
    logFile=LogManager::GetInstance()->GetLogFile(initializationParameter.logName);

    // Load the configuration file
    std::string configFilePath=initializationParameter.configFilePath;
    if(!Exists(configFilePath))
    {
        LOG_ERROR(logFile, "no configuration file!\n");
        return CONFIG_FILE_NOT_EXIST;
    }
    if(!configurationFile.open(configFilePath, FileStorage::READ))
    {
        LOG_ERROR(logFile, "fail to open configuration file\n");
        return FAIL_TO_OPEN_CONFIG_FILE;
    }
    LOG_INFO(logFile, "succeed to open configuration file\n");

    // Fix up the parent path: append a separator when it does not already end with one
    std::string &parentPath = initializationParameter.parentPath;
    if (!parentPath.empty())
    {
        if(!IsPathSeparator(parentPath[parentPath.size() - 1]))
        {
            parentPath+=PATH_SEPARATOR;
        }
    }

    return SUCCESS;
}

#ifdef DMA
// Kernel: reads three consecutive width*height byte planes from srcImage and writes three
// float planes to outImage with the first and third plane swapped, each value divided by
// 255. One thread handles one pixel; threads with x >= width*height do nothing.
__global__ void convert_bgrp_to_rgb_and_normalization_retinaface(unsigned char* srcImage, float* outImage, int width, int height)
{
    int x = blockIdx.x * blockDim.x + threadIdx.x;
    if (x < width * height)
    {
        unsigned char r = srcImage[x + width * height * 2];
        unsigned char g = srcImage[x + width * height * 1];
        unsigned char b = srcImage[x + width * height * 0];
        float sum = 255.0;
        outImage[x + width * height * 0] = r / sum;
        outImage[x + width * height * 1] = g / sum;
        outImage[x + width * height * 2] = b / sum;
    }
}

// Kernel: converts one pixel of a planar Y / U / V image (U and V planes subsampled by 2
// in both directions, stored after the Y plane) to R, G, B and stores them as floats in
// three planes of outImage. Uses a 2-D thread index (x, y).
// NOTE(review): the three stores are indexed by x only, not by index (= y * width + x),
// so every row writes to the same width entries of each plane — looks unintended; confirm.
// NOTE(review): unlike the other two kernels, no scaling or mean subtraction is applied,
// and r/g/b are not clamped to [0, 255] — confirm this is intended.
__global__ void convert_yuv420p_to_rgb_and_normalization_retinaface(unsigned char* srcImage, float* outImage, int width, int height)
{
    int x = blockIdx.x * blockDim.x + threadIdx.x;
    int y = blockIdx.y * blockDim.y + threadIdx.y;
    if (x >= width || y >= height) return;

    int index = y * width + x;
    int yIndex = index;
    int uIndex = (y / 2) * (width / 2) + (x / 2) + width * height;
    int vIndex = (y / 2) * (width / 2) + (x / 2) + width * height * 5 / 4;

    unsigned char yValue = srcImage[yIndex];
    unsigned char uValue = srcImage[uIndex];
    unsigned char vValue = srcImage[vIndex];

    int r = yValue + 1.370705 * (vValue - 128);
    int g = yValue - 0.698001 * (vValue - 128) - 0.337633 * (uValue - 128);
    int b = yValue + 1.732446 * (uValue - 128);

    outImage[x + width * height * 0] = (float)r;
    outImage[x + width * height * 1] = (float)g;
    outImage[x + width * height * 2] = (float)b;
}

// Kernel: reads the first three bytes of each 4-byte pixel of srcImage, subtracts
// 123 / 117 / 104 respectively and writes the results to three float planes of outImage.
// One thread handles one pixel; threads with x >= width*height do nothing.
__global__ void convert_rgba_to_rgb_and_normalization_retinaface(unsigned char* srcImage, float* outImage, int width, int height)
{
    int x = blockIdx.x * blockDim.x + threadIdx.x;
    if (x < width * height)
    {
        unsigned char r = srcImage[x * 4 + 0];
        unsigned char g = srcImage[x * 4 + 1];
        unsigned char b = srcImage[x * 4 + 2];
        float sum = 1.0;
        outImage[x + width * height * 0] = (r - 123) / sum;
        outImage[x + width * height * 1] = (g - 117) / sum;
        outImage[x + width * height * 2] = (b - 104) / sum;
    }
}
#endif

// Runs detection on a host image: rejects empty or non-CV_8UC3 input with IMAGE_ERROR,
// builds the input blob with blobFromImage, evaluates the network, permutes each pair of
// outputs (index 2*i and 2*i+1) with PermuteLayer and passes them to GetResult.
// NOTE(review): `crop` is read from the configuration but blobFromImage is called with a
// literal `false` as its last argument — confirm which is intended.
// NOTE(review): the element type of the result vector parameter is missing.
ErrorCode DetectorRetinaFace::Detect(const cv::Mat &srcImage,std::vector &resultsOfDetection)
{
    if(srcImage.empty()||srcImage.type()!=CV_8UC3)
    {
        LOG_ERROR(logFile, "image error!\n");
        return IMAGE_ERROR;
    }

    // Preprocess and convert to NCHW
    cv::Mat inputBlob;
    blobFromImage(srcImage, inputBlob, scale, inputSize, meanValue, swapRB, false);

    // Input data
    migraphx::parameter_map inputData;
    inputData[inputName]= migraphx::argument{inputShape, (float*)inputBlob.data};

    // Inference
    // NOTE(review): the template arguments of the three vectors below are missing, and so
    // is everything between `i` and `regression` (loop condition, opening brace, the
    // definition of numberOfPriorBox and the declaration type of `regression`).
    std::vector inferenceResults=net.eval(inputData);

    vector> regressions;
    vector> classifications;
    for(int i=0;i regression;
        migraphx::argument result0 = inferenceResults[2*i];
        result0.visit([&](auto output) { regression.assign(output.begin(), output.end()); });
        regression=PermuteLayer(regression,ssdParameter.priorBoxWidth[i],ssdParameter.priorBoxHeight[i],numberOfPriorBox*4);
        regressions.push_back(regression);

        // ClassHead
        std::vector classification;
        migraphx::argument result1 = inferenceResults[2*i+1];
        result1.visit([&](auto output) { classification.assign(output.begin(), output.end()); });
        classification=PermuteLayer(classification,ssdParameter.priorBoxWidth[i],ssdParameter.priorBoxHeight[i],numberOfPriorBox*ssdParameter.classNum);
        classifications.push_back(classification);
    }

    // Post-process the inference results to obtain the final SSD detections
    GetResult(classifications,regressions,resultsOfDetection);

    // Convert to original-image coordinates
    // NOTE(review): everything from this loop's condition up to the parameter list of the
    // next function is missing: the rest of this Detect overload and the beginning of the
    // signature of the overload below (the one that reads srcImage.format / .dcu_data).
    for(int i=0;i &resultsOfDetection)
{
    // One thread per pixel, 256 threads per block.
    int block_size = 256;
    int num_blocks = (srcImage.width * srcImage.height + block_size - 1) / block_size;

    // Pick the conversion kernel that matches the frame's pixel format; each one writes
    // into preprocess_Image.
    // NOTE(review): the launch configuration between <<< and >>> is missing in all three
    // launches. The YUV kernel uses a 2-D thread index, so it needs a 2-D configuration.
    if(srcImage.format == AV_PIX_FMT_BGRP)
    {
        convert_bgrp_to_rgb_and_normalization_retinaface<<>>(srcImage.dcu_data, preprocess_Image, srcImage.width, srcImage.height);
    }
    if(srcImage.format == AV_PIX_FMT_YUV420P)
    {
        convert_yuv420p_to_rgb_and_normalization_retinaface<<>>(srcImage.dcu_data, preprocess_Image, srcImage.width, srcImage.height);
    }
    if(srcImage.format == AV_PIX_FMT_RGBA)
    {
        convert_rgba_to_rgb_and_normalization_retinaface<<>>(srcImage.dcu_data, preprocess_Image, srcImage.width, srcImage.height);
    }

    // Input data
    ParameterMap[inputName] = migraphx::argument{inputShape, preprocess_Image};

    // Inference
    // NOTE(review): same damage as in the overload above — template arguments and the
    // text between `i` and `regression` are missing.
    std::vector inferenceResults = net.eval(ParameterMap);

    vector> regressions;
    vector> classifications;
    for(int i=0;i regression;
        migraphx::argument result0 = inferenceResults[2*i];
        result0.visit([&](auto output) { regression.assign(output.begin(), output.end()); });
        regression=PermuteLayer(regression,ssdParameter.priorBoxWidth[i],ssdParameter.priorBoxHeight[i],numberOfPriorBox*4);
        regressions.push_back(regression);

        // ClassHead
        std::vector classification;
        migraphx::argument result1 = inferenceResults[2*i+1];
        result1.visit([&](auto output) { classification.assign(output.begin(), output.end()); });
        classification=PermuteLayer(classification,ssdParameter.priorBoxWidth[i],ssdParameter.priorBoxHeight[i],numberOfPriorBox*ssdParameter.classNum);
        classifications.push_back(classification);
    }

    // Post-process the inference results to obtain the final SSD detections
    GetResult(classifications,regressions,resultsOfDetection);

    // Convert to original-image coordinates
    // NOTE(review): everything from this loop's condition up to the middle of the
    // parameter list of the next function (which uses `classifications`, `regressions`
    // and `resultsOfDetection`, i.e. presumably GetResult) is missing. GetSSDParameter(),
    // which Initialize calls, is not defined anywhere in the visible text. The second
    // parameter name also appears mis-encoded ("®ressions").
    for(int i=0;i> &classifications,const vector> ®ressions,vector &resultsOfDetection)
{
    int numberOfPriorBoxLayer=ssdParameter.numberOfPriorBoxLayer;

    // Type conversion
    for(int i = 0; i < numberOfPriorBoxLayer; i++)
    {
        // Classification
        // NOTE(review): from here on the body of this function is missing, as is the
        // header of the following function (which uses indexOfLayer, priorBoxWidth,
        // priorBoxHeight, srcImageWidth, srcImageHeight and priorboxOutputData, i.e.
        // presumably PriorBoxLayer). The text resumes in the middle of its local
        // parameter copies.
        vector classificationOfEachLayer=classifications[i];
        for(int j=0;j regressionOfEachLayer=regressions[i];
        for(int j=0;j priorBoxMinSize=ssdParameter.priorBoxMinSize[indexOfLayer];
    int minSizeNum=ssdParameter.minSizeNum[indexOfLayer];
    vector priorBoxMaxSize=ssdParameter.priorBoxMaxSize[indexOfLayer];
    int maxSizeNum=ssdParameter.maxSizeNum[indexOfLayer];
    int flip=ssdParameter.flip[indexOfLayer];
    int clip=ssdParameter.clip[indexOfLayer];
    int inputAspectRatioNum=ssdParameter.inputAspectRatioNum[indexOfLayer];
    vector priorBoxAspectRatio=ssdParameter.priorBoxAspectRatio[indexOfLayer];
    float priorBoxStepWidth=ssdParameter.priorBoxStepWidth[indexOfLayer];
    float priorBoxStepHeight= ssdParameter.priorBoxStepHeight[indexOfLayer];
    float offset=ssdParameter.offset;
    int *priorBoxVar=ssdParameter.priorBoxVar;

    int aspectRatioNum = 0;
    int index = 0;
    float aspectRatio[SSD_ASPECT_RATIO_NUM] = { 0 };
    int numPrior = 0;
    float centerX = 0;
    float centerY = 0;
    float boxHeight = 0;
    float boxWidth = 0;
    float maxBoxWidth = 0;
    int i = 0;
    int j = 0;
    int n = 0;
    int h = 0;
    int w = 0;

    // Build the aspect-ratio list: 1 first, then each configured ratio and, when flip is
    // set, its reciprocal.
    // NOTE(review): aspectRatio holds SSD_ASPECT_RATIO_NUM entries and the writes below
    // are not bounds-checked against it.
    aspectRatioNum = 0;
    aspectRatio[0] = 1;
    aspectRatioNum++;
    for (i = 0; i < inputAspectRatioNum; i++)
    {
        aspectRatio[aspectRatioNum++] = priorBoxAspectRatio[i];
        if (flip)
        {
            aspectRatio[aspectRatioNum++] = 1.0f / priorBoxAspectRatio[i];
        }
    }
    numPrior = minSizeNum * aspectRatioNum + maxSizeNum;

    // Write (xMin, yMin, xMax, yMax) for every prior box of every cell, row by row.
    index = 0;
    for (h = 0; h < priorBoxHeight; h++)
    {
        for (w = 0; w < priorBoxWidth; w++)
        {
            centerX = (w + offset) * priorBoxStepWidth;
            centerY = (h + offset) * priorBoxStepHeight;
            for (n = 0; n < minSizeNum; n++)
            {
                // First generate the prior box with aspect ratio 1
                boxHeight = priorBoxMinSize[n];
                boxWidth = priorBoxMinSize[n];
                priorboxOutputData[index++] = (int)(centerX - boxWidth * SSD_HALF);
                priorboxOutputData[index++] = (int)(centerY - boxHeight * SSD_HALF);
                priorboxOutputData[index++] = (int)(centerX + boxWidth * SSD_HALF);
                priorboxOutputData[index++] = (int)(centerY + boxHeight * SSD_HALF);

                // For max_size, generate a prior box with aspect ratio 1 whose width and
                // height are sqrt(min_size * max_size)
                if(maxSizeNum>0)
                {
                    maxBoxWidth = sqrt(priorBoxMinSize[n] * priorBoxMaxSize[n]);
                    boxHeight = maxBoxWidth;
                    boxWidth = maxBoxWidth;
                    priorboxOutputData[index++] = (int)(centerX - boxWidth * SSD_HALF);
                    priorboxOutputData[index++] = (int)(centerY - boxHeight * SSD_HALF);
                    priorboxOutputData[index++] = (int)(centerX + boxWidth * SSD_HALF);
                    priorboxOutputData[index++] = (int)(centerY + boxHeight * SSD_HALF);
                }

                // The remaining prior boxes (aspect ratios other than 1)
                for (i = 1; i < aspectRatioNum; i++)
                {
                    boxWidth = (float)(priorBoxMinSize[n] * sqrt( aspectRatio[i] ));
                    boxHeight = (float)(priorBoxMinSize[n]/sqrt( aspectRatio[i] ));
                    priorboxOutputData[index++] = (int)(centerX - boxWidth * SSD_HALF);
                    priorboxOutputData[index++] = (int)(centerY - boxHeight * SSD_HALF);
                    priorboxOutputData[index++] = (int)(centerX + boxWidth * SSD_HALF);
                    priorboxOutputData[index++] = (int)(centerY + boxHeight * SSD_HALF);
                }
            }
        }
    }

    // Clamp to bounds: x to [0, srcImageWidth], y to [0, srcImageHeight]
    // (even entries are x coordinates, odd entries are y coordinates)
    if (clip)
    {
        for (i = 0; i < (int)(priorBoxWidth * priorBoxHeight * SSD_COORDI_NUM*numPrior / 2); i++)
        {
            priorboxOutputData[2 * i] = SSD_MIN((int)SSD_MAX(priorboxOutputData[2 * i], 0), srcImageWidth);
            priorboxOutputData[2 * i + 1] = SSD_MIN((int)SSD_MAX(priorboxOutputData[2 * i + 1], 0), srcImageHeight);
        }
    }

    // var: the four priorBoxVar values are appended once per prior box, continuing at the
    // position where the coordinate writes stopped
    for (h = 0; h < priorBoxHeight; h++)
    {
        for (w = 0; w < priorBoxWidth; w++)
        {
            for (i = 0; i < numPrior; i++)
            {
                for (j = 0; j < SSD_COORDI_NUM; j++)
                {
                    priorboxOutputData[index++] = (int)priorBoxVar[j];
                }
            }
        }
    }
}

// Applies ComputeSoftMax to consecutive groups of softMaxInHeight values of each of the
// concatNum input buffers and writes the results back-to-back into softMaxOutputData.
//
// softMaxWidth      per-input width; only used to compute `skip` and `left` below
// softMaxInputData  one input buffer per concatenated layer
// softMaxOutputData output buffer; receives outerNum * softMaxInHeight values per input
void DetectorRetinaFace::SoftmaxLayer(int softMaxWidth[],int* softMaxInputData[], int* softMaxOutputData)
{
    // Parameter assignment
    int softMaxInHeight=ssdParameter.softMaxInHeight;
    int *softMaxInChn=ssdParameter.softMaxInChn;
    int concatNum=ssdParameter.concatNum;
    int *convStride=ssdParameter.convStride;

    int* inputData = NULL;
    int* outputTmp = NULL;
    int outerNum = 0;
    int innerNum = 0;
    int inputChannel = 0;
    int i = 0;
    int concatCnt = 0;
    int stride = 0;
    int skip = 0;
    int left = 0;

    outputTmp = softMaxOutputData;
    for (concatCnt = 0; concatCnt < concatNum; concatCnt++)
    {
        inputData = softMaxInputData[concatCnt];
        stride = convStride[concatCnt];
        inputChannel = softMaxInChn[concatCnt];
        outerNum = inputChannel / softMaxInHeight;
        innerNum = softMaxInHeight;
        // NOTE(review): skip and left are computed but never read in this function.
        skip = softMaxWidth[concatCnt] / innerNum;
        left = stride - softMaxWidth[concatCnt];
        for (i = 0; i < outerNum; i++)
        {
            ComputeSoftMax(inputData, (int)innerNum,outputTmp);
            inputData += innerNum;
            outputTmp += innerNum;
        }
    }
}
void DetectorRetinaFace::ComputeSoftMax(int* src, int size, int* dst) { int max = 0; int sum = 0; int i = 0; for (i = 0; i < size; ++i) { if (max < src[i]) { max = src[i]; } } for (i = 0; i < size; ++i) { dst[i] = (int)(SSD_QUANT_BASE* exp((float)(src[i] - max) / SSD_QUANT_BASE)); sum += dst[i]; } for (i = 0; i < size; ++i) { dst[i] = (int)(((float)dst[i] / (float)sum) * SSD_QUANT_BASE); } } void DetectorRetinaFace::DetectionOutputLayer(int* allLocPreds[], int* allPriorBoxes[],int* confScores, int* assistMemPool) { // 参数赋值 int concatNum=ssdParameter.concatNum; int confThresh=ssdParameter.confThresh; int classNum=ssdParameter.classNum; int topK=ssdParameter.topK; int keepTopK=ssdParameter.keepTopK; int NMSThresh=ssdParameter.NMSThresh; int *detectInputChn=ssdParameter.detectInputChn; int* dstScoreSrc=ssdParameter.dstScore; int* dstBboxSrc=ssdParameter.dstRoi; int* roiOutCntSrc=ssdParameter.classRoiNum; int* locPreds = NULL; int* priorBoxes = NULL; int* priorVar = NULL; int* allDecodeBoxes = NULL; int* dstScore = NULL; int* dstBbox = NULL; int* classRoiNum = NULL; int roiOutCnt = 0; int* singleProposal = NULL; int* afterTopK = NULL; QuickSortStack* stack = NULL; int priorNum = 0; int numPredsPerClass = 0; float priorWidth = 0; float priorHeight = 0; float priorCenterX = 0; float priorCenterY = 0; float decodeBoxCenterX = 0; float decodeBoxCenterY = 0; float decodeBoxWidth = 0; float decodeBoxHeight = 0; int srcIdx = 0; int afterFilter = 0; int afterTopK2 = 0; int keepCnt = 0; int i = 0; int j = 0; int offset = 0; priorNum = 0; for (i = 0; i < concatNum; i++) { priorNum += detectInputChn[i] / SSD_COORDI_NUM; } // 缓存 allDecodeBoxes = assistMemPool; singleProposal = allDecodeBoxes + priorNum * SSD_COORDI_NUM; afterTopK = singleProposal + SSD_PROPOSAL_WIDTH * priorNum; stack = (QuickSortStack*)(afterTopK + priorNum * SSD_PROPOSAL_WIDTH); srcIdx = 0; for (i = 0; i < concatNum; i++) { // 回归预测值 locPreds = allLocPreds[i]; numPredsPerClass = detectInputChn[i] / 
SSD_COORDI_NUM; // 获取priorbox priorBoxes = allPriorBoxes[i]; priorVar = priorBoxes + numPredsPerClass*SSD_COORDI_NUM; for (j = 0; j < numPredsPerClass; j++) { priorWidth = (float)(priorBoxes[j*SSD_COORDI_NUM+2] - priorBoxes[j*SSD_COORDI_NUM]); priorHeight = (float)(priorBoxes[j*SSD_COORDI_NUM+3] - priorBoxes[j*SSD_COORDI_NUM + 1]); priorCenterX = (priorBoxes[j*SSD_COORDI_NUM+2] + priorBoxes[j*SSD_COORDI_NUM])*SSD_HALF; priorCenterY = (priorBoxes[j*SSD_COORDI_NUM+3] + priorBoxes[j*SSD_COORDI_NUM+1])*SSD_HALF; decodeBoxCenterX = ((float)priorVar[j*SSD_COORDI_NUM]/SSD_QUANT_BASE)* ((float)locPreds[j*SSD_COORDI_NUM]/SSD_QUANT_BASE)*priorWidth+priorCenterX; decodeBoxCenterY = ((float)priorVar[j*SSD_COORDI_NUM+1]/SSD_QUANT_BASE)* ((float)locPreds[j*SSD_COORDI_NUM+1]/SSD_QUANT_BASE)*priorHeight+priorCenterY; decodeBoxWidth = exp(((float)priorVar[j*SSD_COORDI_NUM+2]/SSD_QUANT_BASE)* ((float)locPreds[j*SSD_COORDI_NUM+2]/SSD_QUANT_BASE))*priorWidth; decodeBoxHeight = exp(((float)priorVar[j*SSD_COORDI_NUM+3]/SSD_QUANT_BASE)* ((float)locPreds[j*SSD_COORDI_NUM+3]/SSD_QUANT_BASE))*priorHeight; allDecodeBoxes[srcIdx++] = (int)(decodeBoxCenterX - decodeBoxWidth * SSD_HALF); allDecodeBoxes[srcIdx++] = (int)(decodeBoxCenterY - decodeBoxHeight * SSD_HALF); allDecodeBoxes[srcIdx++] = (int)(decodeBoxCenterX + decodeBoxWidth * SSD_HALF); allDecodeBoxes[srcIdx++] = (int)(decodeBoxCenterY + decodeBoxHeight * SSD_HALF); } } // 对每一类做NMS afterTopK2 = 0; for (i = 0; i < classNum; i++) { if(i==0) continue; for (j = 0; j < priorNum; j++) { singleProposal[j * SSD_PROPOSAL_WIDTH] = allDecodeBoxes[j * SSD_COORDI_NUM]; singleProposal[j * SSD_PROPOSAL_WIDTH + 1] = allDecodeBoxes[j * SSD_COORDI_NUM + 1]; singleProposal[j * SSD_PROPOSAL_WIDTH + 2] = allDecodeBoxes[j * SSD_COORDI_NUM + 2]; singleProposal[j * SSD_PROPOSAL_WIDTH + 3] = allDecodeBoxes[j * SSD_COORDI_NUM + 3]; singleProposal[j * SSD_PROPOSAL_WIDTH + 4] = confScores[j*classNum + i]; singleProposal[j * SSD_PROPOSAL_WIDTH + 5] = 0; } 
QuickSort(singleProposal, 0, priorNum - 1, stack,topK); afterFilter = (priorNum < topK) ? priorNum : topK; NonMaxSuppression(singleProposal, afterFilter, NMSThresh, afterFilter); roiOutCnt = 0; dstScore = (int*)dstScoreSrc; dstBbox = (int*)dstBboxSrc; classRoiNum = (int*)roiOutCntSrc; dstScore += (int)afterTopK2; dstBbox += (int)(afterTopK2 * SSD_COORDI_NUM); for (j = 0; j < topK; j++) { if (singleProposal[j * SSD_PROPOSAL_WIDTH + 5] == 0 && singleProposal[j * SSD_PROPOSAL_WIDTH + 4] > (int)confThresh) { dstScore[roiOutCnt] = singleProposal[j * 6 + 4]; dstBbox[roiOutCnt * SSD_COORDI_NUM] = singleProposal[j * SSD_PROPOSAL_WIDTH]; dstBbox[roiOutCnt * SSD_COORDI_NUM + 1] = singleProposal[j * SSD_PROPOSAL_WIDTH + 1]; dstBbox[roiOutCnt * SSD_COORDI_NUM + 2] = singleProposal[j * SSD_PROPOSAL_WIDTH + 2]; dstBbox[roiOutCnt * SSD_COORDI_NUM + 3] = singleProposal[j * SSD_PROPOSAL_WIDTH + 3]; roiOutCnt++; } } classRoiNum[i] = (int)roiOutCnt; afterTopK2 += roiOutCnt; } keepCnt = 0; offset = 0; if (afterTopK2 > keepTopK) { offset = classRoiNum[0]; for (i = 1; i < classNum; i++) { dstScore = (int*)dstScoreSrc; dstBbox = (int*)dstBboxSrc; classRoiNum = (int*)roiOutCntSrc; dstScore += (int)(offset); dstBbox += (int)(offset * SSD_COORDI_NUM); for (j = 0; j < (int)classRoiNum[i]; j++) { afterTopK[keepCnt * SSD_PROPOSAL_WIDTH] = dstBbox[j * SSD_COORDI_NUM]; afterTopK[keepCnt * SSD_PROPOSAL_WIDTH + 1] = dstBbox[j * SSD_COORDI_NUM + 1]; afterTopK[keepCnt * SSD_PROPOSAL_WIDTH + 2] = dstBbox[j * SSD_COORDI_NUM + 2]; afterTopK[keepCnt * SSD_PROPOSAL_WIDTH + 3] = dstBbox[j * SSD_COORDI_NUM + 3]; afterTopK[keepCnt * SSD_PROPOSAL_WIDTH + 4] = dstScore[j]; afterTopK[keepCnt * SSD_PROPOSAL_WIDTH + 5] = i; keepCnt++; } offset = offset + classRoiNum[i]; } QuickSort(afterTopK, 0, keepCnt - 1, stack,keepCnt); offset = 0; offset = classRoiNum[0]; for (i = 1; i < classNum; i++) { roiOutCnt = 0; dstScore = (int*)dstScoreSrc; dstBbox = (int*)dstBboxSrc; classRoiNum = (int*)roiOutCntSrc; dstScore += 
(int)(offset); dstBbox += (int)(offset * SSD_COORDI_NUM); for (j = 0; j < keepTopK; j++) { if (afterTopK[j * SSD_PROPOSAL_WIDTH + 5] == i) { dstScore[roiOutCnt] = afterTopK[j * SSD_PROPOSAL_WIDTH + 4]; dstBbox[roiOutCnt * SSD_COORDI_NUM] = afterTopK[j * SSD_PROPOSAL_WIDTH]; dstBbox[roiOutCnt * SSD_COORDI_NUM + 1] = afterTopK[j * SSD_PROPOSAL_WIDTH + 1]; dstBbox[roiOutCnt * SSD_COORDI_NUM + 2] = afterTopK[j * SSD_PROPOSAL_WIDTH + 2]; dstBbox[roiOutCnt * SSD_COORDI_NUM + 3] = afterTopK[j * SSD_PROPOSAL_WIDTH + 3]; roiOutCnt++; } } classRoiNum[i] = (int)roiOutCnt; offset += roiOutCnt; } } } vector DetectorRetinaFace::PermuteLayer(const vector &data,int width,int height,int channels) { vector result(data.size()); int index=0; int channelStep=width*height; for(int h=0; h -1) { low = stack[top].min; high = stack[top].max; i = low; j = high; top--; keyConfidence = src[SSD_PROPOSAL_WIDTH * low + 4]; while(i < j) { while((i < j) && (keyConfidence > src[j * SSD_PROPOSAL_WIDTH + 4])) { j--; } if(i < j) { Swap(&src[i*SSD_PROPOSAL_WIDTH], &src[j*SSD_PROPOSAL_WIDTH]); i++; } while((i < j) && (keyConfidence < src[i*SSD_PROPOSAL_WIDTH + 4])) { i++; } if(i < j) { Swap(&src[i*SSD_PROPOSAL_WIDTH], &src[j*SSD_PROPOSAL_WIDTH]); j--; } } if(low <= maxNum) { if(low < i-1) { top++; stack[top].min = low; stack[top].max = i-1; } if(high > i+1) { top++; stack[top].min = i+1; stack[top].max = high; } } } } void DetectorRetinaFace::NonMaxSuppression( int* proposals, int anchorsNum,int NMSThresh,int maxRoiNum) { int xMin1 = 0; int yMin1 = 0; int xMax1 = 0; int yMax1 = 0; int xMin2 = 0; int yMin2 = 0; int xMax2 = 0; int yMax2 = 0; int areaTotal = 0; int areaInter = 0; int i = 0; int j = 0; int num = 0; int NoOverlap = 1; for (i = 0; i < anchorsNum && num < maxRoiNum; i++) { if( proposals[SSD_PROPOSAL_WIDTH*i+5] == 0 ) { num++; xMin1 = proposals[SSD_PROPOSAL_WIDTH*i]; yMin1 = proposals[SSD_PROPOSAL_WIDTH*i+1]; xMax1 = proposals[SSD_PROPOSAL_WIDTH*i+2]; yMax1 = proposals[SSD_PROPOSAL_WIDTH*i+3]; 
for(j= i+1;j< anchorsNum; j++) { if( proposals[SSD_PROPOSAL_WIDTH*j+5] == 0 ) { xMin2 = proposals[SSD_PROPOSAL_WIDTH*j]; yMin2 = proposals[SSD_PROPOSAL_WIDTH*j+1]; xMax2 = proposals[SSD_PROPOSAL_WIDTH*j+2]; yMax2 = proposals[SSD_PROPOSAL_WIDTH*j+3]; NoOverlap = (xMin2>xMax1)||(xMax2yMax1)||(yMax2 ((int)NMSThresh*areaTotal)) { if( proposals[SSD_PROPOSAL_WIDTH*i+4] >= proposals[SSD_PROPOSAL_WIDTH*j+4] ) { proposals[SSD_PROPOSAL_WIDTH*j+5] = 1; } else { proposals[SSD_PROPOSAL_WIDTH*i+5] = 1; } } } } } } } void DetectorRetinaFace::ComputeOverlap(int xMin1, int yMin1, int xMax1, int yMax1, int xMin2, int yMin2, int xMax2, int yMax2, int* areaSum, int* areaInter) { int inter = 0; int s32Total = 0; int xMin = 0; int yMin = 0; int xMax = 0; int yMax = 0; int area1 = 0; int area2 = 0; int interWidth = 0; int interHeight = 0; xMin = SSD_MAX(xMin1, xMin2); yMin = SSD_MAX(yMin1, yMin2); xMax = SSD_MIN(xMax1, xMax2); yMax = SSD_MIN(yMax1, yMax2); interWidth = xMax - xMin + 1; interHeight = yMax - yMin + 1; interWidth = ( interWidth >= 0 ) ? interWidth : 0; interHeight = ( interHeight >= 0 ) ? 
interHeight : 0; inter = interWidth * interHeight; area1 = (xMax1 - xMin1 + 1) * (yMax1 - yMin1 + 1); area2 = (xMax2 - xMin2 + 1) * (yMax2 - yMin2 + 1); s32Total = area1 + area2 - inter; *areaSum = s32Total; *areaInter = inter; } void DetectorRetinaFace::Swap(int* src1, int* src2) { int i = 0; int temp = 0; for( i = 0; i < SSD_PROPOSAL_WIDTH; i++ ) { temp = src1[i]; src1[i] = src2[i]; src2[i] = temp; } } void DetectorRetinaFace::CreateDetectionResults(std::vector &resultsOfDetection) { // 参数赋值 int* score=ssdParameter.dstScore; int* roi=ssdParameter.dstRoi; int* classRoiNum=ssdParameter.classRoiNum; float printResultThresh=((float)ssdParameter.confThresh)/SSD_QUANT_BASE; int classNum=ssdParameter.classNum; int i = 0, j = 0; int roiNumBias = 0; int scoreBias = 0; int bboxBias = 0; float score2 = 0.0f; int xMin = 0,yMin= 0,xMax = 0,yMax = 0; roiNumBias += classRoiNum[0]; for (i = 1; i < classNum; i++) { scoreBias = roiNumBias; bboxBias = roiNumBias * SSD_COORDI_NUM; if((float)score[scoreBias] / SSD_QUANT_BASE >= printResultThresh && classRoiNum[i]!=0) { //printf("==== The %d th class box info====\n", i); } for (j = 0; j < (int)classRoiNum[i]; j++) { score2 = (float)score[scoreBias + j] / SSD_QUANT_BASE; if (score2 < printResultThresh) { break; } xMin = roi[bboxBias + j*SSD_COORDI_NUM]; yMin = roi[bboxBias + j*SSD_COORDI_NUM + 1]; xMax = roi[bboxBias + j*SSD_COORDI_NUM + 2]; yMax = roi[bboxBias + j*SSD_COORDI_NUM + 3]; ResultOfDetection result; result.boundingBox.x=xMin; result.boundingBox.y=yMin; result.boundingBox.width=xMax-xMin+1; result.boundingBox.height=yMax-yMin+1; result.classID=i; result.confidence=score2; resultsOfDetection.push_back(result); } roiNumBias += classRoiNum[i]; } } }