Commit 56762529 authored by lijian6's avatar lijian6
Browse files

Initial commit


Signed-off-by: lijian6's avatarlijian <lijian6@sugon.com>
parents
// YOLOV5 detector
#ifndef __DETECTOR_YOLOV5_H__
#define __DETECTOR_YOLOV5_H__
#include <string>
#include <migraphx/program.hpp>
#include <opencv2/opencv.hpp>
#include <CommonDefinition.h>
#include "Decoder.h"
using namespace std;
using namespace cv;
using namespace migraphx;
namespace migraphxSamples
{
// YOLOV5 parameters: plain data holder for the detector's class count and thresholds.
typedef struct _YOLOV5Parameter
{
int numberOfClasses;// number of classes
float confidenceThreshold;// confidence threshold
float nmsThreshold;// NMS threshold
float objectThreshold;// object (objectness) confidence threshold
}YOLOV5Parameter;
// YOLOV5 detector declaration. Only the interface and the data members are
// declared here; every member function is defined outside this header.
class DetectorYOLOV5
{
public:
DetectorYOLOV5();
~DetectorYOLOV5();
// Initialization entry point; takes the initialization parameters by value and reports an ErrorCode.
ErrorCode Initialize(InitializationParameterOfDetector initializationParameterOfDetector);
// Detection on an OpenCV image; detections are returned through resultsOfDetection.
ErrorCode Detect(const cv::Mat &srcImage, std::vector<ResultOfDetection> &resultsOfDetection);
// Overload of Detect that takes a DCU_Frame instead of a cv::Mat.
ErrorCode Detect(DCU_Frame &srcImage, std::vector<ResultOfDetection> &resultsOfDetection);
// Public raw float buffer pointer, NULL by default.
// NOTE(review): presumably the preprocessed network input for the DCU_Frame path — confirm who allocates and frees it.
float* preprocess_Image = NULL;
private:
// Shared initialization step taking the same parameters as Initialize.
ErrorCode DoCommonInitialization(InitializationParameterOfDetector initializationParameterOfDetector);
private:
cv::FileStorage configurationFile;
InitializationParameterOfDetector initializationParameter;
FILE *logFile;
// net
migraphx::program net;
cv::Size inputSize;
string inputName;
migraphx::shape inputShape;
bool useFP16;
vector<string> classNames;
YOLOV5Parameter yolov5Parameter;
migraphx::parameter_map ParameterMap;
};
}
#endif
// YOLOV7 detector
#ifndef __DETECTOR_YOLOV7_H__
#define __DETECTOR_YOLOV7_H__
#include <string>
#include <migraphx/program.hpp>
#include <opencv2/opencv.hpp>
#include <CommonDefinition.h>
#include "Decoder.h"
using namespace std;
using namespace cv;
using namespace migraphx;
namespace migraphxSamples
{
// YOLOV7 parameters: plain data holder for the detector's class count and thresholds.
typedef struct _YOLOV7Parameter
{
int numberOfClasses;// number of classes
float confidenceThreshold;// confidence threshold
float nmsThreshold;// NMS threshold
float objectThreshold;// object (objectness) confidence threshold
}YOLOV7Parameter;
// YOLOV7 detector declaration. Only the interface and the data members are
// declared here; every member function is defined outside this header.
class DetectorYOLOV7
{
public:
DetectorYOLOV7();
~DetectorYOLOV7();
// Initialization entry point; takes the initialization parameters by value and reports an ErrorCode.
ErrorCode Initialize(InitializationParameterOfDetector initializationParameterOfDetector);
// Detection on an OpenCV image; detections are returned through resultsOfDetection.
ErrorCode Detect(const cv::Mat &srcImage, std::vector<ResultOfDetection> &resultsOfDetection);
// Overload of Detect that takes a DCU_Frame instead of a cv::Mat.
ErrorCode Detect(DCU_Frame &srcImage, std::vector<ResultOfDetection> &resultsOfDetection);
// Public raw float buffer pointer, NULL by default.
// NOTE(review): presumably the preprocessed network input for the DCU_Frame path — confirm who allocates and frees it.
float* preprocess_Image = NULL;
private:
// Shared initialization step taking the same parameters as Initialize.
ErrorCode DoCommonInitialization(InitializationParameterOfDetector initializationParameterOfDetector);
private:
cv::FileStorage configurationFile;
InitializationParameterOfDetector initializationParameter;
FILE *logFile;
// net
migraphx::program net;
cv::Size inputSize;
string inputName;
migraphx::shape inputShape;
bool useFP16;
vector<string> classNames;
YOLOV7Parameter yolov7Parameter;
migraphx::parameter_map ParameterMap;
};
}
#endif
// File and directory handling
#ifndef __FILE_SYSTEM_H__
#define __FILE_SYSTEM_H__
#include <vector>
#include <string>
using namespace std;
namespace migraphxSamples
{
// Whether the path exists
bool Exists(const string &path);
// Whether the path is a directory
bool IsDirectory(const string &path);
// Whether c is a path separator (Linux: '/', Windows: '\\')
bool IsPathSeparator(char c);
// Joins base and path into a single path.
string JoinPath(const string &base, const string &path);
// Creates a multi-level directory. Note: when creating nested directories, no file may already exist at the target directory path
bool CreateDirectories(const string &directoryPath);
/**
* Builds the list of file names matching the given pattern (recursive traversal supported)
* pattern: the pattern, e.g. "*.jpg", "*.png", "*.jpg,*.png"
* addPath: whether to include the parent path
* Notes:
1. Separate multiple patterns with ",", e.g. "*.jpg,*.png"
2. The wildcards '*' and '?' are supported, e.g. all file names whose first character is 7: "7*.*"; all jpg file names ending in 512: "*512.jpg"
3. Use "*.jpg", not ".jpg"
4. An empty string means return all results
5. Subdirectory names are never returned
*
*/
void GetFileNameList(const string &directory, const string &pattern, std::vector<string> &result, bool recursive, bool addPath);
// Differs from GetFileNameList in that, when subdirectories exist and addPath is true, subdirectory paths are returned as well (each directory name ends with "/")
void GetFileNameList2(const string &directory, const string &pattern, std::vector<string> &result, bool recursive, bool addPath);
// Removes a file or directory; recursive removal is supported
void Remove(const string &directory, const string &extension="");
// Accessors for the file name, extension and parent path of a path
// For D:/1/1.txt the file name is 1.txt, the extension is .txt and the parent path is D:/1/
string GetFileName(const string &path); // 1.txt
string GetFileName_NoExtension(const string &path); // 1
string GetExtension(const string &path);// .txt
string GetParentPath(const string &path);// D:/1/
// Copies a file: CopyFile("D:/1.txt","D:/2.txt"); copies 1.txt to 2.txt
bool CopyFile(const string srcPath,const string dstPath);
/* Copies a directory
Example: CopyDirectories("D:/0/1/2/","E:/3/"); copies the directory D:/0/1/2/ into E:/3/ (the resulting layout is E:/3/2/)
Notes:
1. The first argument must not end with "/"
2. Hidden files cannot be copied
NOTE(review): the example above passes a first argument that ends with "/", which contradicts note 1 — confirm which is correct.
*/
bool CopyDirectories(string srcPath,const string dstPath);
}
#endif //
/*
* queue for Decoder and Detector
* 2023.03.04 sugon
* */
#ifndef __QUEUE_H__
#define __QUEUE_H__
#include <pthread.h>
#include <queue>
#include <condition_variable>
#include <mutex>
#include <sys/time.h>
#include <opencv2/opencv.hpp>
#include <SimpleLog.h>
#include <Decoder.h>
using std::queue;
namespace migraphxSamples
{
// Default capacity used when a Queue is constructed without an explicit size.
#define CAPACITY 1
// Queue declaration holding two std::queue members: one of cv::Mat and one of
// DCU_Frame, together with a pthread mutex and two pthread condition variables.
// All member functions are defined outside this header.
// NOTE(review): the CQ_/DQ_ prefixes presumably refer to the cv::Mat queue
// (C_Queue) and the DCU_Frame queue (D_Queue) respectively — confirm.
class Queue
{
public:
Queue(size_t Capacity = CAPACITY);
~Queue();
// Enqueue overloads, one per frame type.
void enQueue(cv::Mat& Image);
void enQueue(DCU_Frame& dcu_frame);
// Dequeue overloads; the dequeued item is returned through the pointer argument.
void deQueue(cv::Mat* Image);
void deQueue(DCU_Frame* dcu_frame);
void finish();
bool CQ_Isempty();
bool DQ_Isempty();
// Public state flags; their meaning is defined by the code that sets them.
// NOTE(review): DecodeEnd presumably marks the end of decoding and device a device id — confirm.
bool DecodeEnd;
int device;
private:
bool CQ_IsFull();
bool DQ_IsFull();
queue<cv::Mat> C_Queue;
queue<DCU_Frame> D_Queue;
size_t _Capacity;
pthread_mutex_t Lock;
pthread_cond_t ConsumeCond;
pthread_cond_t ProductCond;
};
}
#endif
// SSD definitions
#ifndef __SSD_DEFINITION_H__
#define __SSD_DEFINITION_H__
#include <string>
#include <vector>
using namespace std;
namespace migraphxSamples
{
#define SSD_MAX_PRIORBOX_LAYER_NUM 10 // maximum number of detection (prior-box) layers supported

// SSD parameters.
// Plain data holder filled in by the detector before post-processing.
// The default constructor zero-initialises every scalar, every fixed-size
// array and every pointer, so a default-constructed instance is always safe
// to inspect (e.g. numberOfPriorBoxLayer == 0 and buffer == NULL).
// The struct does not own `buffer`; whoever allocates it must release it.
typedef struct _SSDParameter
{
    int numberOfPriorBoxLayer; // number of detection layers

    // Model Parameters (two entries per detection layer)
    int convHeight[SSD_MAX_PRIORBOX_LAYER_NUM*2];
    int convWidth[SSD_MAX_PRIORBOX_LAYER_NUM*2];
    int convChannel[SSD_MAX_PRIORBOX_LAYER_NUM*2];

    // PriorBoxLayer Parameters
    int priorBoxWidth[SSD_MAX_PRIORBOX_LAYER_NUM];  // prior-box grid width of each detection layer
    int priorBoxHeight[SSD_MAX_PRIORBOX_LAYER_NUM]; // prior-box grid height of each detection layer
    std::vector<std::vector<float>> priorBoxMinSize; // min sizes of each detection layer
    std::vector<std::vector<float>> priorBoxMaxSize; // max sizes of each detection layer
    int minSizeNum[SSD_MAX_PRIORBOX_LAYER_NUM]; // number of min sizes of each detection layer
    int maxSizeNum[SSD_MAX_PRIORBOX_LAYER_NUM]; // number of max sizes of each detection layer
    int srcImageHeight; // source image size
    int srcImageWidth;
    int inputAspectRatioNum[SSD_MAX_PRIORBOX_LAYER_NUM]; // number of aspect ratios of each detection layer
    std::vector<std::vector<float>> priorBoxAspectRatio; // aspect ratios of each detection layer
    float priorBoxStepWidth[SSD_MAX_PRIORBOX_LAYER_NUM];  // horizontal step of each detection layer
    float priorBoxStepHeight[SSD_MAX_PRIORBOX_LAYER_NUM]; // vertical step of each detection layer
    float offset;
    int flip[SSD_MAX_PRIORBOX_LAYER_NUM];
    int clip[SSD_MAX_PRIORBOX_LAYER_NUM];
    int priorBoxVar[4];

    // SoftmaxLayer Parameters
    int softMaxInChn[SSD_MAX_PRIORBOX_LAYER_NUM];
    int softMaxInHeight;
    int concatNum;
    int softMaxOutWidth;
    int softMaxOutHeight;
    int softMaxOutChn;

    // DetectionOutLayer Parameters
    int classNum; // number of classes (background included)
    int topK;
    int keepTopK;
    int NMSThresh;
    int confThresh;
    int detectInputChn[SSD_MAX_PRIORBOX_LAYER_NUM];
    int convStride[SSD_MAX_PRIORBOX_LAYER_NUM];

    // buffer
    int *buffer;
    int *classification[SSD_MAX_PRIORBOX_LAYER_NUM]; // classification data
    int *regression[SSD_MAX_PRIORBOX_LAYER_NUM];     // regression data
    int *priorboxOutputData;
    int *softMaxOutputData;
    int *getResultBuffer;
    int *dstScore;
    int *dstRoi;
    int *classRoiNum;

    // Initialisers are listed in declaration order; `member()` value-initialises
    // (zero-fills) the fixed-size arrays.
    _SSDParameter():numberOfPriorBoxLayer(0),
                    convHeight(),
                    convWidth(),
                    convChannel(),
                    priorBoxWidth(),
                    priorBoxHeight(),
                    minSizeNum(),
                    maxSizeNum(),
                    srcImageHeight(0),
                    srcImageWidth(0),
                    inputAspectRatioNum(),
                    priorBoxStepWidth(),
                    priorBoxStepHeight(),
                    offset(0.0),
                    flip(),
                    clip(),
                    priorBoxVar(),
                    softMaxInChn(),
                    softMaxInHeight(0),
                    concatNum(0),
                    softMaxOutWidth(0),
                    softMaxOutHeight(0),
                    softMaxOutChn(0),
                    classNum(0),
                    topK(0),
                    keepTopK(0),
                    NMSThresh(0),
                    confThresh(0),
                    detectInputChn(),
                    convStride(),
                    buffer(NULL),
                    classification(),
                    regression(),
                    priorboxOutputData(NULL),
                    softMaxOutputData(NULL),
                    getResultBuffer(NULL),
                    dstScore(NULL),
                    dstRoi(NULL),
                    classRoiNum(NULL){}
}SSDParameter;
// Pair of integer bounds.
// NOTE(review): judging by the name, presumably one pending [min, max] index
// range on the explicit stack of an iterative quicksort — confirm against the
// code that uses it.
typedef struct _QuickSortStack
{
int min;
int max;
}QuickSortStack;
}
#endif
// Sample programs
#ifndef __SAMPLE_H__
#define __SAMPLE_H__
// Entry points of the sample programs, one per detector. Each takes a single
// int argument and is defined outside this header.
// NOTE(review): `device` is presumably the index of the GPU/DCU device to run on — confirm.
// SSD sample
void Sample_DetectorSSD(int device);
// YOLOV3 sample
void Sample_DetectorYOLOV3(int device);
// YOLOV5 sample
void Sample_DetectorYOLOV5(int device);
// YOLOV7 sample
void Sample_DetectorYOLOV7(int device);
// RetinaFace sample
void Sample_DetectorRetinaFace(int device);
#endif
// Simple log
#ifndef __SIMPLE_LOG_H__
#define __SIMPLE_LOG_H__
#include <time.h>
#include <string>
#include <map>
#include <thread>
#include <mutex>
#if (defined WIN32 || defined _WIN32)
#include <Windows.h>
#else
#include <sys/time.h>
#endif
using namespace std;
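/* Simple log overview
A lightweight logging facility that does not depend on any third-party library; including this single header is all that is needed. It provides four log levels: INFO, DEBUG, WARN and ERROR.
Example 1:
// Initialize logging: creates two log files, log1.log and log2.log, under ./Log/ (note: the ./Log/ directory must already exist, otherwise the log file cannot be created)
LogManager::GetInstance()->Initialize("./Log/","log1");
LogManager::GetInstance()->Initialize("./Log/","log2");
// Write log records
string log = "Hello World";
LOG_INFO(LogManager::GetInstance()->GetLogFile("log1"), "%s\n", log.c_str()); // written to log1.log
LOG_INFO(LogManager::GetInstance()->GetLogFile("log2"), "%s\n", log.c_str()); // written to log2.log
// Close the logs
LogManager::GetInstance()->Close("log1");
LogManager::GetInstance()->Close("log2");
Example 2:
// Send log output to the console
string log = "Hello World";
LOG_INFO(stdout, "%s\n", log.c_str());
Notes:
1. Requires C++11
2. In multi-threaded programs locking must be enabled (uncomment #define LOG_MUTEX), otherwise log output may be interleaved
*/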
// #define LOG_MUTEX // 加锁
// Registry mapping log names to open FILE handles, reached through a single
// shared instance (see GetInstance). None of the member functions take the
// mutex themselves; it is only handed out through GetLogMutex().
class LogManager
{
private:
    LogManager(){}

public:
    ~LogManager(){}

    // Opens "<parentPath><logName>.log" in append mode and registers it under
    // logName. Does nothing when logName is empty (console output), when the
    // name is already registered, or when the file cannot be opened.
    inline void Initialize(const std::string &parentPath,const std::string &logName)
    {
        if(logName.empty())
            return;
        if(logMap.find(logName) != logMap.end())
            return;

        const std::string logPath = parentPath + logName + ".log";
        FILE *handle = fopen(logPath.c_str(), "a"); // "w" would overwrite, "a" appends
        if(handle == NULL)
            return;
        logMap.insert(std::make_pair(logName, handle));
    }

    // Returns the handle registered under logName, or NULL when there is none.
    inline FILE* GetLogFile(const std::string &logName)
    {
        const std::map<std::string, FILE*>::const_iterator entry = logMap.find(logName);
        return (entry != logMap.end()) ? entry->second : NULL;
    }

    // Closes and unregisters the log called logName; unknown names are ignored.
    inline void Close(const std::string &logName)
    {
        const std::map<std::string, FILE*>::const_iterator entry = logMap.find(logName);
        if(entry != logMap.end())
        {
            fclose(entry->second);
            logMap.erase(entry);
        }
    }

    // Mutex used by the LOCK/UNLOCK macros when LOG_MUTEX is defined.
    inline std::mutex &GetLogMutex()
    {
        return logMutex;
    }

    // Singleton accessor backed by a function-local static instance.
    static LogManager* GetInstance()
    {
        static LogManager instance;
        return &instance;
    }

private:
    std::map<std::string, FILE*> logMap;
    std::mutex logMutex;
};
// LOCK / UNLOCK bracket the body of each LOG_* macro. With LOG_MUTEX defined
// they lock and unlock the LogManager singleton's mutex; otherwise they expand
// to nothing.
#ifdef LOG_MUTEX
#define LOCK LogManager::GetInstance()->GetLogMutex().lock()
#define UNLOCK LogManager::GetInstance()->GetLogMutex().unlock()
#else
#define LOCK
#define UNLOCK
#endif
// Current local time with every field already formatted as text.
typedef struct _LogTime
{
    std::string year;        // 4 digits
    std::string month;       // 2 digits, 01-12
    std::string day;         // 2 digits
    std::string hour;        // 2 digits
    std::string minute;      // 2 digits
    std::string second;      // 2 digits
    std::string millisecond; // ms, 3 digits
    std::string microsecond; // us within the current millisecond, 3 digits
    std::string weekDay;     // day of week as a number, 0 = Sunday
}LogTime;

// Formats one integer field with a bounded snprintf so an unexpected value
// can never overrun the scratch buffer.
inline std::string FormatLogTimeField(const char *format, int value)
{
    char text[16] = { 0 };
    snprintf(text, sizeof(text), format, value);
    return std::string(text);
}

// Returns the current local time split into zero-padded text fields.
// On POSIX the reentrant localtime_r is used, so concurrent callers do not
// share the static buffer that localtime() returns.
inline LogTime GetTime()
{
    LogTime currentTime;
#if (defined WIN32 || defined _WIN32)
    SYSTEMTIME systemTime;
    GetLocalTime(&systemTime);
    currentTime.year        = FormatLogTimeField("%04d", systemTime.wYear);
    currentTime.month       = FormatLogTimeField("%02d", systemTime.wMonth);
    currentTime.day         = FormatLogTimeField("%02d", systemTime.wDay);
    currentTime.hour        = FormatLogTimeField("%02d", systemTime.wHour);
    currentTime.minute      = FormatLogTimeField("%02d", systemTime.wMinute);
    currentTime.second      = FormatLogTimeField("%02d", systemTime.wSecond);
    currentTime.millisecond = FormatLogTimeField("%03d", systemTime.wMilliseconds);
    // SYSTEMTIME only has millisecond resolution; report a zero remainder
    // rather than leaving the field empty.
    currentTime.microsecond = FormatLogTimeField("%03d", 0);
    currentTime.weekDay     = FormatLogTimeField("%d", systemTime.wDayOfWeek);
#else
    struct timeval tv;
    gettimeofday(&tv, NULL);
    const time_t seconds = tv.tv_sec;
    struct tm localTime = tm(); // zero-filled in case localtime_r fails
    localtime_r(&seconds, &localTime);
    currentTime.year        = FormatLogTimeField("%04d", 1900 + localTime.tm_year);
    currentTime.month       = FormatLogTimeField("%02d", 1 + localTime.tm_mon);
    currentTime.day         = FormatLogTimeField("%02d", localTime.tm_mday);
    currentTime.hour        = FormatLogTimeField("%02d", localTime.tm_hour);
    currentTime.minute      = FormatLogTimeField("%02d", localTime.tm_min);
    currentTime.second      = FormatLogTimeField("%02d", localTime.tm_sec);
    currentTime.millisecond = FormatLogTimeField("%03d", (int)(tv.tv_usec / 1000));
    currentTime.microsecond = FormatLogTimeField("%03d", (int)(tv.tv_usec % 1000));
    currentTime.weekDay     = FormatLogTimeField("%d", localTime.tm_wday);
#endif
    return currentTime;
}
// Writes the current local time as "YYYY-MM-DD hh:mm:ss.mmm" followed by a tab
// to logFile, or to stdout when logFile is NULL. The timestamp comes from GetTime().
#define LOG_TIME(logFile) \
do\
{\
LogTime currentTime=GetTime(); \
fprintf(((logFile == NULL) ? stdout : logFile), "%s-%s-%s %s:%s:%s.%s\t",currentTime.year.c_str(),currentTime.month.c_str(),currentTime.day.c_str(),currentTime.hour.c_str(),currentTime.minute.c_str(),currentTime.second.c_str(),currentTime.millisecond.c_str()); \
}while (0)
// Writes one INFO record: timestamp, level, "[file:line (function) ]: " and the
// printf-style message logInfo with its arguments. Output goes to logFile, or
// to stdout when logFile is NULL. The stream that received the record is the
// one that is flushed; passing NULL straight to fflush would instead flush
// every open output stream of the process.
#define LOG_INFO(logFile,logInfo, ...) \
do\
{\
LOCK; \
LOG_TIME(logFile); \
fprintf(((logFile == NULL) ? stdout : logFile), "INFO\t"); \
fprintf(((logFile == NULL) ? stdout : logFile), "[%s:%d (%s) ]: ", __FILE__, __LINE__, __FUNCTION__); \
fprintf(((logFile == NULL) ? stdout : logFile), logInfo, ## __VA_ARGS__); \
fflush(((logFile == NULL) ? stdout : logFile)); \
UNLOCK; \
} while (0)
// Writes one DEBUG record: timestamp, level, "[file:line (function) ]: " and the
// printf-style message logInfo with its arguments. Output goes to logFile, or
// to stdout when logFile is NULL. The stream that received the record is the
// one that is flushed; passing NULL straight to fflush would instead flush
// every open output stream of the process.
#define LOG_DEBUG(logFile,logInfo, ...) \
do\
{\
LOCK; \
LOG_TIME(logFile);\
fprintf(((logFile==NULL)?stdout:logFile), "DEBUG\t"); \
fprintf(((logFile==NULL)?stdout:logFile), "[%s:%d (%s) ]: ", __FILE__, __LINE__, __FUNCTION__); \
fprintf(((logFile==NULL)?stdout:logFile),logInfo, ## __VA_ARGS__); \
fflush(((logFile==NULL)?stdout:logFile)); \
UNLOCK; \
} while (0)
// Writes one ERROR record: timestamp, level, "[file:line (function) ]: " and the
// printf-style message logInfo with its arguments. Output goes to logFile, or
// to stdout when logFile is NULL. The stream that received the record is the
// one that is flushed; passing NULL straight to fflush would instead flush
// every open output stream of the process.
#define LOG_ERROR(logFile,logInfo, ...) \
do\
{\
LOCK; \
LOG_TIME(logFile);\
fprintf(((logFile==NULL)?stdout:logFile), "ERROR\t"); \
fprintf(((logFile==NULL)?stdout:logFile), "[%s:%d (%s) ]: ", __FILE__, __LINE__, __FUNCTION__); \
fprintf(((logFile==NULL)?stdout:logFile),logInfo, ## __VA_ARGS__); \
fflush(((logFile==NULL)?stdout:logFile)); \
UNLOCK; \
} while (0)
// Writes one WARN record: timestamp, level, "[file:line (function) ]: " and the
// printf-style message logInfo with its arguments. Output goes to logFile, or
// to stdout when logFile is NULL. The stream that received the record is the
// one that is flushed; passing NULL straight to fflush would instead flush
// every open output stream of the process.
#define LOG_WARN(logFile,logInfo, ...) \
do\
{\
LOCK; \
LOG_TIME(logFile);\
fprintf(((logFile==NULL)?stdout:logFile), "WARN\t"); \
fprintf(((logFile==NULL)?stdout:logFile), "[%s:%d (%s) ]: ", __FILE__, __LINE__, __FUNCTION__); \
fprintf(((logFile==NULL)?stdout:logFile),logInfo, ## __VA_ARGS__); \
fflush(((logFile==NULL)?stdout:logFile)); \
UNLOCK; \
} while (0)
// Returns the current wall-clock time from gettimeofday().
// NOTE: despite the "Ms" in the name, the value is in MICROseconds
// (seconds * 1,000,000 + tv_usec); the unit is kept as-is because existing
// callers depend on it.
// The seconds are widened to long long before multiplying so the product
// cannot overflow on platforms where time_t is 32 bits wide.
inline long long GetNowMs()
{
    struct timeval tv;
    gettimeofday(&tv, NULL);
    const long long seconds = static_cast<long long>(tv.tv_sec);
    const long long microseconds = static_cast<long long>(tv.tv_usec);
    return seconds * 1000000LL + microseconds;
}
#endif // __SIMPLE_LOG_H__
File added
#include <DetectorRetinaFace.h>
#include <migraphx/onnx.hpp>
#include <migraphx/gpu/target.hpp>
#include <migraphx/gpu/hip.hpp>
#include <migraphx/generate.hpp>
#include <migraphx/quantization.hpp>
#include <opencv2/dnn.hpp>
#include <CommonUtility.h>
#include <Filesystem.h>
#include <SimpleLog.h>
#include <hip/hip_runtime.h>
#include "hip/hip_runtime.h"
using namespace cv::dnn;
namespace migraphxSamples
{
// Constants and helper macros for the fixed-point SSD post-processing.
#define SSD_QUANT_BASE 4096 // fixed-point base: float values are scaled by this before being stored as int
#define SSD_COORDI_NUM 4 // number of coordinates per box (x1,y1,x2,y2)
#define SSD_PROPOSAL_WIDTH 6
#define SSD_HALF 0.5
#define SSD_ASPECT_RATIO_NUM 6 // default maximum number of aspect ratios
#define SSD_MAX(a,b) (((a) > (b)) ? (a) : (b))
#define SSD_MIN(a,b) (((a) < (b)) ? (a) : (b))
// 16-byte alignment
#define SSD_ALIGN_16 16
// Rounds `number` up to the next multiple of SSD_ALIGN_16. The argument is
// parenthesised so expressions built from low-precedence operators
// (e.g. `a | b`, `c ? x : y`) expand correctly.
#define SSD_ALIGN16(number) ((((number) + SSD_ALIGN_16 - 1) / SSD_ALIGN_16) * SSD_ALIGN_16)
// Constructs a detector with no log file attached. Under DMA the device input
// buffer pointer is reset explicitly so the destructor can tell whether
// Initialize() ever allocated it.
DetectorRetinaFace::DetectorRetinaFace():logFile(NULL)
{
#ifdef DMA
    preprocess_Image = NULL;
#endif
}

// Releases everything the detector acquired: the configuration file, the
// device input buffer that Initialize() obtains with hipMalloc (DMA builds
// only), and the host buffer holding the SSD working memory.
DetectorRetinaFace::~DetectorRetinaFace()
{
    configurationFile.release();

#ifdef DMA
    if(preprocess_Image != NULL)
    {
        (void)hipFree(preprocess_Image);
        preprocess_Image = NULL;
    }
#endif

    // Free the memory that backs the SSD parameters (delete[] on NULL is a no-op).
    delete[] ssdParameter.buffer;
    ssdParameter.buffer = NULL;
}
// Prepares the detector for inference.
// Steps: common initialization (log file + configuration file), read the
// "DetectorRetinaFace" configuration node, parse the ONNX model, optionally
// quantize it (INT8 with one calibration batch and/or FP16), compile it for the
// GPU target, run one warm-up evaluation, and finally read the SSD parameters.
// Returns SUCCESS, the error reported by DoCommonInitialization,
// MODEL_NOT_EXIST when the model file is missing, or IMAGE_ERROR when the INT8
// calibration image cannot be read.
ErrorCode DetectorRetinaFace::Initialize(InitializationParameterOfDetector initializationParameterOfDetector)
{
    // Common initialization (fetch the log file, load the configuration file, ...)
    ErrorCode errorCode=DoCommonInitialization(initializationParameterOfDetector);
    if(errorCode!=SUCCESS)
    {
        LOG_ERROR(logFile,"fail to DoCommonInitialization\n");
        return errorCode;
    }
    LOG_INFO(logFile,"succeed to DoCommonInitialization\n");

    // Read the configuration parameters
    FileNode netNode = configurationFile["DetectorRetinaFace"];
    string modelPath=initializationParameter.parentPath+(string)netNode["ModelPath"];
    scale=(float)netNode["Scale"];
    meanValue.val[0]=(float)netNode["MeanValue1"];
    meanValue.val[1]=(float)netNode["MeanValue2"];
    meanValue.val[2]=(float)netNode["MeanValue3"];
    swapRB=(bool)(int)netNode["SwapRB"];
    crop=(bool)(int)netNode["Crop"];
    useInt8=(bool)(int)netNode["UseInt8"];
    useFP16=(bool)(int)netNode["UseFP16"];

    // Load the model
    if(Exists(modelPath)==false)
    {
        LOG_ERROR(logFile,"%s not exist!\n",modelPath.c_str());
        return MODEL_NOT_EXIST;
    }
    net = migraphx::parse_onnx(modelPath);
    LOG_INFO(logFile,"succeed to load model: %s\n",GetFileName(modelPath).c_str());

    // Query the model's input: the first parameter is taken as the image input
    std::pair<std::string, shape> inputAttribute=*(net.get_parameter_shapes().begin());
    inputName=inputAttribute.first;
    inputShape=inputAttribute.second;
    inputSize=Size(inputShape.lens()[3],inputShape.lens()[2]);// NCHW

    // Run the model on the GPU
    migraphx::target gpuTarget = migraphx::gpu::target{};

    // Quantization
    if(useInt8)
    {
        // Build the calibration data; several representative images from the
        // test set are recommended
        const char *calibrationImagePath="../Resource/Images/FaceDetect_2.jpg";
        cv::Mat srcImage=imread(calibrationImagePath,1);
        if(srcImage.empty())
        {
            // Without this check blobFromImages would throw on the empty image.
            LOG_ERROR(logFile,"fail to read calibration image: %s\n",calibrationImagePath);
            return IMAGE_ERROR;
        }
        std::vector<cv::Mat> srcImages;
        const size_t batchSize=inputShape.lens()[0];
        for(size_t i=0;i<batchSize;++i)
        {
            srcImages.push_back(srcImage);
        }
        cv::Mat inputBlob;
        // NOTE(review): the crop flag is passed as false here although `crop`
        // is read from the configuration above — confirm this is intentional.
        blobFromImages(srcImages,
                       inputBlob,
                       scale,
                       inputSize,
                       meanValue,
                       swapRB,
                       false);
        migraphx::parameter_map inputData;
        inputData[inputName]= migraphx::argument{inputShape, (float*)inputBlob.data};
        std::vector<migraphx::parameter_map> calibrationData = {inputData};

        // INT8 quantization
        migraphx::quantize_int8(net, gpuTarget, calibrationData);
    }
    if(useFP16)
    {
        migraphx::quantize_fp16(net);
    }

    // Compile the model
    migraphx::compile_options options;
    options.device_id=0; // GPU device to use, device 0 by default (supported in versions >= 1.2)
#ifdef DMA
    options.offload_copy=false; // set offload_copy
#else
    options.offload_copy=true; // set offload_copy
#endif
    net.compile(gpuTarget,options);
    LOG_INFO(logFile,"succeed to compile model: %s\n",GetFileName(modelPath).c_str());

    // Run once by itself
#ifdef DMA
    ParameterMap=CreateParameterMap(net);
    net.eval(ParameterMap);
#else
    migraphx::parameter_map inputData;
    inputData[inputName]=migraphx::generate_argument(inputShape);
    net.eval(inputData);
#endif

#ifdef DMA
    // Device buffer for one preprocessed 3-channel float image of the network input size.
    // NOTE(review): the hipMalloc result is not checked — decide which ErrorCode
    // should be returned on allocation failure.
    hipMalloc((void**)&preprocess_Image, inputSize.height * inputSize.width * 3 * sizeof(float));
#endif

    // log
    LOG_INFO(logFile,"InputSize:%dx%d\n",inputSize.width,inputSize.height);
    LOG_INFO(logFile,"InputName:%s\n",inputName.c_str());
    LOG_INFO(logFile,"Scale:%.6f\n",scale);
    LOG_INFO(logFile,"Mean:%.2f,%.2f,%.2f\n",meanValue.val[0],meanValue.val[1],meanValue.val[2]);
    LOG_INFO(logFile,"SwapRB:%d\n",(int)swapRB);
    LOG_INFO(logFile,"Crop:%d\n",(int)crop);
    LOG_INFO(logFile,"UseInt8:%d\n",(int)useInt8);
    LOG_INFO(logFile,"UseFP16:%d\n",(int)useFP16);

    // Read the SSD parameters
    GetSSDParameter();

    return SUCCESS;
}
// Shared first stage of Initialize(): stores the initialization parameters,
// looks up the log file by name, opens the configuration file for reading and
// makes sure a non-empty parent path ends with a path separator.
// Returns CONFIG_FILE_NOT_EXIST, FAIL_TO_OPEN_CONFIG_FILE or SUCCESS.
ErrorCode DetectorRetinaFace::DoCommonInitialization(InitializationParameterOfDetector initializationParameterOfDetector)
{
    initializationParameter=initializationParameterOfDetector;

    // Resolve the log file registered under the configured log name
    logFile=LogManager::GetInstance()->GetLogFile(initializationParameter.logName);

    // Load the configuration file
    std::string pathOfConfig=initializationParameter.configFilePath;
    if(Exists(pathOfConfig)==false)
    {
        LOG_ERROR(logFile, "no configuration file!\n");
        return CONFIG_FILE_NOT_EXIST;
    }
    const bool opened=configurationFile.open(pathOfConfig, FileStorage::READ);
    if(opened==false)
    {
        LOG_ERROR(logFile, "fail to open configuration file\n");
        return FAIL_TO_OPEN_CONFIG_FILE;
    }
    LOG_INFO(logFile, "succeed to open configuration file\n");

    // Normalize the parent path: append a separator unless it is empty or already ends with one
    std::string &rootPath = initializationParameter.parentPath;
    const bool missingSeparator = !rootPath.empty() && !IsPathSeparator(rootPath[rootPath.size() - 1]);
    if(missingSeparator)
    {
        rootPath+=PATH_SEPARATOR;
    }
    return SUCCESS;
}
#ifdef DMA
// Converts a planar 8-bit image whose planes are stored B, G, R into a planar
// float image whose planes are stored R, G, B, dividing every sample by 255.
// One thread handles one pixel; threads whose 1-D global index falls outside
// width*height do nothing.
__global__ void convert_bgrp_to_rgb_and_normalization_retinaface(unsigned char* srcImage, float* outImage, int width, int height)
{
    const int pixel = blockIdx.x * blockDim.x + threadIdx.x;
    const int planeSize = width * height;
    if (pixel >= planeSize)
    {
        return;
    }

    const float divisor = 255.0;
    const unsigned char blue  = srcImage[pixel];
    const unsigned char green = srcImage[pixel + planeSize];
    const unsigned char red   = srcImage[pixel + planeSize * 2];

    outImage[pixel]                 = red / divisor;
    outImage[pixel + planeSize]     = green / divisor;
    outImage[pixel + planeSize * 2] = blue / divisor;
}
// Converts a YUV420P frame (full-size Y plane followed by quarter-size U and V
// planes) into a planar float RGB image: plane 0 = R, plane 1 = G, plane 2 = B,
// each width*height floats.
//
// Each thread derives one flattened pixel index from its 2-D global thread
// coordinates, so the kernel covers the whole frame both when it is launched
// with a 1-D grid of at least width*height threads and when it is launched with
// a 2-D grid that covers width x height.
//
// NOTE(review): unlike the sibling kernels, no mean subtraction or scaling is
// applied to the converted values and they are not clamped to [0,255] —
// confirm the preprocessing the model expects.
__global__ void convert_yuv420p_to_rgb_and_normalization_retinaface(unsigned char* srcImage, float* outImage, int width, int height)
{
    const int planeSize = width * height;

    // Flatten the (possibly 2-D) global thread coordinate into one pixel index.
    const int threadColumn = blockIdx.x * blockDim.x + threadIdx.x;
    const int threadRow = blockIdx.y * blockDim.y + threadIdx.y;
    const int threadsPerRow = (int)(gridDim.x * blockDim.x);
    const int index = threadRow * threadsPerRow + threadColumn;
    if (index >= planeSize)
        return;

    const int x = index % width;
    const int y = index / width;

    // One U and one V sample is shared by each 2x2 block of pixels.
    const int chromaOffset = (y / 2) * (width / 2) + (x / 2);
    const int yIndex = index;
    const int uIndex = chromaOffset + planeSize;
    const int vIndex = chromaOffset + planeSize * 5 / 4;

    const unsigned char yValue = srcImage[yIndex];
    const unsigned char uValue = srcImage[uIndex];
    const unsigned char vValue = srcImage[vIndex];

    const int r = yValue + 1.370705 * (vValue - 128);
    const int g = yValue - 0.698001 * (vValue - 128) - 0.337633 * (uValue - 128);
    const int b = yValue + 1.732446 * (uValue - 128);

    // Planar output: writing to index+1 / index+2 would overwrite the
    // neighbouring pixels that other threads are producing.
    outImage[index + planeSize * 0] = (float)r;
    outImage[index + planeSize * 1] = (float)g;
    outImage[index + planeSize * 2] = (float)b;
}
// Converts an interleaved 8-bit RGBA image into a planar float image with the
// planes stored R, G, B. The alpha byte is ignored, and 123 / 117 / 104 are
// subtracted from the R / G / B samples respectively (divisor of 1, i.e. no scaling).
// One thread handles one pixel; threads whose 1-D global index falls outside
// width*height do nothing.
__global__ void convert_rgba_to_rgb_and_normalization_retinaface(unsigned char* srcImage, float* outImage, int width, int height)
{
    const int pixel = blockIdx.x * blockDim.x + threadIdx.x;
    const int planeSize = width * height;
    if (pixel >= planeSize)
    {
        return;
    }

    const unsigned char *rgba = srcImage + pixel * 4;
    const unsigned char red   = rgba[0];
    const unsigned char green = rgba[1];
    const unsigned char blue  = rgba[2];

    const float divisor = 1.0;
    outImage[pixel]                 = (red - 123) / divisor;
    outImage[pixel + planeSize]     = (green - 117) / divisor;
    outImage[pixel + planeSize * 2] = (blue - 104) / divisor;
}
#endif
// Runs detection on an OpenCV image.
// The image must be non-empty and of type CV_8UC3, otherwise IMAGE_ERROR is
// returned. The image is turned into an NCHW blob, evaluated by the network,
// and the two outputs of each prior-box layer (box head, class head) are
// permuted and handed to GetResult(). The resulting boxes are scaled from the
// network input size to the source image size and sorted with CompareConfidence.
ErrorCode DetectorRetinaFace::Detect(const cv::Mat &srcImage,std::vector<ResultOfDetection> &resultsOfDetection)
{
    const bool imageUsable = !srcImage.empty() && srcImage.type()==CV_8UC3;
    if(!imageUsable)
    {
        LOG_ERROR(logFile, "image error!\n");
        return IMAGE_ERROR;
    }

    // Preprocess and convert to NCHW
    cv::Mat blob;
    blobFromImage(srcImage, blob, scale, inputSize, meanValue, swapRB, false);

    // Network input
    migraphx::parameter_map feed;
    feed[inputName]= migraphx::argument{inputShape, (float*)blob.data};

    // Inference
    std::vector<migraphx::argument> outputs=net.eval(feed);

    // Permute the outputs of every prior-box layer
    vector<vector<float>> boxHeads;
    vector<vector<float>> classHeads;
    for(int layer=0;layer<ssdParameter.numberOfPriorBoxLayer;++layer)
    {
        const int gridWidth=ssdParameter.priorBoxWidth[layer];
        const int gridHeight=ssdParameter.priorBoxHeight[layer];
        const int boxesPerCell=ssdParameter.detectInputChn[layer]/(4*(gridHeight * gridWidth));

        // BboxHead: output 2*layer
        std::vector<float> boxHead;
        migraphx::argument boxOutput = outputs[2*layer];
        boxOutput.visit([&](auto values) { boxHead.assign(values.begin(), values.end()); });
        boxHeads.push_back(PermuteLayer(boxHead,gridWidth,gridHeight,boxesPerCell*4));

        // ClassHead: output 2*layer+1
        std::vector<float> classHead;
        migraphx::argument classOutput = outputs[2*layer+1];
        classOutput.visit([&](auto values) { classHead.assign(values.begin(), values.end()); });
        classHeads.push_back(PermuteLayer(classHead,gridWidth,gridHeight,boxesPerCell*ssdParameter.classNum));
    }

    // Decode the network outputs into the final SSD detections
    GetResult(classHeads,boxHeads,resultsOfDetection);

    // Map the boxes back to source-image coordinates
    const float widthRatio=(1.0*srcImage.cols)/inputSize.width;
    const float heightRatio=(1.0*srcImage.rows)/inputSize.height;
    for(ResultOfDetection &detection : resultsOfDetection)
    {
        detection.boundingBox.x*=widthRatio;
        detection.boundingBox.width*=widthRatio;
        detection.boundingBox.y*=heightRatio;
        detection.boundingBox.height*=heightRatio;
    }

    // Order by confidence
    std::sort(resultsOfDetection.begin(), resultsOfDetection.end(),CompareConfidence);

    return SUCCESS;
}
#ifdef DMA
// Runs detection on a frame that already lives in device memory.
// The frame is converted on the device into the planar float buffer
// preprocess_Image, evaluated by the network, and post-processed exactly like
// the cv::Mat overload.
// Returns IMAGE_ERROR when the frame has no data, when its size differs from
// the network input size (preprocess_Image is allocated for exactly
// inputSize.width x inputSize.height x 3 floats and no resizing is done here),
// or when its pixel format has no conversion kernel; SUCCESS otherwise.
ErrorCode DetectorRetinaFace::Detect(DCU_Frame &srcImage, std::vector<ResultOfDetection> &resultsOfDetection)
{
    if(srcImage.dcu_data == NULL || srcImage.width <= 0 || srcImage.height <= 0)
    {
        LOG_ERROR(logFile, "image error!\n");
        return IMAGE_ERROR;
    }
    if(srcImage.width != inputSize.width || srcImage.height != inputSize.height)
    {
        // The conversion kernels write width*height*3 floats; a frame of any
        // other size would overrun or only partly fill the device buffer.
        LOG_ERROR(logFile, "frame size %dx%d does not match network input size %dx%d\n",
                  srcImage.width, srcImage.height, inputSize.width, inputSize.height);
        return IMAGE_ERROR;
    }

    const int block_size = 256;
    const int num_blocks = (srcImage.width * srcImage.height + block_size - 1) / block_size;
    if(srcImage.format == AV_PIX_FMT_BGRP)
    {
        convert_bgrp_to_rgb_and_normalization_retinaface<<<num_blocks, block_size>>>(srcImage.dcu_data, preprocess_Image, srcImage.width, srcImage.height);
    }
    else if(srcImage.format == AV_PIX_FMT_YUV420P)
    {
        // The YUV kernel indexes pixels in two dimensions, so it is launched
        // with a 2-D grid that covers the whole frame.
        const dim3 yuvBlock(16, 16);
        const dim3 yuvGrid((srcImage.width + yuvBlock.x - 1) / yuvBlock.x,
                           (srcImage.height + yuvBlock.y - 1) / yuvBlock.y);
        convert_yuv420p_to_rgb_and_normalization_retinaface<<<yuvGrid, yuvBlock>>>(srcImage.dcu_data, preprocess_Image, srcImage.width, srcImage.height);
    }
    else if(srcImage.format == AV_PIX_FMT_RGBA)
    {
        convert_rgba_to_rgb_and_normalization_retinaface<<<num_blocks, block_size>>>(srcImage.dcu_data, preprocess_Image, srcImage.width, srcImage.height);
    }
    else
    {
        // Without a conversion the network would run on stale buffer contents.
        LOG_ERROR(logFile, "unsupported pixel format: %d\n", (int)srcImage.format);
        return IMAGE_ERROR;
    }

    // Network input
    ParameterMap[inputName] = migraphx::argument{inputShape, preprocess_Image};

    // Inference
    std::vector<migraphx::argument> inferenceResults = net.eval(ParameterMap);

    vector<vector<float>> regressions;
    vector<vector<float>> classifications;
    for(int i=0;i<ssdParameter.numberOfPriorBoxLayer;++i) // permute the outputs of every layer
    {
        int numberOfPriorBox=ssdParameter.detectInputChn[i]/(4*(ssdParameter.priorBoxHeight[i] * ssdParameter.priorBoxWidth[i]));

        // BboxHead
        std::vector<float> regression;
        migraphx::argument result0 = inferenceResults[2*i];
        result0.visit([&](auto output) { regression.assign(output.begin(), output.end()); });
        regression=PermuteLayer(regression,ssdParameter.priorBoxWidth[i],ssdParameter.priorBoxHeight[i],numberOfPriorBox*4);
        regressions.push_back(regression);

        // ClassHead
        std::vector<float> classification;
        migraphx::argument result1 = inferenceResults[2*i+1];
        result1.visit([&](auto output) { classification.assign(output.begin(), output.end()); });
        classification=PermuteLayer(classification,ssdParameter.priorBoxWidth[i],ssdParameter.priorBoxHeight[i],numberOfPriorBox*ssdParameter.classNum);
        classifications.push_back(classification);
    }

    // Decode the network outputs into the final SSD detections
    GetResult(classifications,regressions,resultsOfDetection);

    // Map the boxes back to source-frame coordinates (ratios are loop-invariant)
    const float ratioOfWidth=(1.0*srcImage.width)/inputSize.width;
    const float ratioOfHeight=(1.0*srcImage.height)/inputSize.height;
    for(size_t i=0;i<resultsOfDetection.size();++i)
    {
        resultsOfDetection[i].boundingBox.x*=ratioOfWidth;
        resultsOfDetection[i].boundingBox.width*=ratioOfWidth;
        resultsOfDetection[i].boundingBox.y*=ratioOfHeight;
        resultsOfDetection[i].boundingBox.height*=ratioOfHeight;
    }

    // Order by confidence
    sort(resultsOfDetection.begin(), resultsOfDetection.end(),CompareConfidence);

    return SUCCESS;
}
#endif
void DetectorRetinaFace::GetSSDParameter()
{
FileNode rootNode = configurationFile["DetectorRetinaFace"];
ssdParameter.numberOfPriorBoxLayer=(int)rootNode["PriorBoxLayerNumber"];
ssdParameter.srcImageHeight = inputSize.height;
ssdParameter.srcImageWidth = inputSize.width;
// MinSize,MaxSize
ssdParameter.priorBoxMinSize.resize(ssdParameter.numberOfPriorBoxLayer);
ssdParameter.priorBoxMaxSize.resize(ssdParameter.numberOfPriorBoxLayer);
for (int i = 0; i < ssdParameter.numberOfPriorBoxLayer; ++i)
{
// miniSize
{
for (int j = 1; j < 3; j++)
{
if (i == 0) {
if (j == 1) {
ssdParameter.priorBoxMinSize[i].push_back((float)rootNode["MinSize11"]);}
if (j == 2) {
ssdParameter.priorBoxMinSize[i].push_back((float)rootNode["MinSize12"]);}
} else if (i == 1) {
if (j == 1) {
ssdParameter.priorBoxMinSize[i].push_back((float)rootNode["MinSize21"]);}
if (j == 2) {
ssdParameter.priorBoxMinSize[i].push_back((float)rootNode["MinSize22"]);}
} else if (i == 2) {
if (j == 1) {
ssdParameter.priorBoxMinSize[i].push_back((float)rootNode["MinSize31"]);}
if (j == 2) {
ssdParameter.priorBoxMinSize[i].push_back((float)rootNode["MinSize32"]);}
}
}
}
}
// MinSizeNumber,MaxSizeNumber
for (int i = 0; i < ssdParameter.numberOfPriorBoxLayer; ++i)
{
ssdParameter.minSizeNum[i] = ssdParameter.priorBoxMinSize[i].size();
ssdParameter.maxSizeNum[i] = ssdParameter.priorBoxMaxSize[i].size();;
}
// Flip,Clip
for (int i = 0; i < ssdParameter.numberOfPriorBoxLayer; ++i)
{
// Flip
ssdParameter.flip[i] = 0;
// Clip
ssdParameter.clip[i] = 0;
}
// AspectRatio
ssdParameter.priorBoxAspectRatio.resize(ssdParameter.numberOfPriorBoxLayer);
for (int i = 0; i < ssdParameter.numberOfPriorBoxLayer; ++i)
{
char nodeName[256] = { 0 };
int j=0;
while(true)
{
//sprintf(nodeName, "AspectRatio%d%d", (i + 1),++j);
FileNode aspectRatioNode = rootNode["AspectRatio"];
if(aspectRatioNode.empty())
{
break;
}
else
{
ssdParameter.priorBoxAspectRatio[i].push_back((float)rootNode["AspectRatio"]);
}
}
}
// aspect ratio number
for (int i = 0; i < ssdParameter.numberOfPriorBoxLayer; ++i)
{
ssdParameter.inputAspectRatioNum[i] = ssdParameter.priorBoxAspectRatio[i].size();
}
// PriorBoxStep
for (int i = 0; i < ssdParameter.numberOfPriorBoxLayer; ++i)
{
if( i == 0) {
ssdParameter.priorBoxStepWidth[i] = 8;
ssdParameter.priorBoxStepHeight[i] = 8;
} else if (i == 1) {
ssdParameter.priorBoxStepWidth[i] = 16;
ssdParameter.priorBoxStepHeight[i] = 16;
} else if ( i == 2) {
ssdParameter.priorBoxStepWidth[i] = 32;
ssdParameter.priorBoxStepHeight[i] = 32;
}
}
// PriorBoxWidth,PriorBoxHeight
for (int i = 0; i < ssdParameter.numberOfPriorBoxLayer; ++i)
{
ssdParameter.priorBoxWidth[i] = ssdParameter.srcImageWidth/ssdParameter.priorBoxStepWidth[i];
ssdParameter.priorBoxHeight[i] = ssdParameter.srcImageHeight/ssdParameter.priorBoxStepHeight[i];
}
ssdParameter.offset = (float)rootNode["Offset"];
ssdParameter.priorBoxVar[0] = (int)(0.1f*SSD_QUANT_BASE);
ssdParameter.priorBoxVar[1] = (int)(0.1f*SSD_QUANT_BASE);
ssdParameter.priorBoxVar[2] = (int)(0.2f*SSD_QUANT_BASE);
ssdParameter.priorBoxVar[3] = (int)(0.2f*SSD_QUANT_BASE);
int classNumber = (int)rootNode["ClassNumber"];
ssdParameter.softMaxInHeight = classNumber;
ssdParameter.concatNum = ssdParameter.numberOfPriorBoxLayer;
ssdParameter.softMaxOutWidth = 1;
ssdParameter.softMaxOutHeight = classNumber;
int totalSizeOfClasReg=0;// 分类和回归一共需要的内存空间大小
for (int i = 0; i < ssdParameter.numberOfPriorBoxLayer; ++i)
{
int priorBoxNumber=0;
priorBoxNumber+=1;// aspect ratio=1
for (int j = 0; j < ssdParameter.inputAspectRatioNum[i]; j++)
{
++priorBoxNumber;
if (ssdParameter.flip[j]==1)
{
++priorBoxNumber;
}
}
priorBoxNumber = ssdParameter.minSizeNum[i] * priorBoxNumber + ssdParameter.maxSizeNum[i];
int totalPriorBoxNumber = priorBoxNumber*ssdParameter.priorBoxHeight[i] * ssdParameter.priorBoxWidth[i];
ssdParameter.softMaxInChn[i] = totalPriorBoxNumber * classNumber;
ssdParameter.softMaxOutChn += totalPriorBoxNumber;
ssdParameter.detectInputChn[i] = totalPriorBoxNumber * 4;
totalSizeOfClasReg+=(ssdParameter.softMaxInChn[i]+ssdParameter.detectInputChn[i]);
}
// DetectionOut
ssdParameter.classNum = classNumber;
ssdParameter.topK = (int)rootNode["TopK"];;
ssdParameter.keepTopK = (int)rootNode["KeepTopK"];
ssdParameter.NMSThresh = (int)((float)rootNode["NMSThreshold"]* SSD_QUANT_BASE);
ssdParameter.confThresh=(int)((float)rootNode["ConfidenceThreshold"]*SSD_QUANT_BASE);
for (int i = 0; i < ssdParameter.numberOfPriorBoxLayer ; i++)
{
int numberOfPriorBox=ssdParameter.detectInputChn[i]/(4*(ssdParameter.priorBoxHeight[i] * ssdParameter.priorBoxWidth[i]));
ssdParameter.convHeight[2*i]=ssdParameter.priorBoxHeight[i];
ssdParameter.convWidth[2*i]=ssdParameter.priorBoxWidth[i];
ssdParameter.convChannel[2*i]=numberOfPriorBox*4;
ssdParameter.convHeight[2*i+1]=ssdParameter.priorBoxHeight[i];
ssdParameter.convWidth[2*i+1]=ssdParameter.priorBoxWidth[i];
ssdParameter.convChannel[2*i+1]=numberOfPriorBox*ssdParameter.classNum;
ssdParameter.convStride[i] = SSD_ALIGN16(ssdParameter.convChannel[2*i+1] * sizeof(int)) / sizeof(int);
}
// 计算softMaxOutputData内存空间大小
int softMaxSize=0;
for(int i = 0; i < ssdParameter.concatNum; i++)
{
softMaxSize += ssdParameter.softMaxInChn[i];
}
// 计算getResultBuffer内存空间大小
int priorNum = 0;
int detectionSize = 0;
for(int i = 0; i < ssdParameter.concatNum; i++)
{
priorNum+=ssdParameter.detectInputChn[i]/SSD_COORDI_NUM;
}
detectionSize+=priorNum*SSD_COORDI_NUM;
detectionSize+=priorNum*SSD_PROPOSAL_WIDTH*2;
detectionSize+=priorNum*2;
// 计算dstRoi,classRoiNum,dstScore内存空间大小
int dstRoiSize = 0;
int dstScoreSize = 0;
int classRoiNumSize = 0;
dstRoiSize = SSD_ALIGN16(ssdParameter.classNum*ssdParameter.topK*SSD_COORDI_NUM);
dstScoreSize = SSD_ALIGN16(ssdParameter.classNum*ssdParameter.topK);
classRoiNumSize = SSD_ALIGN16(ssdParameter.classNum);
// 申请内存,并分配
int totalSize=totalSizeOfClasReg+SSD_COORDI_NUM*2*ssdParameter.softMaxOutChn+softMaxSize+detectionSize+dstRoiSize+classRoiNumSize+dstScoreSize;
ssdParameter.buffer=new int[totalSize];
int *data=ssdParameter.buffer;
memset(data,0,totalSize*sizeof(int));// 初始化0
int offset=0;
for (int i = 0; i < ssdParameter.numberOfPriorBoxLayer; ++i)
{
int *dataOfClasReg=data+offset;
ssdParameter.classification[i]=dataOfClasReg;
ssdParameter.regression[i]=dataOfClasReg+ssdParameter.softMaxInChn[i];
offset+=(ssdParameter.softMaxInChn[i]+ssdParameter.detectInputChn[i]);
}
ssdParameter.priorboxOutputData=data+totalSizeOfClasReg;
ssdParameter.softMaxOutputData=ssdParameter.priorboxOutputData+SSD_COORDI_NUM*2*ssdParameter.softMaxOutChn;
ssdParameter.getResultBuffer=ssdParameter.softMaxOutputData+softMaxSize;
ssdParameter.dstRoi=ssdParameter.getResultBuffer+detectionSize;
ssdParameter.classRoiNum=ssdParameter.dstRoi+dstRoiSize;
ssdParameter.dstScore=ssdParameter.classRoiNum+classRoiNumSize;
}
// Runs the complete SSD post-processing on the raw network outputs:
//   1. converts the float outputs to fixed point (multiplied by SSD_QUANT_BASE),
//   2. PriorBoxLayer        - generates the prior boxes of every layer,
//   3. SoftmaxLayer         - turns the class scores into confidences,
//   4. DetectionOutputLayer - decodes the boxes and applies NMS,
//   5. CreateDetectionResults - appends the surviving boxes to resultsOfDetection.
// classifications / regressions hold one vector per prior-box layer.
void DetectorRetinaFace::GetResult(const vector<vector<float>> &classifications,const vector<vector<float>> &regressions,vector<ResultOfDetection> &resultsOfDetection)
{
    const int numberOfPriorBoxLayer=ssdParameter.numberOfPriorBoxLayer;

    // Type conversion (float -> fixed point). The per-layer vectors are accessed by
    // reference (no deep copy), and the element count is limited to the space reserved
    // for the layer so that an unexpected output size cannot overrun the shared buffer.
    for(int i = 0; i < numberOfPriorBoxLayer; i++)
    {
        // Classification
        const vector<float> &classificationOfEachLayer=classifications[i];
        int classificationCount=(int)classificationOfEachLayer.size();
        if(classificationCount>ssdParameter.softMaxInChn[i])
        {
            classificationCount=ssdParameter.softMaxInChn[i];
        }
        for(int j=0;j<classificationCount;++j)
        {
            (ssdParameter.classification[i])[j]=classificationOfEachLayer[j]*SSD_QUANT_BASE;
        }

        // Regression
        const vector<float> &regressionOfEachLayer=regressions[i];
        int regressionCount=(int)regressionOfEachLayer.size();
        if(regressionCount>ssdParameter.detectInputChn[i])
        {
            regressionCount=ssdParameter.detectInputChn[i];
        }
        for(int j=0;j<regressionCount;++j)
        {
            (ssdParameter.regression[i])[j]=regressionOfEachLayer[j]*SSD_QUANT_BASE;
        }
    }

    int* priorboxOutputData[SSD_MAX_PRIORBOX_LAYER_NUM];
    int* softMaxInputData[SSD_MAX_PRIORBOX_LAYER_NUM];
    int* detectionLocData[SSD_MAX_PRIORBOX_LAYER_NUM];
    int softMaxWidth[SSD_MAX_PRIORBOX_LAYER_NUM];

    /////////////////////////////////// PriorBoxLayer: generate all prior boxes ///////////////////////////////////
    // Partition the prior-box memory: every layer stores its boxes followed by the
    // same number of variance entries, hence the factor 2.
    priorboxOutputData[0] = ssdParameter.priorboxOutputData;
    for (int i = 1; i < numberOfPriorBoxLayer; i++)
    {
        const int size=ssdParameter.softMaxInChn[i-1]/ssdParameter.classNum*SSD_COORDI_NUM*2;
        priorboxOutputData[i] = priorboxOutputData[i - 1] + size;
    }
    for (int i = 0; i < numberOfPriorBoxLayer; i++)
    {
        PriorBoxLayer(i,priorboxOutputData[i]);
    }

    /////////////////////////////////// SoftmaxLayer: confidence of every prior box ///////////////////////////////////
    int* softMaxOutputData = ssdParameter.softMaxOutputData;
    for(int i = 0; i < numberOfPriorBoxLayer; i++)
    {
        softMaxInputData[i] = ssdParameter.classification[i];
        softMaxWidth[i] = ssdParameter.convChannel[i*2+1];
    }
    SoftmaxLayer(softMaxWidth,softMaxInputData, softMaxOutputData);

    /////////////////////////////////// DetectionOutputLayer: decode the network output and apply NMS ///////////////////////////////////
    int* detectionOutTmpBuf = ssdParameter.getResultBuffer;
    for(int i = 0; i < numberOfPriorBoxLayer; i++)
    {
        detectionLocData[i] = ssdParameter.regression[i];
    }
    DetectionOutputLayer(detectionLocData, priorboxOutputData, softMaxOutputData,detectionOutTmpBuf);

    // Collect the final detection results
    CreateDetectionResults(resultsOfDetection);
}
void DetectorRetinaFace::PriorBoxLayer(int indexOfLayer,int* priorboxOutputData)
{
// 参数赋值
int priorBoxWidth=ssdParameter.priorBoxWidth[indexOfLayer];
int priorBoxHeight=ssdParameter.priorBoxHeight[indexOfLayer];
int srcImageWidth=ssdParameter.srcImageWidth;
int srcImageHeight=ssdParameter.srcImageHeight;
vector<float> priorBoxMinSize=ssdParameter.priorBoxMinSize[indexOfLayer];
int minSizeNum=ssdParameter.minSizeNum[indexOfLayer];
vector<float> priorBoxMaxSize=ssdParameter.priorBoxMaxSize[indexOfLayer];
int maxSizeNum=ssdParameter.maxSizeNum[indexOfLayer];
int flip=ssdParameter.flip[indexOfLayer];
int clip=ssdParameter.clip[indexOfLayer];
int inputAspectRatioNum=ssdParameter.inputAspectRatioNum[indexOfLayer];
vector<float> priorBoxAspectRatio=ssdParameter.priorBoxAspectRatio[indexOfLayer];
float priorBoxStepWidth=ssdParameter.priorBoxStepWidth[indexOfLayer];
float priorBoxStepHeight= ssdParameter.priorBoxStepHeight[indexOfLayer];
float offset=ssdParameter.offset;
int *priorBoxVar=ssdParameter.priorBoxVar;
int aspectRatioNum = 0;
int index = 0;
float aspectRatio[SSD_ASPECT_RATIO_NUM] = { 0 };
int numPrior = 0;
float centerX = 0;
float centerY = 0;
float boxHeight = 0;
float boxWidth = 0;
float maxBoxWidth = 0;
int i = 0;
int j = 0;
int n = 0;
int h = 0;
int w = 0;
aspectRatioNum = 0;
aspectRatio[0] = 1;
aspectRatioNum++;
for (i = 0; i < inputAspectRatioNum; i++)
{
aspectRatio[aspectRatioNum++] = priorBoxAspectRatio[i];
if (flip)
{
aspectRatio[aspectRatioNum++] = 1.0f / priorBoxAspectRatio[i];
}
}
numPrior = minSizeNum * aspectRatioNum + maxSizeNum;
index = 0;
for (h = 0; h < priorBoxHeight; h++)
{
for (w = 0; w < priorBoxWidth; w++)
{
centerX = (w + offset) * priorBoxStepWidth;
centerY = (h + offset) * priorBoxStepHeight;
for (n = 0; n < minSizeNum; n++)
{
// 首先产生宽高比为1的priorbox
boxHeight = priorBoxMinSize[n];
boxWidth = priorBoxMinSize[n];
priorboxOutputData[index++] = (int)(centerX - boxWidth * SSD_HALF);
priorboxOutputData[index++] = (int)(centerY - boxHeight * SSD_HALF);
priorboxOutputData[index++] = (int)(centerX + boxWidth * SSD_HALF);
priorboxOutputData[index++] = (int)(centerY + boxHeight * SSD_HALF);
// 对于max_size,生成宽高比为1的priorbox,宽高为sqrt(min_size * max_size)
if(maxSizeNum>0)
{
maxBoxWidth = sqrt(priorBoxMinSize[n] * priorBoxMaxSize[n]);
boxHeight = maxBoxWidth;
boxWidth = maxBoxWidth;
priorboxOutputData[index++] = (int)(centerX - boxWidth * SSD_HALF);
priorboxOutputData[index++] = (int)(centerY - boxHeight * SSD_HALF);
priorboxOutputData[index++] = (int)(centerX + boxWidth * SSD_HALF);
priorboxOutputData[index++] = (int)(centerY + boxHeight * SSD_HALF);
}
// 剩下的priorbox
for (i = 1; i < aspectRatioNum; i++)
{
boxWidth = (float)(priorBoxMinSize[n] * sqrt( aspectRatio[i] ));
boxHeight = (float)(priorBoxMinSize[n]/sqrt( aspectRatio[i] ));
priorboxOutputData[index++] = (int)(centerX - boxWidth * SSD_HALF);
priorboxOutputData[index++] = (int)(centerY - boxHeight * SSD_HALF);
priorboxOutputData[index++] = (int)(centerX + boxWidth * SSD_HALF);
priorboxOutputData[index++] = (int)(centerY + boxHeight * SSD_HALF);
}
}
}
}
// 越界处理 [0, srcImageWidth] & [0, srcImageHeight]
if (clip)
{
for (i = 0; i < (int)(priorBoxWidth * priorBoxHeight * SSD_COORDI_NUM*numPrior / 2); i++)
{
priorboxOutputData[2 * i] = SSD_MIN((int)SSD_MAX(priorboxOutputData[2 * i], 0), srcImageWidth);
priorboxOutputData[2 * i + 1] = SSD_MIN((int)SSD_MAX(priorboxOutputData[2 * i + 1], 0), srcImageHeight);
}
}
// var
for (h = 0; h < priorBoxHeight; h++)
{
for (w = 0; w < priorBoxWidth; w++)
{
for (i = 0; i < numPrior; i++)
{
for (j = 0; j < SSD_COORDI_NUM; j++)
{
priorboxOutputData[index++] = (int)priorBoxVar[j];
}
}
}
}
}
// Applies a fixed-point softmax to the class scores of every concatenated layer.
//   softMaxWidth:      kept for interface compatibility; not needed by the computation
//   softMaxInputData:  per-layer pointers to the fixed-point class scores
//   softMaxOutputData: contiguous output buffer; the results of all layers are written back to back
// Each group of softMaxInHeight consecutive values (the scores of one prior box) is
// normalised independently by ComputeSoftMax.
void DetectorRetinaFace::SoftmaxLayer(int softMaxWidth[],int* softMaxInputData[], int* softMaxOutputData)
{
    (void)softMaxWidth;

    // Number of values per softmax group
    const int innerNum=ssdParameter.softMaxInHeight;
    if(innerNum<=0)
    {
        // Nothing to normalise; also avoids dividing by zero below
        return;
    }

    int* outputTmp=softMaxOutputData;
    for (int concatCnt = 0; concatCnt < ssdParameter.concatNum; concatCnt++)
    {
        int* inputData=softMaxInputData[concatCnt];
        // Number of softmax groups in this layer
        const int outerNum=ssdParameter.softMaxInChn[concatCnt]/innerNum;
        for (int i = 0; i < outerNum; i++)
        {
            ComputeSoftMax(inputData, innerNum, outputTmp);
            inputData += innerNum;
            outputTmp += innerNum;
        }
    }
}
// Fixed-point softmax over `size` values.
//   src: input scores, scaled by SSD_QUANT_BASE
//   dst: output probabilities, scaled by SSD_QUANT_BASE
// The maximum is subtracted before exponentiation. It is seeded with src[0] rather than 0,
// so that inputs which are all negative still produce at least one exp() term equal to
// SSD_QUANT_BASE; this keeps the sum non-zero and the normalisation well defined.
void DetectorRetinaFace::ComputeSoftMax(int* src, int size, int* dst)
{
    if (src == NULL || dst == NULL || size <= 0)
    {
        return;
    }

    // Largest input value
    int max = src[0];
    for (int i = 1; i < size; ++i)
    {
        if (max < src[i])
        {
            max = src[i];
        }
    }

    // Exponentials (fixed point) and their sum
    int sum = 0;
    for (int i = 0; i < size; ++i)
    {
        dst[i] = (int)(SSD_QUANT_BASE* exp((float)(src[i] - max) / SSD_QUANT_BASE));
        sum += dst[i];
    }

    // Normalise
    for (int i = 0; i < size; ++i)
    {
        dst[i] = (int)(((float)dst[i] / (float)sum) * SSD_QUANT_BASE);
    }
}
// Decodes the regression output against the prior boxes, runs per-class NMS and keeps
// the best detections.
//   allLocPreds:   per-layer fixed-point regression values (4 per prior box)
//   allPriorBoxes: per-layer prior boxes followed by their variances (as written by PriorBoxLayer)
//   confScores:    softmax output, classNum values per prior box
//   assistMemPool: scratch memory, partitioned below
// Results are written to ssdParameter.dstScore / dstRoi / classRoiNum:
// classRoiNum[i] is the number of boxes kept for class i, and the boxes of the classes
// are stored back to back. Class 0 is skipped.
// A proposal record has SSD_PROPOSAL_WIDTH ints: x1,y1,x2,y2, score, flag
// (flag: 0 = kept / 1 = suppressed during NMS; reused as class id in the keepTopK stage).
void DetectorRetinaFace::DetectionOutputLayer(int* allLocPreds[], int* allPriorBoxes[],int* confScores, int* assistMemPool)
{
    // Parameters
    const int concatNum=ssdParameter.concatNum;
    const int confThresh=ssdParameter.confThresh;
    const int classNum=ssdParameter.classNum;
    const int topK=ssdParameter.topK;
    const int keepTopK=ssdParameter.keepTopK;
    const int NMSThresh=ssdParameter.NMSThresh;
    int *detectInputChn=ssdParameter.detectInputChn;
    int* dstScoreSrc=ssdParameter.dstScore;
    int* dstBboxSrc=ssdParameter.dstRoi;
    int* classRoiNum=ssdParameter.classRoiNum;

    // Total number of prior boxes over all layers
    int priorNum = 0;
    for (int i = 0; i < concatNum; i++)
    {
        priorNum += detectInputChn[i] / SSD_COORDI_NUM;
    }

    // Scratch memory: decoded boxes | proposals of one class | merged proposals | sort stack
    int* allDecodeBoxes = assistMemPool;
    int* singleProposal = allDecodeBoxes + priorNum * SSD_COORDI_NUM;
    int* afterTopK = singleProposal + SSD_PROPOSAL_WIDTH * priorNum;
    QuickSortStack* stack = (QuickSortStack*)(afterTopK + priorNum * SSD_PROPOSAL_WIDTH);

    // Decode every prior box
    int srcIdx = 0;
    for (int i = 0; i < concatNum; i++)
    {
        // Regression predictions
        int* locPreds = allLocPreds[i];
        const int numPredsPerClass = detectInputChn[i] / SSD_COORDI_NUM;
        // Prior boxes; the variances follow directly after the boxes
        int* priorBoxes = allPriorBoxes[i];
        int* priorVar = priorBoxes + numPredsPerClass*SSD_COORDI_NUM;
        for (int j = 0; j < numPredsPerClass; j++)
        {
            const float priorWidth = (float)(priorBoxes[j*SSD_COORDI_NUM+2] - priorBoxes[j*SSD_COORDI_NUM]);
            const float priorHeight = (float)(priorBoxes[j*SSD_COORDI_NUM+3] - priorBoxes[j*SSD_COORDI_NUM + 1]);
            const float priorCenterX = (priorBoxes[j*SSD_COORDI_NUM+2] + priorBoxes[j*SSD_COORDI_NUM])*SSD_HALF;
            const float priorCenterY = (priorBoxes[j*SSD_COORDI_NUM+3] + priorBoxes[j*SSD_COORDI_NUM+1])*SSD_HALF;
            const float decodeBoxCenterX = ((float)priorVar[j*SSD_COORDI_NUM]/SSD_QUANT_BASE)*
                ((float)locPreds[j*SSD_COORDI_NUM]/SSD_QUANT_BASE)*priorWidth+priorCenterX;
            const float decodeBoxCenterY = ((float)priorVar[j*SSD_COORDI_NUM+1]/SSD_QUANT_BASE)*
                ((float)locPreds[j*SSD_COORDI_NUM+1]/SSD_QUANT_BASE)*priorHeight+priorCenterY;
            const float decodeBoxWidth = exp(((float)priorVar[j*SSD_COORDI_NUM+2]/SSD_QUANT_BASE)*
                ((float)locPreds[j*SSD_COORDI_NUM+2]/SSD_QUANT_BASE))*priorWidth;
            const float decodeBoxHeight = exp(((float)priorVar[j*SSD_COORDI_NUM+3]/SSD_QUANT_BASE)*
                ((float)locPreds[j*SSD_COORDI_NUM+3]/SSD_QUANT_BASE))*priorHeight;
            allDecodeBoxes[srcIdx++] = (int)(decodeBoxCenterX - decodeBoxWidth * SSD_HALF);
            allDecodeBoxes[srcIdx++] = (int)(decodeBoxCenterY - decodeBoxHeight * SSD_HALF);
            allDecodeBoxes[srcIdx++] = (int)(decodeBoxCenterX + decodeBoxWidth * SSD_HALF);
            allDecodeBoxes[srcIdx++] = (int)(decodeBoxCenterY + decodeBoxHeight * SSD_HALF);
        }
    }

    // NMS for every class (class 0 is skipped)
    int afterTopK2 = 0;
    for (int i = 1; i < classNum; i++)
    {
        for (int j = 0; j < priorNum; j++)
        {
            singleProposal[j * SSD_PROPOSAL_WIDTH] = allDecodeBoxes[j * SSD_COORDI_NUM];
            singleProposal[j * SSD_PROPOSAL_WIDTH + 1] = allDecodeBoxes[j * SSD_COORDI_NUM + 1];
            singleProposal[j * SSD_PROPOSAL_WIDTH + 2] = allDecodeBoxes[j * SSD_COORDI_NUM + 2];
            singleProposal[j * SSD_PROPOSAL_WIDTH + 3] = allDecodeBoxes[j * SSD_COORDI_NUM + 3];
            singleProposal[j * SSD_PROPOSAL_WIDTH + 4] = confScores[j*classNum + i];
            singleProposal[j * SSD_PROPOSAL_WIDTH + 5] = 0;
        }
        QuickSort(singleProposal, 0, priorNum - 1, stack,topK);
        const int afterFilter = (priorNum < topK) ? priorNum : topK;
        NonMaxSuppression(singleProposal, afterFilter, NMSThresh, afterFilter);

        int roiOutCnt = 0;
        int* dstScore = dstScoreSrc + afterTopK2;
        int* dstBbox = dstBboxSrc + afterTopK2 * SSD_COORDI_NUM;
        // Only the first afterFilter proposals are valid; iterating up to topK would read
        // past the proposal list whenever there are fewer prior boxes than topK.
        for (int j = 0; j < afterFilter; j++)
        {
            if (singleProposal[j * SSD_PROPOSAL_WIDTH + 5] == 0 &&
                singleProposal[j * SSD_PROPOSAL_WIDTH + 4] > confThresh)
            {
                dstScore[roiOutCnt] = singleProposal[j * SSD_PROPOSAL_WIDTH + 4];
                dstBbox[roiOutCnt * SSD_COORDI_NUM] = singleProposal[j * SSD_PROPOSAL_WIDTH];
                dstBbox[roiOutCnt * SSD_COORDI_NUM + 1] = singleProposal[j * SSD_PROPOSAL_WIDTH + 1];
                dstBbox[roiOutCnt * SSD_COORDI_NUM + 2] = singleProposal[j * SSD_PROPOSAL_WIDTH + 2];
                dstBbox[roiOutCnt * SSD_COORDI_NUM + 3] = singleProposal[j * SSD_PROPOSAL_WIDTH + 3];
                roiOutCnt++;
            }
        }
        classRoiNum[i] = roiOutCnt;
        afterTopK2 += roiOutCnt;
    }

    // Too many detections overall: merge all classes, sort by score and keep the best keepTopK
    if (afterTopK2 > keepTopK)
    {
        // Gather the detections of all classes; field 5 now carries the class id
        int keepCnt = 0;
        int offset = classRoiNum[0];
        for (int i = 1; i < classNum; i++)
        {
            const int* dstScore = dstScoreSrc + offset;
            const int* dstBbox = dstBboxSrc + offset * SSD_COORDI_NUM;
            for (int j = 0; j < classRoiNum[i]; j++)
            {
                afterTopK[keepCnt * SSD_PROPOSAL_WIDTH] = dstBbox[j * SSD_COORDI_NUM];
                afterTopK[keepCnt * SSD_PROPOSAL_WIDTH + 1] = dstBbox[j * SSD_COORDI_NUM + 1];
                afterTopK[keepCnt * SSD_PROPOSAL_WIDTH + 2] = dstBbox[j * SSD_COORDI_NUM + 2];
                afterTopK[keepCnt * SSD_PROPOSAL_WIDTH + 3] = dstBbox[j * SSD_COORDI_NUM + 3];
                afterTopK[keepCnt * SSD_PROPOSAL_WIDTH + 4] = dstScore[j];
                afterTopK[keepCnt * SSD_PROPOSAL_WIDTH + 5] = i;
                keepCnt++;
            }
            offset = offset + classRoiNum[i];
        }
        QuickSort(afterTopK, 0, keepCnt - 1, stack,keepCnt);

        // Write the best keepTopK detections back, grouped by class
        offset = classRoiNum[0];
        for (int i = 1; i < classNum; i++)
        {
            int roiOutCnt = 0;
            int* dstScore = dstScoreSrc + offset;
            int* dstBbox = dstBboxSrc + offset * SSD_COORDI_NUM;
            for (int j = 0; j < keepTopK; j++)
            {
                if (afterTopK[j * SSD_PROPOSAL_WIDTH + 5] == i)
                {
                    dstScore[roiOutCnt] = afterTopK[j * SSD_PROPOSAL_WIDTH + 4];
                    dstBbox[roiOutCnt * SSD_COORDI_NUM] = afterTopK[j * SSD_PROPOSAL_WIDTH];
                    dstBbox[roiOutCnt * SSD_COORDI_NUM + 1] = afterTopK[j * SSD_PROPOSAL_WIDTH + 1];
                    dstBbox[roiOutCnt * SSD_COORDI_NUM + 2] = afterTopK[j * SSD_PROPOSAL_WIDTH + 2];
                    dstBbox[roiOutCnt * SSD_COORDI_NUM + 3] = afterTopK[j * SSD_PROPOSAL_WIDTH + 3];
                    roiOutCnt++;
                }
            }
            classRoiNum[i] = roiOutCnt;
            offset += roiOutCnt;
        }
    }
}
// Reorders a channel-major buffer (index c*width*height + h*width + w) into a
// channel-last buffer (all channels of one pixel stored consecutively, pixels in
// row-major order). The returned vector has the same number of elements as data.
vector<float> DetectorRetinaFace::PermuteLayer(const vector<float> &data,int width,int height,int channels)
{
    const int planeSize=width*height;
    vector<float> permuted(data.size());
    int writePosition=0;
    for(int row=0; row<height; row++)
    {
        const int rowStart=row*width;
        for(int column=0; column<width; column++)
        {
            const int pixel=rowStart+column;
            for(int channel=0; channel<channels; channel++)
            {
                permuted[writePosition]=data[channel*planeSize+pixel];
                ++writePosition;
            }
        }
    }
    return permuted;
}
// Iterative quick sort of proposal records (SSD_PROPOSAL_WIDTH ints each) in descending
// order of the score stored in field 4.
//   src:    proposal array
//   low:    index of the first record to sort
//   high:   index of the last record to sort
//   stack:  caller-provided work stack holding the pending [min, max] ranges
//   maxNum: ranges that start after this index are not refined any further, so only the
//           leading part of the array is guaranteed to be sorted
void DetectorRetinaFace::QuickSort(int* src,int low, int high, QuickSortStack *stack,int maxNum)
{
    // Empty or single-element range: nothing to do (and no record may be read)
    if(low >= high)
    {
        return;
    }

    int top = 0;
    stack[top].min = low;
    stack[top].max = high;
    while(top > -1)
    {
        // Pop the next range
        low = stack[top].min;
        high = stack[top].max;
        top--;

        int i = low;
        int j = high;
        // Pivot: score of the first record of the range
        const int keyConfidence = src[SSD_PROPOSAL_WIDTH * low + 4];
        while(i < j)
        {
            // Scan from the right for a record that does not score below the pivot
            while((i < j) && (keyConfidence > src[j * SSD_PROPOSAL_WIDTH + 4]))
            {
                j--;
            }
            if(i < j)
            {
                Swap(&src[i*SSD_PROPOSAL_WIDTH], &src[j*SSD_PROPOSAL_WIDTH]);
                i++;
            }
            // Scan from the left for a record that does not score above the pivot
            while((i < j) && (keyConfidence < src[i*SSD_PROPOSAL_WIDTH + 4]))
            {
                i++;
            }
            if(i < j)
            {
                Swap(&src[i*SSD_PROPOSAL_WIDTH], &src[j*SSD_PROPOSAL_WIDTH]);
                j--;
            }
        }

        // Push the two partitions, unless the range lies entirely behind maxNum
        if(low <= maxNum)
        {
            if(low < i-1)
            {
                top++;
                stack[top].min = low;
                stack[top].max = i-1;
            }
            if(high > i+1)
            {
                top++;
                stack[top].min = i+1;
                stack[top].max = high;
            }
        }
    }
}
// Non-maximum suppression on proposal records (SSD_PROPOSAL_WIDTH ints each:
// x1,y1,x2,y2, score, flag). Suppressed records get flag (field 5) = 1.
//   proposals:  proposal array
//   anchorsNum: number of records to examine
//   NMSThresh:  overlap threshold, scaled by SSD_QUANT_BASE
//   maxRoiNum:  stop once this many records have been visited as reference boxes
void DetectorRetinaFace::NonMaxSuppression( int* proposals, int anchorsNum,int NMSThresh,int maxRoiNum)
{
    int areaTotal = 0;
    int areaInter = 0;
    int num = 0;
    for (int i = 0; i < anchorsNum && num < maxRoiNum; i++)
    {
        if( proposals[SSD_PROPOSAL_WIDTH*i+5] != 0 )
        {
            continue;
        }
        num++;
        const int xMin1 = proposals[SSD_PROPOSAL_WIDTH*i];
        const int yMin1 = proposals[SSD_PROPOSAL_WIDTH*i+1];
        const int xMax1 = proposals[SSD_PROPOSAL_WIDTH*i+2];
        const int yMax1 = proposals[SSD_PROPOSAL_WIDTH*i+3];
        for(int j = i+1; j < anchorsNum; j++)
        {
            if( proposals[SSD_PROPOSAL_WIDTH*j+5] != 0 )
            {
                continue;
            }
            const int xMin2 = proposals[SSD_PROPOSAL_WIDTH*j];
            const int yMin2 = proposals[SSD_PROPOSAL_WIDTH*j+1];
            const int xMax2 = proposals[SSD_PROPOSAL_WIDTH*j+2];
            const int yMax2 = proposals[SSD_PROPOSAL_WIDTH*j+3];
            const int NoOverlap = (xMin2>xMax1)||(xMax2<xMin1)||(yMin2>yMax1)||(yMax2<yMin1);
            if(NoOverlap)
            {
                continue;
            }
            ComputeOverlap(xMin1, yMin1, xMax1, yMax1, xMin2, yMin2, xMax2, yMax2, &areaTotal, &areaInter);
            // inter/total > NMSThresh/SSD_QUANT_BASE, evaluated without division.
            // The products are formed in 64 bit: an area multiplied by the quantisation
            // base does not fit into a 32-bit int for large boxes.
            if((long long)areaInter*SSD_QUANT_BASE > (long long)NMSThresh*areaTotal)
            {
                // Suppress the record with the lower score
                if( proposals[SSD_PROPOSAL_WIDTH*i+4] >= proposals[SSD_PROPOSAL_WIDTH*j+4] )
                {
                    proposals[SSD_PROPOSAL_WIDTH*j+5] = 1;
                }
                else
                {
                    proposals[SSD_PROPOSAL_WIDTH*i+5] = 1;
                }
            }
        }
    }
}
// Computes the intersection and union areas of two boxes given by their corner
// coordinates. Widths and heights are counted inclusively (max - min + 1).
//   areaSum:   receives area1 + area2 - intersection
//   areaInter: receives the intersection area (0 if the boxes do not intersect)
void DetectorRetinaFace::ComputeOverlap(int xMin1, int yMin1, int xMax1, int yMax1, int xMin2,
int yMin2, int xMax2, int yMax2, int* areaSum, int* areaInter)
{
    // Corners of the intersection rectangle
    const int left = SSD_MAX(xMin1, xMin2);
    const int top = SSD_MAX(yMin1, yMin2);
    const int right = SSD_MIN(xMax1, xMax2);
    const int bottom = SSD_MIN(yMax1, yMax2);

    // Negative extents mean there is no intersection along that axis
    int overlapWidth = right - left + 1;
    if (overlapWidth < 0)
    {
        overlapWidth = 0;
    }
    int overlapHeight = bottom - top + 1;
    if (overlapHeight < 0)
    {
        overlapHeight = 0;
    }
    const int overlapArea = overlapWidth * overlapHeight;

    const int firstArea = (xMax1 - xMin1 + 1) * (yMax1 - yMin1 + 1);
    const int secondArea = (xMax2 - xMin2 + 1) * (yMax2 - yMin2 + 1);

    *areaSum = firstArea + secondArea - overlapArea;
    *areaInter = overlapArea;
}
// Exchanges two proposal records field by field (SSD_PROPOSAL_WIDTH ints each).
void DetectorRetinaFace::Swap(int* src1, int* src2)
{
    for (int field = 0; field < SSD_PROPOSAL_WIDTH; ++field)
    {
        const int saved = src2[field];
        src2[field] = src1[field];
        src1[field] = saved;
    }
}
// Converts the fixed-point output of DetectionOutputLayer (ssdParameter.dstScore /
// dstRoi / classRoiNum) into ResultOfDetection entries and appends them to
// resultsOfDetection. Class 0 is skipped. For each class the scan stops at the first
// box whose confidence is below the confidence threshold.
void DetectorRetinaFace::CreateDetectionResults(std::vector<ResultOfDetection> &resultsOfDetection)
{
    // Parameters
    const int* score=ssdParameter.dstScore;
    const int* roi=ssdParameter.dstRoi;
    const int* classRoiNum=ssdParameter.classRoiNum;
    const float printResultThresh=((float)ssdParameter.confThresh)/SSD_QUANT_BASE;
    const int classNum=ssdParameter.classNum;

    // Number of boxes stored in front of the current class
    int roiNumBias = classRoiNum[0];
    for (int i = 1; i < classNum; i++)
    {
        const int scoreBias = roiNumBias;
        const int bboxBias = roiNumBias * SSD_COORDI_NUM;
        for (int j = 0; j < classRoiNum[i]; j++)
        {
            // Back from fixed point to a floating-point confidence
            const float confidence = (float)score[scoreBias + j] / SSD_QUANT_BASE;
            if (confidence < printResultThresh)
            {
                break;
            }
            const int xMin = roi[bboxBias + j*SSD_COORDI_NUM];
            const int yMin = roi[bboxBias + j*SSD_COORDI_NUM + 1];
            const int xMax = roi[bboxBias + j*SSD_COORDI_NUM + 2];
            const int yMax = roi[bboxBias + j*SSD_COORDI_NUM + 3];

            ResultOfDetection result;
            result.boundingBox.x=xMin;
            result.boundingBox.y=yMin;
            result.boundingBox.width=xMax-xMin+1;
            result.boundingBox.height=yMax-yMin+1;
            result.classID=i;
            result.confidence=confidence;
            resultsOfDetection.push_back(result);
        }
        roiNumBias += classRoiNum[i];
    }
}
}
#include <DetectorSSD.h>
#include <migraphx/onnx.hpp>
#include <migraphx/gpu/target.hpp>
#include <migraphx/gpu/hip.hpp>
#include <migraphx/generate.hpp>
#include <migraphx/quantization.hpp>
#include <opencv2/dnn.hpp>
#include <CommonUtility.h>
#include <Filesystem.h>
#include <SimpleLog.h>
#include<cmath>
#include <hip/hip_runtime.h>
#include "hip/hip_runtime.h"
using namespace cv::dnn;
namespace migraphxSamples
{
// Fixed-point base: floating-point values are multiplied by this factor before being stored as int
#define SSD_QUANT_BASE 4096
// Number of coordinates per box (x1,y1,x2,y2)
#define SSD_COORDI_NUM 4
// Number of ints per proposal record
#define SSD_PROPOSAL_WIDTH 6
#define SSD_HALF 0.5
// Default maximum number of aspect ratios
#define SSD_ASPECT_RATIO_NUM 6
#define SSD_MAX(a,b) (((a) > (b)) ? (a) : (b))
#define SSD_MIN(a,b) (((a) < (b)) ? (a) : (b))
// Alignment to 16: SSD_ALIGN16 rounds its argument up to the next multiple of 16.
// The argument is parenthesised so that expressions with low-precedence operators
// (shifts, conditionals, ...) are evaluated before the rounding is applied.
#define SSD_ALIGN_16 16
#define SSD_ALIGN16(number) (((number) + SSD_ALIGN_16-1) / SSD_ALIGN_16*SSD_ALIGN_16)
// Creates an uninitialised detector; Initialize() must be called before Detect().
DetectorSSD::DetectorSSD():logFile(NULL)
{
    // The destructor releases this buffer with delete[] unconditionally, so it has to be
    // a null pointer for objects that are destroyed without a successful Initialize().
    ssdParameter.buffer=NULL;
}
// Frees the SSD post-processing buffer and closes the configuration file.
// NOTE(review): preprocess_Image is allocated with hipMalloc in Initialize() (DMA builds)
// and is not released here - confirm where it is freed.
DetectorSSD::~DetectorSSD()
{
    // Memory of the SSD parameters
    delete[] ssdParameter.buffer;

    // Configuration file handle
    configurationFile.release();
}
// Initialises the detector: opens the log and configuration file, loads the ONNX model,
// optionally quantises it (INT8 / FP16), compiles it for the GPU, runs one warm-up
// inference and finally reads the SSD post-processing parameters.
// Returns SUCCESS, the error of DoCommonInitialization, MODEL_NOT_EXIST if the model file
// is missing, or IMAGE_ERROR if the INT8 calibration image cannot be loaded.
ErrorCode DetectorSSD::Initialize(InitializationParameterOfDetector initializationParameterOfDetector)
{
    // Common initialisation (log file, configuration file, ...)
    ErrorCode errorCode=DoCommonInitialization(initializationParameterOfDetector);
    if(errorCode!=SUCCESS)
    {
        LOG_ERROR(logFile,"fail to DoCommonInitialization\n");
        return errorCode;
    }
    LOG_INFO(logFile,"succeed to DoCommonInitialization\n");

    // Read the parameters from the configuration file
    FileNode netNode = configurationFile["DetectorSSD"];
    string modelPath=initializationParameter.parentPath+(string)netNode["ModelPath"];
    scale=(float)netNode["Scale"];
    meanValue.val[0]=(float)netNode["MeanValue1"];
    meanValue.val[1]=(float)netNode["MeanValue2"];
    meanValue.val[2]=(float)netNode["MeanValue3"];
    swapRB=(bool)(int)netNode["SwapRB"];
    crop=(bool)(int)netNode["Crop"];
    useInt8=(bool)(int)netNode["UseInt8"];
    useFP16=(bool)(int)netNode["UseFP16"];

    // Load the model
    if(Exists(modelPath)==false)
    {
        LOG_ERROR(logFile,"%s not exist!\n",modelPath.c_str());
        return MODEL_NOT_EXIST;
    }
    net = migraphx::parse_onnx(modelPath);
    LOG_INFO(logFile,"succeed to load model: %s\n",GetFileName(modelPath).c_str());

    // Input attributes of the model (the first parameter is used as the input)
    std::pair<std::string, shape> inputAttribute=*(net.get_parameter_shapes().begin());
    inputName=inputAttribute.first;
    inputShape=inputAttribute.second;
    inputSize=Size(inputShape.lens()[3],inputShape.lens()[2]);// NCHW

    // Run the model on the GPU
    migraphx::target gpuTarget = migraphx::gpu::target{};

    // Quantisation
    if(useInt8)
    {
        // Calibration data; several typical images from the test set are recommended
        const char *calibrationImagePath="../Resource/Images/FaceDetect_2.jpg";
        cv::Mat srcImage=imread(calibrationImagePath,1);
        if(srcImage.empty())
        {
            // imread returns an empty matrix when the file is missing or unreadable;
            // building a blob from it would fail later with a far less helpful error.
            LOG_ERROR(logFile,"fail to load calibration image: %s\n",calibrationImagePath);
            return IMAGE_ERROR;
        }
        // One copy of the image per batch element
        std::vector<cv::Mat> srcImages(inputShape.lens()[0],srcImage);
        cv::Mat inputBlob;
        blobFromImages(srcImages,
                       inputBlob,
                       scale,
                       inputSize,
                       meanValue,
                       swapRB,
                       false);
        migraphx::parameter_map inputData;
        inputData[inputName]= migraphx::argument{inputShape, (float*)inputBlob.data};
        std::vector<migraphx::parameter_map> calibrationData = {inputData};

        // INT8 quantisation
        migraphx::quantize_int8(net, gpuTarget, calibrationData);
    }
    if(useFP16)
    {
        migraphx::quantize_fp16(net);
    }

    // Compile the model
    migraphx::compile_options options;
    options.device_id=0; // GPU device, device 0 by default (supported in versions >= 1.2)
#ifdef DMA
    options.offload_copy=false; // input data is already in device memory
#else
    options.offload_copy=true; // let the library copy between host and device
#endif
    net.compile(gpuTarget,options);
    LOG_INFO(logFile,"succeed to compile model: %s\n",GetFileName(modelPath).c_str());

    // Warm-up: run the model once
#ifdef DMA
    ParameterMap=CreateParameterMap(net);
    net.eval(ParameterMap);
    // Device buffer that receives the preprocessed frame (3 float planes of the input size)
    if(hipMalloc((void**)&preprocess_Image, inputSize.height * inputSize.width * 3 * sizeof(float))!=hipSuccess)
    {
        LOG_ERROR(logFile,"fail to allocate device memory for the preprocessed image\n");
    }
#else
    migraphx::parameter_map inputData;
    inputData[inputName]=migraphx::generate_argument(inputShape);
    net.eval(inputData);
#endif

    // log
    LOG_INFO(logFile,"InputSize:%dx%d\n",inputSize.width,inputSize.height);
    LOG_INFO(logFile,"InputName:%s\n",inputName.c_str());
    LOG_INFO(logFile,"Scale:%.6f\n",scale);
    LOG_INFO(logFile,"Mean:%.2f,%.2f,%.2f\n",meanValue.val[0],meanValue.val[1],meanValue.val[2]);
    LOG_INFO(logFile,"SwapRB:%d\n",(int)swapRB);
    LOG_INFO(logFile,"Crop:%d\n",(int)crop);
    LOG_INFO(logFile,"UseInt8:%d\n",(int)useInt8);
    LOG_INFO(logFile,"UseFP16:%d\n",(int)useFP16);

    // Read the SSD parameters
    GetSSDParameter();
    return SUCCESS;
}
// Stores the initialisation parameters, fetches the log file, opens the configuration
// file for reading and makes sure a non-empty parent path ends with a path separator.
// Returns CONFIG_FILE_NOT_EXIST or FAIL_TO_OPEN_CONFIG_FILE on failure, SUCCESS otherwise.
ErrorCode DetectorSSD::DoCommonInitialization(InitializationParameterOfDetector initializationParameterOfDetector)
{
    initializationParameter=initializationParameterOfDetector;

    // Log file
    logFile=LogManager::GetInstance()->GetLogFile(initializationParameter.logName);

    // Configuration file
    std::string configFilePath=initializationParameter.configFilePath;
    if(Exists(configFilePath)==false)
    {
        LOG_ERROR(logFile, "no configuration file!\n");
        return CONFIG_FILE_NOT_EXIST;
    }
    const bool isOpened=configurationFile.open(configFilePath, FileStorage::READ);
    if(isOpened==false)
    {
        LOG_ERROR(logFile, "fail to open configuration file\n");
        return FAIL_TO_OPEN_CONFIG_FILE;
    }
    LOG_INFO(logFile, "succeed to open configuration file\n");

    // Parent path: append a separator if the path does not already end with one
    std::string &parentPath = initializationParameter.parentPath;
    if(parentPath.empty())
    {
        return SUCCESS;
    }
    if(IsPathSeparator(parentPath[parentPath.size() - 1])==false)
    {
        parentPath+=PATH_SEPARATOR;
    }
    return SUCCESS;
}
#ifdef DMA
// Converts a planar 8-bit image with plane order B,G,R into planar float R,G,B and
// divides every value by 255. One thread handles one pixel (1-D indexing over
// width*height); threads beyond the last pixel do nothing.
__global__ void convert_bgrp_to_rgb_and_normalization_ssd(unsigned char* srcImage, float* outImage, int width, int height)
{
    const int pixel = blockIdx.x * blockDim.x + threadIdx.x;
    const int planeSize = width * height;
    if (pixel >= planeSize)
    {
        return;
    }

    // Source planes: 0 = blue, 1 = green, 2 = red
    const unsigned char blue = srcImage[pixel];
    const unsigned char green = srcImage[pixel + planeSize];
    const unsigned char red = srcImage[pixel + planeSize * 2];

    // Destination planes: 0 = red, 1 = green, 2 = blue
    const float normalizer = 255.0;
    outImage[pixel] = red / normalizer;
    outImage[pixel + planeSize] = green / normalizer;
    outImage[pixel + planeSize * 2] = blue / normalizer;
}
// Converts a planar YUV420 image (full-size Y plane followed by quarter-size U and V
// planes) into planar float R,G,B. One thread handles one pixel and must be launched on a
// 2-D grid: x indexes the column, y the row.
// The output uses the same planar layout as the other conversion kernels in this file
// (plane c starts at c*width*height); writing the three channels to consecutive
// addresses would make neighbouring threads overwrite each other's results.
// The values are not scaled (range 0..255).
__global__ void convert_yuv420p_to_rgb_and_normalization_ssd(unsigned char* srcImage, float* outImage, int width, int height)
{
    const int x = blockIdx.x * blockDim.x + threadIdx.x;
    const int y = blockIdx.y * blockDim.y + threadIdx.y;
    if (x >= width || y >= height)
        return;

    const int planeSize = width * height;
    const int index = y * width + x;
    // One chroma sample is shared by a 2x2 block of pixels
    const int chromaIndex = (y / 2) * (width / 2) + (x / 2);
    const unsigned char yValue = srcImage[index];
    const unsigned char uValue = srcImage[chromaIndex + planeSize];
    const unsigned char vValue = srcImage[chromaIndex + planeSize * 5 / 4];

    int r = yValue + 1.370705 * (vValue - 128);
    int g = yValue - 0.698001 * (vValue - 128) - 0.337633 * (uValue - 128);
    int b = yValue + 1.732446 * (uValue - 128);
    // The conversion can leave the valid 8-bit range; clamp to [0, 255]
    r = (r < 0) ? 0 : ((r > 255) ? 255 : r);
    g = (g < 0) ? 0 : ((g > 255) ? 255 : g);
    b = (b < 0) ? 0 : ((b > 255) ? 255 : b);

    outImage[index + planeSize * 0] = (float)r;
    outImage[index + planeSize * 1] = (float)g;
    outImage[index + planeSize * 2] = (float)b;
}
// Converts an interleaved 8-bit RGBA image into planar float R,G,B; the fourth channel is
// dropped. The values are divided by 1.0, i.e. they keep the range 0..255. One thread
// handles one pixel (1-D indexing over width*height).
__global__ void convert_rgba_to_rgb_and_normalization_ssd(unsigned char* srcImage, float* outImage, int width, int height)
{
    const int pixel = blockIdx.x * blockDim.x + threadIdx.x;
    const int planeSize = width * height;
    if (pixel >= planeSize)
    {
        return;
    }

    // Four bytes per source pixel
    const unsigned char* source = srcImage + pixel * 4;
    const unsigned char red = source[0];
    const unsigned char green = source[1];
    const unsigned char blue = source[2];

    const float normalizer = 1.0;
    outImage[pixel] = red / normalizer;
    outImage[pixel + planeSize] = green / normalizer;
    outImage[pixel + planeSize * 2] = blue / normalizer;
}
#endif
// Runs the detector on a host image.
//   srcImage:           8-bit 3-channel image (CV_8UC3)
//   resultsOfDetection: receives the detections in source-image coordinates, sorted with
//                       CompareConfidence
// Returns IMAGE_ERROR for an empty image or an image of another type, SUCCESS otherwise.
ErrorCode DetectorSSD::Detect(const cv::Mat &srcImage,std::vector<ResultOfDetection> &resultsOfDetection)
{
    if(srcImage.empty()||srcImage.type()!=CV_8UC3)
    {
        LOG_ERROR(logFile, "image error!\n");
        return IMAGE_ERROR;
    }

    // Preprocess and convert to NCHW
    // NOTE(review): the crop argument is hard-coded to false; the configured Crop flag is
    // not forwarded - confirm that this is intended.
    cv::Mat inputBlob;
    blobFromImage(srcImage,
                  inputBlob,
                  scale,
                  inputSize,
                  meanValue,
                  swapRB,
                  false);

    // Input data
    migraphx::parameter_map inputData;
    inputData[inputName]= migraphx::argument{inputShape, (float*)inputBlob.data};

    // Inference
    std::vector<migraphx::argument> inferenceResults=net.eval(inputData);

    // Permute every output. Output 2*i is used as the regression of layer i and output
    // 2*i+1 as its classification.
    vector<vector<float>> regressions;
    vector<vector<float>> classifications;
    for(int i=0;i<ssdParameter.numberOfPriorBoxLayer;++i)
    {
        const int layerWidth=ssdParameter.priorBoxWidth[i];
        const int layerHeight=ssdParameter.priorBoxHeight[i];
        const int numberOfPriorBox=ssdParameter.detectInputChn[i]/(4*(layerHeight * layerWidth));

        // Regression
        std::vector<float> regression;
        migraphx::argument &result0 = inferenceResults[2*i];
        result0.visit([&](auto output) { regression.assign(output.begin(), output.end()); });
        // Pushing the temporary moves the permuted data instead of copying it
        regressions.push_back(PermuteLayer(regression,layerWidth,layerHeight,numberOfPriorBox*4));

        // Classification
        std::vector<float> classification;
        migraphx::argument &result1 = inferenceResults[2*i+1];
        result1.visit([&](auto output) { classification.assign(output.begin(), output.end()); });
        classifications.push_back(PermuteLayer(classification,layerWidth,layerHeight,numberOfPriorBox*ssdParameter.classNum));
    }

    // Post-process the inference results to obtain the final SSD detections
    GetResult(classifications,regressions,resultsOfDetection);

    // Map the boxes from network-input coordinates back to the source image
    const float ratioOfWidth=(1.0*srcImage.cols)/inputSize.width;
    const float ratioOfHeight=(1.0*srcImage.rows)/inputSize.height;
    for(size_t i=0;i<resultsOfDetection.size();++i)
    {
        resultsOfDetection[i].boundingBox.x*=ratioOfWidth;
        resultsOfDetection[i].boundingBox.width*=ratioOfWidth;
        resultsOfDetection[i].boundingBox.y*=ratioOfHeight;
        resultsOfDetection[i].boundingBox.height*=ratioOfHeight;
    }

    // Sort by confidence
    sort(resultsOfDetection.begin(), resultsOfDetection.end(),CompareConfidence);
    return SUCCESS;
}
#ifdef DMA
// Runs the detector on a frame that already resides in device memory.
//   srcImage:           decoded frame (device pointer dcu_data, width, height, pixel format)
//   resultsOfDetection: receives the detections, sorted with CompareConfidence
// The frame is converted into preprocess_Image by one of the conversion kernels and that
// buffer is bound as network input. preprocess_Image holds exactly one input-sized image,
// so frames of another size are rejected, as are unsupported pixel formats (both return
// IMAGE_ERROR).
ErrorCode DetectorSSD::Detect(DCU_Frame &srcImage, std::vector<ResultOfDetection> &resultsOfDetection)
{
    // The conversion kernels write width*height*3 floats; anything but the network input
    // size would overrun the buffer or fill it with a wrongly strided image.
    if(srcImage.width!=inputSize.width||srcImage.height!=inputSize.height)
    {
        LOG_ERROR(logFile, "image error!\n");
        return IMAGE_ERROR;
    }

    // 1-D launch configuration: one thread per pixel
    const int block_size = 256;
    const int num_blocks = (srcImage.width * srcImage.height + block_size - 1) / block_size;
    if(srcImage.format == AV_PIX_FMT_BGRP)
    {
        convert_bgrp_to_rgb_and_normalization_ssd<<<num_blocks, block_size>>>(srcImage.dcu_data, preprocess_Image, srcImage.width, srcImage.height);
    }
    else if(srcImage.format == AV_PIX_FMT_YUV420P)
    {
        // This kernel addresses pixels by (x, y) and therefore needs a 2-D grid; on a
        // 1-D grid only the first image row would be converted.
        const dim3 block2D(16, 16);
        const dim3 grid2D((srcImage.width + block2D.x - 1) / block2D.x,
                          (srcImage.height + block2D.y - 1) / block2D.y);
        convert_yuv420p_to_rgb_and_normalization_ssd<<<grid2D, block2D>>>(srcImage.dcu_data, preprocess_Image, srcImage.width, srcImage.height);
    }
    else if(srcImage.format == AV_PIX_FMT_RGBA)
    {
        convert_rgba_to_rgb_and_normalization_ssd<<<num_blocks, block_size>>>(srcImage.dcu_data, preprocess_Image, srcImage.width, srcImage.height);
    }
    else
    {
        // No conversion available: the input buffer would still hold the previous frame
        LOG_ERROR(logFile, "image error!\n");
        return IMAGE_ERROR;
    }

    // Inference on the device buffer
    ParameterMap[inputName] = migraphx::argument{inputShape, preprocess_Image};
    std::vector<migraphx::argument> inferenceResults = net.eval(ParameterMap);

    // Permute every output. Output 2*i is used as the regression of layer i and output
    // 2*i+1 as its classification.
    vector<vector<float>> regressions;
    vector<vector<float>> classifications;
    for(int i=0;i<ssdParameter.numberOfPriorBoxLayer;++i)
    {
        const int layerWidth=ssdParameter.priorBoxWidth[i];
        const int layerHeight=ssdParameter.priorBoxHeight[i];
        const int numberOfPriorBox=ssdParameter.detectInputChn[i]/(4*(layerHeight * layerWidth));

        // Regression
        std::vector<float> regression;
        migraphx::argument &result0 = inferenceResults[2*i];
        result0.visit([&](auto output) { regression.assign(output.begin(), output.end()); });
        regressions.push_back(PermuteLayer(regression,layerWidth,layerHeight,numberOfPriorBox*4));

        // Classification
        std::vector<float> classification;
        migraphx::argument &result1 = inferenceResults[2*i+1];
        result1.visit([&](auto output) { classification.assign(output.begin(), output.end()); });
        classifications.push_back(PermuteLayer(classification,layerWidth,layerHeight,numberOfPriorBox*ssdParameter.classNum));
    }

    // Post-process the inference results to obtain the final SSD detections
    GetResult(classifications,regressions,resultsOfDetection);

    // Map the boxes from network-input coordinates back to the frame
    const float ratioOfWidth=(1.0*srcImage.width)/inputSize.width;
    const float ratioOfHeight=(1.0*srcImage.height)/inputSize.height;
    for(size_t i=0;i<resultsOfDetection.size();++i)
    {
        resultsOfDetection[i].boundingBox.x*=ratioOfWidth;
        resultsOfDetection[i].boundingBox.width*=ratioOfWidth;
        resultsOfDetection[i].boundingBox.y*=ratioOfHeight;
        resultsOfDetection[i].boundingBox.height*=ratioOfHeight;
    }

    // Sort by confidence
    sort(resultsOfDetection.begin(), resultsOfDetection.end(),CompareConfidence);
    return SUCCESS;
}
#endif
void DetectorSSD::GetSSDParameter()
{
FileNode rootNode = configurationFile["DetectorSSD"];
ssdParameter.numberOfPriorBoxLayer=(int)rootNode["PriorBoxLayerNumber"];
ssdParameter.srcImageHeight = inputSize.height;
ssdParameter.srcImageWidth = inputSize.width;
// MinSize,MaxSize
ssdParameter.priorBoxMinSize.resize(ssdParameter.numberOfPriorBoxLayer);
ssdParameter.priorBoxMaxSize.resize(ssdParameter.numberOfPriorBoxLayer);
for (int i = 0; i < ssdParameter.numberOfPriorBoxLayer; ++i)
{
for (int j = 1; j < 4; j++)
{
if (i == 0) {
if (j == 1) {
ssdParameter.priorBoxMinSize[i].push_back((float)rootNode["MinSize11"]);}
if (j == 2) {
ssdParameter.priorBoxMinSize[i].push_back((float)rootNode["MinSize12"]);}
if (j == 3) {
ssdParameter.priorBoxMinSize[i].push_back((float)rootNode["MinSize13"]);}
} else if (i == 1) {
if (j == 1) {
ssdParameter.priorBoxMinSize[i].push_back((float)rootNode["MinSize21"]);}
if (j == 2) {
ssdParameter.priorBoxMinSize[i].push_back((float)rootNode["MinSize22"]);}
} else if (i == 2) {
if (j == 1) {
ssdParameter.priorBoxMinSize[i].push_back((float)rootNode["MinSize31"]);}
if (j == 2) {
ssdParameter.priorBoxMinSize[i].push_back((float)rootNode["MinSize32"]);}
} else if (i == 3) {
if (j == 1) {
ssdParameter.priorBoxMinSize[i].push_back((float)rootNode["MinSize41"]);}
if (j == 2) {
ssdParameter.priorBoxMinSize[i].push_back((float)rootNode["MinSize42"]);}
if (j == 3) {
ssdParameter.priorBoxMinSize[i].push_back((float)rootNode["MinSize43"]);}
}
}
}
// MinSizeNumber,MaxSizeNumber
for (int i = 0; i < ssdParameter.numberOfPriorBoxLayer; ++i)
{
ssdParameter.minSizeNum[i] = ssdParameter.priorBoxMinSize[i].size();
ssdParameter.maxSizeNum[i] = ssdParameter.priorBoxMaxSize[i].size();;
}
// Flip,Clip
for (int i = 0; i < ssdParameter.numberOfPriorBoxLayer; ++i)
{
// Flip
ssdParameter.flip[i] = 0;
// Clip
ssdParameter.clip[i] = 0;
}
// AspectRatio
ssdParameter.priorBoxAspectRatio.resize(ssdParameter.numberOfPriorBoxLayer);
for (int i = 0; i < ssdParameter.numberOfPriorBoxLayer; ++i)
{
while(true)
{
//sprintf(nodeName, "AspectRatio%d%d", (i + 1),++j);
FileNode aspectRatioNode = rootNode["AspectRatio"];
if(aspectRatioNode.empty())
{
break;
}
else
{
ssdParameter.priorBoxAspectRatio[i].push_back((float)rootNode["AspectRatio"]);
}
}
}
// aspect ratio number
for (int i = 0; i < ssdParameter.numberOfPriorBoxLayer; ++i)
{
ssdParameter.inputAspectRatioNum[i] = ssdParameter.priorBoxAspectRatio[i].size();
}
// PriorBoxStep
for (int i = 0; i < ssdParameter.numberOfPriorBoxLayer; ++i)
{
// width
ssdParameter.priorBoxStepWidth[i] = (int)(8 * pow(2, i));
// height
ssdParameter.priorBoxStepHeight[i] = (int)(8 * pow(2, i));
}
// PriorBoxWidth,PriorBoxHeight
for (int i = 0; i < ssdParameter.numberOfPriorBoxLayer; ++i)
{
ssdParameter.priorBoxWidth[i] = ssdParameter.srcImageWidth/ssdParameter.priorBoxStepWidth[i];
ssdParameter.priorBoxHeight[i] = ssdParameter.srcImageHeight/ssdParameter.priorBoxStepHeight[i];
}
ssdParameter.offset = (float)rootNode["Offset"];
ssdParameter.priorBoxVar[0] = (int)(0.1f*SSD_QUANT_BASE);
ssdParameter.priorBoxVar[1] = (int)(0.1f*SSD_QUANT_BASE);
ssdParameter.priorBoxVar[2] = (int)(0.2f*SSD_QUANT_BASE);
ssdParameter.priorBoxVar[3] = (int)(0.2f*SSD_QUANT_BASE);
int classNumber = (int)rootNode["ClassNumber"];
ssdParameter.softMaxInHeight = classNumber;
ssdParameter.concatNum = ssdParameter.numberOfPriorBoxLayer;
ssdParameter.softMaxOutWidth = 1;
ssdParameter.softMaxOutHeight = classNumber;
int totalSizeOfClasReg=0;// 分类和回归一共需要的内存空间大小
for (int i = 0; i < ssdParameter.numberOfPriorBoxLayer; ++i)
{
int priorBoxNumber=0;
priorBoxNumber+=1;// aspect ratio=1
for (int j = 0; j < ssdParameter.inputAspectRatioNum[i]; j++)
{
++priorBoxNumber;
if (ssdParameter.flip[j]==1)
{
++priorBoxNumber;
}
}
priorBoxNumber = ssdParameter.minSizeNum[i] * priorBoxNumber + ssdParameter.maxSizeNum[i];
int totalPriorBoxNumber = priorBoxNumber*ssdParameter.priorBoxHeight[i] * ssdParameter.priorBoxWidth[i];
ssdParameter.softMaxInChn[i] = totalPriorBoxNumber * classNumber;
ssdParameter.softMaxOutChn += totalPriorBoxNumber;
ssdParameter.detectInputChn[i] = totalPriorBoxNumber * 4;
totalSizeOfClasReg+=(ssdParameter.softMaxInChn[i]+ssdParameter.detectInputChn[i]);
}
// DetectionOut
ssdParameter.classNum = classNumber;
ssdParameter.topK = (int)rootNode["TopK"];;
ssdParameter.keepTopK = (int)rootNode["KeepTopK"];
ssdParameter.NMSThresh = (int)((float)rootNode["NMSThreshold"]* SSD_QUANT_BASE);
ssdParameter.confThresh=(int)((float)rootNode["ConfidenceThreshold"]*SSD_QUANT_BASE);
for (int i = 0; i < ssdParameter.numberOfPriorBoxLayer ; i++)
{
int numberOfPriorBox=ssdParameter.detectInputChn[i]/(4*(ssdParameter.priorBoxHeight[i] * ssdParameter.priorBoxWidth[i]));
ssdParameter.convHeight[2*i]=ssdParameter.priorBoxHeight[i];
ssdParameter.convWidth[2*i]=ssdParameter.priorBoxWidth[i];
ssdParameter.convChannel[2*i]=numberOfPriorBox*4;
ssdParameter.convHeight[2*i+1]=ssdParameter.priorBoxHeight[i];
ssdParameter.convWidth[2*i+1]=ssdParameter.priorBoxWidth[i];
ssdParameter.convChannel[2*i+1]=numberOfPriorBox*ssdParameter.classNum;
ssdParameter.convStride[i] = SSD_ALIGN16(ssdParameter.convChannel[2*i+1] * sizeof(int)) / sizeof(int);
}
// 计算softMaxOutputData内存空间大小
int softMaxSize=0;
for(int i = 0; i < ssdParameter.concatNum; i++)
{
softMaxSize += ssdParameter.softMaxInChn[i];
}
// 计算getResultBuffer内存空间大小
int priorNum = 0;
int detectionSize = 0;
for(int i = 0; i < ssdParameter.concatNum; i++)
{
priorNum+=ssdParameter.detectInputChn[i]/SSD_COORDI_NUM;
}
detectionSize+=priorNum*SSD_COORDI_NUM;
detectionSize+=priorNum*SSD_PROPOSAL_WIDTH*2;
detectionSize+=priorNum*2;
// 计算dstRoi,classRoiNum,dstScore内存空间大小
int dstRoiSize = 0;
int dstScoreSize = 0;
int classRoiNumSize = 0;
dstRoiSize = SSD_ALIGN16(ssdParameter.classNum*ssdParameter.topK*SSD_COORDI_NUM);
dstScoreSize = SSD_ALIGN16(ssdParameter.classNum*ssdParameter.topK);
classRoiNumSize = SSD_ALIGN16(ssdParameter.classNum);
// 申请内存,并分配
int totalSize=totalSizeOfClasReg+SSD_COORDI_NUM*2*ssdParameter.softMaxOutChn+softMaxSize+detectionSize+dstRoiSize+classRoiNumSize+dstScoreSize;
ssdParameter.buffer=new int[totalSize];
int *data=ssdParameter.buffer;
memset(data,0,totalSize*sizeof(int));// 初始化0
int offset=0;
for (int i = 0; i < ssdParameter.numberOfPriorBoxLayer; ++i)
{
int *dataOfClasReg=data+offset;
ssdParameter.classification[i]=dataOfClasReg;
ssdParameter.regression[i]=dataOfClasReg+ssdParameter.softMaxInChn[i];
offset+=(ssdParameter.softMaxInChn[i]+ssdParameter.detectInputChn[i]);
}
ssdParameter.priorboxOutputData=data+totalSizeOfClasReg;
ssdParameter.softMaxOutputData=ssdParameter.priorboxOutputData+SSD_COORDI_NUM*2*ssdParameter.softMaxOutChn;
ssdParameter.getResultBuffer=ssdParameter.softMaxOutputData+softMaxSize;
ssdParameter.dstRoi=ssdParameter.getResultBuffer+detectionSize;
ssdParameter.classRoiNum=ssdParameter.dstRoi+dstRoiSize;
ssdParameter.dstScore=ssdParameter.classRoiNum+classRoiNumSize;
}
void DetectorSSD::GetResult(const vector<vector<float>> &classifications,const vector<vector<float>> &regressions,vector<ResultOfDetection> &resultsOfDetection)
{
int numberOfPriorBoxLayer=ssdParameter.numberOfPriorBoxLayer;
// 类型转换
for(int i = 0; i < numberOfPriorBoxLayer; i++)
{
// 分类
vector<float> classificationOfEachLayer=classifications[i];
for(int j=0;j<classificationOfEachLayer.size();++j)
{
(ssdParameter.classification[i])[j]=classificationOfEachLayer[j]*SSD_QUANT_BASE;
}
// 回归
vector<float> regressionOfEachLayer=regressions[i];
for(int j=0;j<regressionOfEachLayer.size();++j)
{
(ssdParameter.regression[i])[j]=regressionOfEachLayer[j]*SSD_QUANT_BASE;
}
}
int* priorboxOutputData[SSD_MAX_PRIORBOX_LAYER_NUM];
int* softMaxInputData[SSD_MAX_PRIORBOX_LAYER_NUM];
int* detectionLocData[SSD_MAX_PRIORBOX_LAYER_NUM];
int* softMaxOutputData = NULL;
int* detectionOutTmpBuf = NULL;
int softMaxWidth[SSD_MAX_PRIORBOX_LAYER_NUM];
int size = 0;
int i = 0;
/////////////////////////////////// PriorBoxLayer:生成所有priorbox ///////////////////////////////////
// 分配priorboxOutputData内存空间
priorboxOutputData[0] = ssdParameter.priorboxOutputData;
for (i = 1; i < numberOfPriorBoxLayer; i++)
{
size=ssdParameter.softMaxInChn[i-1]/ssdParameter.classNum*SSD_COORDI_NUM*2;
priorboxOutputData[i] = priorboxOutputData[i - 1] + size;
}
for (i = 0; i < numberOfPriorBoxLayer; i++)
{
PriorBoxLayer(i,priorboxOutputData[i]);
}
/////////////////////////////////// SoftmaxLayer:计算所有priorbox的置信度 ///////////////////////////////////
// 分配softMaxOutputData内存空间
softMaxOutputData =ssdParameter.softMaxOutputData;
for(i = 0; i < numberOfPriorBoxLayer; i++)
{
softMaxInputData[i] = ssdParameter.classification[i];
softMaxWidth[i] = ssdParameter.convChannel[i*2+1];
}
SoftmaxLayer(softMaxWidth,softMaxInputData, softMaxOutputData);
/////////////////////////////////// DetectionOutputLayer:对网络输出值解码并经过NMS得到最后的检测结果 ///////////////////////////////////
// 分配DetectionOut内存空间
detectionOutTmpBuf = ssdParameter.getResultBuffer;
for(i = 0; i < numberOfPriorBoxLayer; i++)
{
detectionLocData[i] = ssdParameter.regression[i];
}
DetectionOutputLayer(detectionLocData, priorboxOutputData, softMaxOutputData,detectionOutTmpBuf);
// 获取最后的检测结果
CreateDetectionResults(resultsOfDetection);
}
void DetectorSSD::PriorBoxLayer(int indexOfLayer,int* priorboxOutputData)
{
// 参数赋值
int priorBoxWidth=ssdParameter.priorBoxWidth[indexOfLayer];
int priorBoxHeight=ssdParameter.priorBoxHeight[indexOfLayer];
int srcImageWidth=ssdParameter.srcImageWidth;
int srcImageHeight=ssdParameter.srcImageHeight;
vector<float> priorBoxMinSize=ssdParameter.priorBoxMinSize[indexOfLayer];
int minSizeNum=ssdParameter.minSizeNum[indexOfLayer];
vector<float> priorBoxMaxSize=ssdParameter.priorBoxMaxSize[indexOfLayer];
int maxSizeNum=ssdParameter.maxSizeNum[indexOfLayer];
int flip=ssdParameter.flip[indexOfLayer];
int clip=ssdParameter.clip[indexOfLayer];
int inputAspectRatioNum=ssdParameter.inputAspectRatioNum[indexOfLayer];
vector<float> priorBoxAspectRatio=ssdParameter.priorBoxAspectRatio[indexOfLayer];
float priorBoxStepWidth=ssdParameter.priorBoxStepWidth[indexOfLayer];
float priorBoxStepHeight= ssdParameter.priorBoxStepHeight[indexOfLayer];
float offset=ssdParameter.offset;
int *priorBoxVar=ssdParameter.priorBoxVar;
int aspectRatioNum = 0;
int index = 0;
float aspectRatio[SSD_ASPECT_RATIO_NUM] = { 0 };
int numPrior = 0;
float centerX = 0;
float centerY = 0;
float boxHeight = 0;
float boxWidth = 0;
float maxBoxWidth = 0;
int i = 0;
int j = 0;
int n = 0;
int h = 0;
int w = 0;
aspectRatioNum = 0;
aspectRatio[0] = 1;
aspectRatioNum++;
for (i = 0; i < inputAspectRatioNum; i++)
{
aspectRatio[aspectRatioNum++] = priorBoxAspectRatio[i];
if (flip)
{
aspectRatio[aspectRatioNum++] = 1.0f / priorBoxAspectRatio[i];
}
}
numPrior = minSizeNum * aspectRatioNum + maxSizeNum;
index = 0;
for (h = 0; h < priorBoxHeight; h++)
{
for (w = 0; w < priorBoxWidth; w++)
{
centerX = (w + offset) * priorBoxStepWidth;
centerY = (h + offset) * priorBoxStepHeight;
for (n = 0; n < minSizeNum; n++)
{
// 首先产生宽高比为1的priorbox
boxHeight = priorBoxMinSize[n];
boxWidth = priorBoxMinSize[n];
priorboxOutputData[index++] = (int)(centerX - boxWidth * SSD_HALF);
priorboxOutputData[index++] = (int)(centerY - boxHeight * SSD_HALF);
priorboxOutputData[index++] = (int)(centerX + boxWidth * SSD_HALF);
priorboxOutputData[index++] = (int)(centerY + boxHeight * SSD_HALF);
// 对于max_size,生成宽高比为1的priorbox,宽高为sqrt(min_size * max_size)
if(maxSizeNum>0)
{
maxBoxWidth = sqrt(priorBoxMinSize[n] * priorBoxMaxSize[n]);
boxHeight = maxBoxWidth;
boxWidth = maxBoxWidth;
priorboxOutputData[index++] = (int)(centerX - boxWidth * SSD_HALF);
priorboxOutputData[index++] = (int)(centerY - boxHeight * SSD_HALF);
priorboxOutputData[index++] = (int)(centerX + boxWidth * SSD_HALF);
priorboxOutputData[index++] = (int)(centerY + boxHeight * SSD_HALF);
}
// 剩下的priorbox
for (i = 1; i < aspectRatioNum; i++)
{
boxWidth = (float)(priorBoxMinSize[n] * sqrt( aspectRatio[i] ));
boxHeight = (float)(priorBoxMinSize[n]/sqrt( aspectRatio[i] ));
priorboxOutputData[index++] = (int)(centerX - boxWidth * SSD_HALF);
priorboxOutputData[index++] = (int)(centerY - boxHeight * SSD_HALF);
priorboxOutputData[index++] = (int)(centerX + boxWidth * SSD_HALF);
priorboxOutputData[index++] = (int)(centerY + boxHeight * SSD_HALF);
}
}
}
}
// 越界处理 [0, srcImageWidth] & [0, srcImageHeight]
if (clip)
{
for (i = 0; i < (int)(priorBoxWidth * priorBoxHeight * SSD_COORDI_NUM*numPrior / 2); i++)
{
priorboxOutputData[2 * i] = SSD_MIN((int)SSD_MAX(priorboxOutputData[2 * i], 0), srcImageWidth);
priorboxOutputData[2 * i + 1] = SSD_MIN((int)SSD_MAX(priorboxOutputData[2 * i + 1], 0), srcImageHeight);
}
}
// var
for (h = 0; h < priorBoxHeight; h++)
{
for (w = 0; w < priorBoxWidth; w++)
{
for (i = 0; i < numPrior; i++)
{
for (j = 0; j < SSD_COORDI_NUM; j++)
{
priorboxOutputData[index++] = (int)priorBoxVar[j];
}
}
}
}
}
// Applies softmax to the class scores of every prior box of every layer.
//
// softMaxWidth      : per-layer channel width; kept for interface compatibility, the
//                     tightly packed layout used here does not need it.
// softMaxInputData  : per-layer pointers to the quantized class scores.
// softMaxOutputData : output buffer; the results of all layers are written back to
//                     back, softMaxInHeight values per prior box.
//
// The stride/skip/left values the previous version computed were never used and have
// been removed.
void DetectorSSD::SoftmaxLayer(int softMaxWidth[],int* softMaxInputData[], int* softMaxOutputData)
{
    (void)softMaxWidth;

    const int scoresPerPrior = ssdParameter.softMaxInHeight;
    const int *softMaxInChn = ssdParameter.softMaxInChn;
    const int concatNum = ssdParameter.concatNum;

    int *output = softMaxOutputData;
    for (int layer = 0; layer < concatNum; layer++)
    {
        int *input = softMaxInputData[layer];
        // Number of prior boxes in this layer.
        const int priorCount = softMaxInChn[layer] / scoresPerPrior;
        for (int prior = 0; prior < priorCount; prior++)
        {
            ComputeSoftMax(input, scoresPerPrior, output);
            input += scoresPerPrior;
            output += scoresPerPrior;
        }
    }
}
// Fixed-point softmax over `size` quantized logits.
//
// src  : input logits, scaled by SSD_QUANT_BASE.
// size : number of logits; nothing is written when size <= 0.
// dst  : output probabilities, scaled by SSD_QUANT_BASE.
//
// The maximum is seeded from the first element rather than 0. With a 0 seed and all
// logits strongly negative, every quantized exponential could round to 0, making the
// sum 0 and the normalization a division by zero. With the true maximum the largest
// element always contributes exp(0), so the sum is never 0.
void DetectorSSD::ComputeSoftMax(int* src, int size, int* dst)
{
    if (size <= 0)
    {
        return;
    }

    int maxValue = src[0];
    for (int k = 1; k < size; ++k)
    {
        if (maxValue < src[k])
        {
            maxValue = src[k];
        }
    }

    int sum = 0;
    for (int k = 0; k < size; ++k)
    {
        dst[k] = (int)(SSD_QUANT_BASE* exp((float)(src[k] - maxValue) / SSD_QUANT_BASE));
        sum += dst[k];
    }

    for (int k = 0; k < size; ++k)
    {
        dst[k] = (int)(((float)dst[k] / (float)sum) * SSD_QUANT_BASE);
    }
}
// Decodes the regression outputs into boxes, runs per-class NMS and keeps the best
// detections.
//
// allLocPreds   : per-layer quantized box regressions (4 values per prior).
// allPriorBoxes : per-layer prior boxes; each layer stores its box coordinates
//                 followed by the matching variances.
// confScores    : softmax output, classNum values per prior.
// assistMemPool : scratch memory, split below into decoded boxes, two proposal tables
//                 and the explicit stack used by QuickSort.
//
// Results go to ssdParameter.dstScore / dstRoi (packed class after class) and
// ssdParameter.classRoiNum (detections per class). Class 0 is skipped; its
// classRoiNum entry is never written here.
// A proposal row is SSD_PROPOSAL_WIDTH ints: 4 coordinates, the score at +4 and a
// flag/label at +5 (0 = kept, 1 = suppressed during NMS; class id in the keepTopK pass).
//
// Fixes relative to the previous version:
//  * after NMS only the first min(priorNum, topK) rows are scanned, instead of topK
//    rows that may lie beyond the proposals filled in for this class;
//  * the literal row width 6 is replaced by SSD_PROPOSAL_WIDTH;
//  * classRoiNum is bound once up front, so it is never dereferenced while NULL.
void DetectorSSD::DetectionOutputLayer(int* allLocPreds[], int* allPriorBoxes[],int* confScores, int* assistMemPool)
{
    // Parameters
    const int concatNum = ssdParameter.concatNum;
    const int confThresh = ssdParameter.confThresh;
    const int classNum = ssdParameter.classNum;
    const int topK = ssdParameter.topK;
    const int keepTopK = ssdParameter.keepTopK;
    const int NMSThresh = ssdParameter.NMSThresh;
    int *detectInputChn = ssdParameter.detectInputChn;
    int *dstScoreSrc = ssdParameter.dstScore;
    int *dstBboxSrc = ssdParameter.dstRoi;
    int *classRoiNum = ssdParameter.classRoiNum;

    // Total number of prior boxes over all layers.
    int priorNum = 0;
    for (int i = 0; i < concatNum; i++)
    {
        priorNum += detectInputChn[i] / SSD_COORDI_NUM;
    }

    // Scratch layout
    int *allDecodeBoxes = assistMemPool;
    int *singleProposal = allDecodeBoxes + priorNum * SSD_COORDI_NUM;
    int *afterTopK = singleProposal + SSD_PROPOSAL_WIDTH * priorNum;
    QuickSortStack *stack = (QuickSortStack*)(afterTopK + priorNum * SSD_PROPOSAL_WIDTH);

    // Decode every prior box with its regression and variance.
    int srcIdx = 0;
    for (int i = 0; i < concatNum; i++)
    {
        const int *locPreds = allLocPreds[i];
        const int numPredsPerClass = detectInputChn[i] / SSD_COORDI_NUM;
        const int *priorBoxes = allPriorBoxes[i];
        const int *priorVar = priorBoxes + numPredsPerClass*SSD_COORDI_NUM;
        for (int j = 0; j < numPredsPerClass; j++)
        {
            const int *box = priorBoxes + j*SSD_COORDI_NUM;
            const int *var = priorVar + j*SSD_COORDI_NUM;
            const int *loc = locPreds + j*SSD_COORDI_NUM;

            const float priorWidth = (float)(box[2] - box[0]);
            const float priorHeight = (float)(box[3] - box[1]);
            const float priorCenterX = (box[2] + box[0])*SSD_HALF;
            const float priorCenterY = (box[3] + box[1])*SSD_HALF;

            const float decodeBoxCenterX = ((float)var[0]/SSD_QUANT_BASE)*
                ((float)loc[0]/SSD_QUANT_BASE)*priorWidth+priorCenterX;
            const float decodeBoxCenterY = ((float)var[1]/SSD_QUANT_BASE)*
                ((float)loc[1]/SSD_QUANT_BASE)*priorHeight+priorCenterY;
            const float decodeBoxWidth = exp(((float)var[2]/SSD_QUANT_BASE)*
                ((float)loc[2]/SSD_QUANT_BASE))*priorWidth;
            const float decodeBoxHeight = exp(((float)var[3]/SSD_QUANT_BASE)*
                ((float)loc[3]/SSD_QUANT_BASE))*priorHeight;

            allDecodeBoxes[srcIdx++] = (int)(decodeBoxCenterX - decodeBoxWidth * SSD_HALF);
            allDecodeBoxes[srcIdx++] = (int)(decodeBoxCenterY - decodeBoxHeight * SSD_HALF);
            allDecodeBoxes[srcIdx++] = (int)(decodeBoxCenterX + decodeBoxWidth * SSD_HALF);
            allDecodeBoxes[srcIdx++] = (int)(decodeBoxCenterY + decodeBoxHeight * SSD_HALF);
        }
    }

    // NMS for every class except class 0.
    int afterTopK2 = 0;
    for (int i = 1; i < classNum; i++)
    {
        for (int j = 0; j < priorNum; j++)
        {
            int *proposal = singleProposal + j * SSD_PROPOSAL_WIDTH;
            const int *decoded = allDecodeBoxes + j * SSD_COORDI_NUM;
            proposal[0] = decoded[0];
            proposal[1] = decoded[1];
            proposal[2] = decoded[2];
            proposal[3] = decoded[3];
            proposal[4] = confScores[j*classNum + i];
            proposal[5] = 0;
        }
        QuickSort(singleProposal, 0, priorNum - 1, stack,topK);
        const int afterFilter = (priorNum < topK) ? priorNum : topK;
        NonMaxSuppression(singleProposal, afterFilter, NMSThresh, afterFilter);

        // Append the survivors above the confidence threshold to the packed outputs.
        int roiOutCnt = 0;
        int *dstScore = dstScoreSrc + afterTopK2;
        int *dstBbox = dstBboxSrc + afterTopK2 * SSD_COORDI_NUM;
        for (int j = 0; j < afterFilter; j++)
        {
            const int *proposal = singleProposal + j * SSD_PROPOSAL_WIDTH;
            if (proposal[5] == 0 && proposal[4] > (int)confThresh)
            {
                dstScore[roiOutCnt] = proposal[4];
                dstBbox[roiOutCnt * SSD_COORDI_NUM] = proposal[0];
                dstBbox[roiOutCnt * SSD_COORDI_NUM + 1] = proposal[1];
                dstBbox[roiOutCnt * SSD_COORDI_NUM + 2] = proposal[2];
                dstBbox[roiOutCnt * SSD_COORDI_NUM + 3] = proposal[3];
                roiOutCnt++;
            }
        }
        classRoiNum[i] = (int)roiOutCnt;
        afterTopK2 += roiOutCnt;
    }

    // Too many detections overall: keep only the keepTopK best across all classes.
    if (afterTopK2 > keepTopK)
    {
        // Gather every detection into one table, tagging each row with its class id.
        int keepCnt = 0;
        int offset = classRoiNum[0];
        for (int i = 1; i < classNum; i++)
        {
            const int *dstScore = dstScoreSrc + offset;
            const int *dstBbox = dstBboxSrc + offset * SSD_COORDI_NUM;
            for (int j = 0; j < (int)classRoiNum[i]; j++)
            {
                int *row = afterTopK + keepCnt * SSD_PROPOSAL_WIDTH;
                row[0] = dstBbox[j * SSD_COORDI_NUM];
                row[1] = dstBbox[j * SSD_COORDI_NUM + 1];
                row[2] = dstBbox[j * SSD_COORDI_NUM + 2];
                row[3] = dstBbox[j * SSD_COORDI_NUM + 3];
                row[4] = dstScore[j];
                row[5] = i;
                keepCnt++;
            }
            offset = offset + classRoiNum[i];
        }
        QuickSort(afterTopK, 0, keepCnt - 1, stack,keepCnt);

        // Redistribute the best keepTopK rows back into the per-class packed outputs.
        offset = classRoiNum[0];
        for (int i = 1; i < classNum; i++)
        {
            int roiOutCnt = 0;
            int *dstScore = dstScoreSrc + offset;
            int *dstBbox = dstBboxSrc + offset * SSD_COORDI_NUM;
            for (int j = 0; j < keepTopK; j++)
            {
                const int *row = afterTopK + j * SSD_PROPOSAL_WIDTH;
                if (row[5] == i)
                {
                    dstScore[roiOutCnt] = row[4];
                    dstBbox[roiOutCnt * SSD_COORDI_NUM] = row[0];
                    dstBbox[roiOutCnt * SSD_COORDI_NUM + 1] = row[1];
                    dstBbox[roiOutCnt * SSD_COORDI_NUM + 2] = row[2];
                    dstBbox[roiOutCnt * SSD_COORDI_NUM + 3] = row[3];
                    roiOutCnt++;
                }
            }
            classRoiNum[i] = (int)roiOutCnt;
            offset += roiOutCnt;
        }
    }
}
// Reorders a channel-major (C, H, W) tensor into pixel-major (H, W, C) order.
//
// data     : source values, read as channels planes of height x width elements.
// width    : plane width.
// height   : plane height.
// channels : number of planes.
// Returns a vector of data.size() elements holding the reordered values.
// NOTE(review): data.size() is assumed to be at least width*height*channels; this is
// not checked here.
vector<float> DetectorSSD::PermuteLayer(const vector<float> &data,int width,int height,int channels)
{
    vector<float> permuted(data.size());
    const int planeSize = width*height;
    int writePos = 0;
    for (int row = 0; row < height; ++row)
    {
        const int rowOffset = row*width;
        for (int col = 0; col < width; ++col)
        {
            // Gather the values of this pixel from every channel plane.
            for (int channel = 0; channel < channels; ++channel)
            {
                permuted[writePos] = data[channel*planeSize + rowOffset + col];
                ++writePos;
            }
        }
    }
    return permuted;
}
// Non-recursive quicksort of proposal rows, ordering them by descending score.
//
// src    : table of rows, SSD_PROPOSAL_WIDTH ints each; the sort key is element +4.
// low    : index of the first row to sort.
// high   : index of the last row to sort (inclusive).
// stack  : caller-provided array of {min, max} ranges used instead of recursion.
// maxNum : ranges whose first index is greater than maxNum are not subdivided
//          further, so rows beyond that point are left only partially ordered.
void DetectorSSD::QuickSort(int* src,int low, int high, QuickSortStack *stack,int maxNum)
{
int i = low;
int j = high;
int top = 0;
int keyConfidence = src[SSD_PROPOSAL_WIDTH * low + 4];
// Seed the stack with the whole range.
stack[top].min = low;
stack[top].max = high;
while(top > -1)
{
// Pop the next pending range.
low = stack[top].min;
high = stack[top].max;
i = low;
j = high;
top--;
// The pivot is the score of the first row of the range.
keyConfidence = src[SSD_PROPOSAL_WIDTH * low + 4];
while(i < j)
{
// Scan from the right for a row whose score is not smaller than the pivot.
while((i < j) && (keyConfidence > src[j * SSD_PROPOSAL_WIDTH + 4]))
{
j--;
}
if(i < j)
{
Swap(&src[i*SSD_PROPOSAL_WIDTH], &src[j*SSD_PROPOSAL_WIDTH]);
i++;
}
// Scan from the left for a row whose score is not larger than the pivot.
while((i < j) && (keyConfidence < src[i*SSD_PROPOSAL_WIDTH + 4]))
{
i++;
}
if(i < j)
{
Swap(&src[i*SSD_PROPOSAL_WIDTH], &src[j*SSD_PROPOSAL_WIDTH]);
j--;
}
}
// Row i is now in its final place; push the two sub-ranges around it, but only
// while this range starts within the first maxNum rows.
if(low <= maxNum)
{
if(low < i-1)
{
top++;
stack[top].min = low;
stack[top].max = i-1;
}
if(high > i+1)
{
top++;
stack[top].min = i+1;
stack[top].max = high;
}
}
}
}
// Greedy non-maximum suppression over a table of proposals.
//
// proposals  : rows of SSD_PROPOSAL_WIDTH ints (4 corner coordinates, score at +4,
//              suppression flag at +5). Suppressed rows get their flag set to 1.
// anchorsNum : number of rows to process.
// NMSThresh  : overlap threshold, scaled by SSD_QUANT_BASE.
// maxRoiNum  : stop once this many unsuppressed rows have been visited.
//
// The threshold test compares areaInter / areaTotal with NMSThresh / SSD_QUANT_BASE by
// cross-multiplication. Both products are now evaluated in 64-bit: with 32-bit ints,
// areaInter * SSD_QUANT_BASE overflows (undefined behaviour) for large boxes.
void DetectorSSD::NonMaxSuppression( int* proposals, int anchorsNum,int NMSThresh,int maxRoiNum)
{
    int visited = 0;
    for (int i = 0; i < anchorsNum && visited < maxRoiNum; i++)
    {
        int *current = proposals + SSD_PROPOSAL_WIDTH*i;
        if (current[5] != 0)
        {
            continue;   // already suppressed
        }
        visited++;

        const int xMin1 = current[0];
        const int yMin1 = current[1];
        const int xMax1 = current[2];
        const int yMax1 = current[3];

        for (int j = i+1; j < anchorsNum; j++)
        {
            int *candidate = proposals + SSD_PROPOSAL_WIDTH*j;
            if (candidate[5] != 0)
            {
                continue;
            }
            const int xMin2 = candidate[0];
            const int yMin2 = candidate[1];
            const int xMax2 = candidate[2];
            const int yMax2 = candidate[3];

            // Cheap rejection of boxes that cannot intersect.
            const bool noOverlap = (xMin2>xMax1)||(xMax2<xMin1)||(yMin2>yMax1)||(yMax2<yMin1);
            if (noOverlap)
            {
                continue;
            }

            int areaTotal = 0;
            int areaInter = 0;
            ComputeOverlap(xMin1, yMin1, xMax1, yMax1, xMin2, yMin2, xMax2, yMax2, &areaTotal, &areaInter);
            if (static_cast<long long>(areaInter)*SSD_QUANT_BASE > static_cast<long long>(NMSThresh)*areaTotal)
            {
                // Suppress whichever of the two rows has the lower score.
                if (current[4] >= candidate[4])
                {
                    candidate[5] = 1;
                }
                else
                {
                    current[5] = 1;
                }
            }
        }
    }
}
// Computes the intersection and union areas of two boxes given by inclusive integer
// corner coordinates (hence the +1 on every extent).
//
// (xMin1, yMin1, xMax1, yMax1) : first box.
// (xMin2, yMin2, xMax2, yMax2) : second box.
// areaSum   : receives area1 + area2 - intersection.
// areaInter : receives the intersection area (0 when the boxes do not overlap).
void DetectorSSD::ComputeOverlap(int xMin1, int yMin1, int xMax1, int yMax1, int xMin2,
int yMin2, int xMax2, int yMax2, int* areaSum, int* areaInter)
{
    // Corners of the intersection rectangle.
    const int left = SSD_MAX(xMin1, xMin2);
    const int top = SSD_MAX(yMin1, yMin2);
    const int right = SSD_MIN(xMax1, xMax2);
    const int bottom = SSD_MIN(yMax1, yMax2);

    // Negative extents mean the boxes are disjoint along that axis.
    int overlapWidth = right - left + 1;
    if (overlapWidth < 0)
    {
        overlapWidth = 0;
    }
    int overlapHeight = bottom - top + 1;
    if (overlapHeight < 0)
    {
        overlapHeight = 0;
    }
    const int intersection = overlapWidth * overlapHeight;

    const int firstArea = (xMax1 - xMin1 + 1) * (yMax1 - yMin1 + 1);
    const int secondArea = (xMax2 - xMin2 + 1) * (yMax2 - yMin2 + 1);

    *areaSum = firstArea + secondArea - intersection;
    *areaInter = intersection;
}
// Exchanges two proposal rows element by element.
//
// src1, src2 : pointers to the first element of each row; SSD_PROPOSAL_WIDTH ints are
//              swapped.
void DetectorSSD::Swap(int* src1, int* src2)
{
    for (int k = 0; k < SSD_PROPOSAL_WIDTH; ++k)
    {
        const int saved = src1[k];
        src1[k] = src2[k];
        src2[k] = saved;
    }
}
// Converts the packed detection buffers into ResultOfDetection entries.
//
// resultsOfDetection : receives one entry per detection (appended). Only boundingBox,
//                      classID and confidence are filled in.
//
// Reads ssdParameter.dstScore / dstRoi, where the detections of all classes are stored
// back to back, and ssdParameter.classRoiNum, the number of detections per class.
// Class 0 produces no results. For each class, reading stops at the first score below
// the confidence threshold.
// An empty leftover `if` (its only content was a commented-out printf) that read
// score[] without using it has been removed.
void DetectorSSD::CreateDetectionResults(std::vector<ResultOfDetection> &resultsOfDetection)
{
    const int *scores = ssdParameter.dstScore;
    const int *rois = ssdParameter.dstRoi;
    const int *classRoiNum = ssdParameter.classRoiNum;
    const float minimumScore = ((float)ssdParameter.confThresh)/SSD_QUANT_BASE;
    const int classNum = ssdParameter.classNum;

    // Index of the first detection of the current class inside the packed buffers.
    int firstRoiOfClass = classRoiNum[0];
    for (int classId = 1; classId < classNum; classId++)
    {
        const int *classScores = scores + firstRoiOfClass;
        const int *classBoxes = rois + firstRoiOfClass * SSD_COORDI_NUM;
        const int roiCount = (int)classRoiNum[classId];

        for (int j = 0; j < roiCount; j++)
        {
            const float confidence = (float)classScores[j] / SSD_QUANT_BASE;
            if (confidence < minimumScore)
            {
                break;
            }
            const int xMin = classBoxes[j*SSD_COORDI_NUM];
            const int yMin = classBoxes[j*SSD_COORDI_NUM + 1];
            const int xMax = classBoxes[j*SSD_COORDI_NUM + 2];
            const int yMax = classBoxes[j*SSD_COORDI_NUM + 3];

            ResultOfDetection result;
            result.boundingBox.x=xMin;
            result.boundingBox.y=yMin;
            result.boundingBox.width=xMax-xMin+1;
            result.boundingBox.height=yMax-yMin+1;
            result.classID=classId;
            result.confidence=confidence;
            resultsOfDetection.push_back(result);
        }
        firstRoiOfClass += roiCount;
    }
}
}
#include <DetectorYOLOV3.h>
#include <migraphx/onnx.hpp>
#include <migraphx/gpu/target.hpp>
#include <migraphx/gpu/hip.hpp>
#include <migraphx/generate.hpp>
#include <migraphx/quantization.hpp>
#include <opencv2/dnn.hpp>
#include <CommonUtility.h>
#include <Filesystem.h>
#include <SimpleLog.h>
#include <hip/hip_runtime.h>
#include "hip/hip_runtime.h"
using namespace cv::dnn;
namespace migraphxSamples
{
// Constructs a detector that has no log file attached yet; all other setup is
// performed by Initialize().
DetectorYOLOV3::DetectorYOLOV3()
    : logFile(nullptr)
{
}
// Releases the configuration file handle.
// NOTE(review): when DMA is defined, Initialize() allocates preprocess_Image with
// hipMalloc; no matching hipFree is visible here - confirm the buffer is released
// elsewhere, otherwise it leaks.
DetectorYOLOV3::~DetectorYOLOV3()
{
configurationFile.release();
}
// Loads the configuration, parses and compiles the ONNX model for the GPU, runs one
// warm-up inference and loads the class names.
//
// initializationParameterOfDetector : forwarded to DoCommonInitialization().
// Returns SUCCESS, the error code reported by DoCommonInitialization(), or
// MODEL_NOT_EXIST when the model file is missing.
//
// Changes relative to the previous version:
//  * classNames is padded with empty names up to NumberOfClasses, so a missing or
//    short class-name file can no longer leave it shorter than the class ids that
//    Detect() indexes it with;
//  * a non-positive NumberOfClasses no longer reaches vector::resize;
//  * a failed hipMalloc (DMA build) is reported in the log.
ErrorCode DetectorYOLOV3::Initialize(InitializationParameterOfDetector initializationParameterOfDetector)
{
    // Common initialization (log file, configuration file, ...)
    ErrorCode errorCode=DoCommonInitialization(initializationParameterOfDetector);
    if(errorCode!=SUCCESS)
    {
        LOG_ERROR(logFile,"fail to DoCommonInitialization\n");
        return errorCode;
    }
    LOG_INFO(logFile,"succeed to DoCommonInitialization\n");

    // Read the parameters from the configuration file
    FileNode netNode = configurationFile["DetectorYOLOV3"];
    string modelPath=initializationParameter.parentPath+(string)netNode["ModelPath"];
    string pathOfClassNameFile=(string)netNode["ClassNameFile"];
    yolov3Parameter.confidenceThreshold = (float)netNode["ConfidenceThreshold"];
    yolov3Parameter.nmsThreshold = (float)netNode["NMSThreshold"];
    yolov3Parameter.objectThreshold = (float)netNode["ObjectThreshold"];
    yolov3Parameter.numberOfClasses=(int)netNode["NumberOfClasses"];
    useFP16=(bool)(int)netNode["UseFP16"];

    // Load the model
    if(Exists(modelPath)==false)
    {
        LOG_ERROR(logFile,"%s not exist!\n",modelPath.c_str());
        return MODEL_NOT_EXIST;
    }
    net = migraphx::parse_onnx(modelPath);
    LOG_INFO(logFile,"succeed to load model: %s\n",GetFileName(modelPath).c_str());

    // Query the model input (the first parameter is taken as the input)
    std::pair<std::string, migraphx::shape> inputAttribute=*(net.get_parameter_shapes().begin());
    inputName=inputAttribute.first;
    inputShape=inputAttribute.second;
    inputSize=cv::Size(inputShape.lens()[3],inputShape.lens()[2]);// NCHW

    // Target: GPU
    migraphx::target gpuTarget = migraphx::gpu::target{};

    // Quantization
    if(useFP16)
    {
        migraphx::quantize_fp16(net);
    }

    // Compile the model
    migraphx::compile_options options;
    options.device_id=0; // GPU device, device 0 by default (supported since version 1.2)
#ifdef DMA
    options.offload_copy=false;
#else
    options.offload_copy=true; // enable offload_copy
#endif
    net.compile(gpuTarget,options);
    LOG_INFO(logFile,"succeed to compile model: %s\n",GetFileName(modelPath).c_str());

    // Run once by itself
#ifdef DMA
    ParameterMap=CreateParameterMap(net);
    net.eval(ParameterMap);
#else
    migraphx::parameter_map inputData;
    inputData[inputName]=migraphx::generate_argument(inputShape);
    net.eval(inputData);
#endif

    // Read the class names, one per line
    if(!pathOfClassNameFile.empty())
    {
        ifstream classNameFile(pathOfClassNameFile);
        string line;
        while (getline(classNameFile, line))
        {
            classNames.push_back(line);
        }
    }
    // Guarantee one (possibly empty) name per class.
    if(yolov3Parameter.numberOfClasses > 0 &&
       classNames.size() < (size_t)yolov3Parameter.numberOfClasses)
    {
        classNames.resize(yolov3Parameter.numberOfClasses);
    }

#ifdef DMA
    // Device buffer that receives the preprocessed planar float image.
    if(hipMalloc((void**)&preprocess_Image, inputSize.height * inputSize.width * 3 * sizeof(float)) != hipSuccess)
    {
        LOG_ERROR(logFile,"fail to allocate device memory for preprocess_Image\n");
    }
#endif

    // log
    LOG_INFO(logFile,"InputSize:%dx%d\n",inputSize.width,inputSize.height);
    LOG_INFO(logFile,"InputName:%s\n",inputName.c_str());
    LOG_INFO(logFile,"ConfidenceThreshold:%f\n",yolov3Parameter.confidenceThreshold);
    LOG_INFO(logFile,"NMSThreshold:%f\n",yolov3Parameter.nmsThreshold);
    LOG_INFO(logFile,"objectThreshold:%f\n",yolov3Parameter.objectThreshold);
    LOG_INFO(logFile,"NumberOfClasses:%d\n",yolov3Parameter.numberOfClasses);
    return SUCCESS;
}
#ifdef DMA
// Converts a planar 8-bit image whose planes are stored in B, G, R order into planar
// float R, G, B scaled to [0, 1]. One thread handles one pixel.
//
// srcImage : three consecutive planes of width*height bytes (B, G, R).
// outImage : three consecutive planes of width*height floats (R, G, B).
__global__ void convert_bgrp_to_rgb_and_normalization_yolov3(unsigned char* srcImage, float* outImage, int width, int height)
{
    const int pixel = blockIdx.x * blockDim.x + threadIdx.x;
    if (pixel >= width * height)
    {
        return;
    }
    const unsigned char red = srcImage[pixel + width * height * 2];
    const unsigned char green = srcImage[pixel + width * height * 1];
    const unsigned char blue = srcImage[pixel + width * height * 0];
    const float scale = 255.0;
    outImage[pixel + width * height * 0] = red / scale;
    outImage[pixel + width * height * 1] = green / scale;
    outImage[pixel + width * height * 2] = blue / scale;
}
// Converts a planar YUV 4:2:0 image into planar float R, G, B scaled to [0, 1].
// One thread handles one pixel.
//
// srcImage : Y plane (width*height bytes) followed by the quarter-size U and V planes.
// outImage : three consecutive planes of width*height floats (R, G, B).
//
// Fixes relative to the previous version:
//  * the pixel index is derived from a linearised thread id, so the kernel covers the
//    whole image with the 1-D launch used by the caller (the old blockIdx.y /
//    threadIdx.y indexing processed only the first `width` pixels) and still works
//    with a 2-D launch;
//  * the output is written as separate R, G and B planes, like the sibling kernels,
//    instead of to the overlapping positions index+0..2;
//  * the values are clamped to [0, 255] and divided by 255, as the function name and
//    the sibling kernels imply.
__global__ void convert_yuv420p_to_rgb_and_normalization_yolov3(unsigned char* srcImage, float* outImage, int width, int height)
{
    // Linear thread id valid for both 1-D and 2-D launch configurations.
    const int block = blockIdx.y * gridDim.x + blockIdx.x;
    const int pixel = block * (blockDim.x * blockDim.y) + threadIdx.y * blockDim.x + threadIdx.x;
    const int planeSize = width * height;
    if (pixel >= planeSize)
    {
        return;
    }
    const int x = pixel % width;
    const int y = pixel / width;

    // One chroma sample is shared by a 2x2 block of luma samples.
    const int chroma = (y / 2) * (width / 2) + (x / 2);
    const unsigned char yValue = srcImage[pixel];
    const unsigned char uValue = srcImage[chroma + planeSize];
    const unsigned char vValue = srcImage[chroma + planeSize * 5 / 4];

    int r = yValue + 1.370705 * (vValue - 128);
    int g = yValue - 0.698001 * (vValue - 128) - 0.337633 * (uValue - 128);
    int b = yValue + 1.732446 * (uValue - 128);

    // The conversion can leave the 8-bit range; clamp before normalising.
    r = r < 0 ? 0 : (r > 255 ? 255 : r);
    g = g < 0 ? 0 : (g > 255 ? 255 : g);
    b = b < 0 ? 0 : (b > 255 ? 255 : b);

    const float scale = 255.0f;
    outImage[pixel + planeSize * 0] = r / scale;
    outImage[pixel + planeSize * 1] = g / scale;
    outImage[pixel + planeSize * 2] = b / scale;
}
// Converts an interleaved 8-bit RGBA image into planar float R, G, B scaled to [0, 1];
// the alpha byte is ignored. One thread handles one pixel.
//
// srcImage : width*height pixels of 4 bytes each (R, G, B, A).
// outImage : three consecutive planes of width*height floats (R, G, B).
__global__ void convert_rgba_to_rgb_and_normalization_yolov3(unsigned char* srcImage, float* outImage, int width, int height)
{
    const int pixel = blockIdx.x * blockDim.x + threadIdx.x;
    if (pixel >= width * height)
    {
        return;
    }
    const unsigned char red = srcImage[pixel * 4 + 0];
    const unsigned char green = srcImage[pixel * 4 + 1];
    const unsigned char blue = srcImage[pixel * 4 + 2];
    const float scale = 255.0;
    outImage[pixel + width * height * 0] = red / scale;
    outImage[pixel + width * height * 1] = green / scale;
    outImage[pixel + width * height * 2] = blue / scale;
}
#endif
// Runs YOLOv3 detection on a host image.
//
// srcImage           : 8-bit 3-channel image (CV_8UC3); anything else is rejected.
// resultsOfDetection : receives the detections that survive NMS (appended), with boxes
//                      scaled back to srcImage coordinates.
// dprep_time, deval_time, dpostp_time :
//                      accumulators (milliseconds) for the preprocessing, inference and
//                      post-processing time; the elapsed time is added to each. A null
//                      pointer is now accepted and simply skipped.
// Returns SUCCESS, or IMAGE_ERROR for an empty / wrongly typed image.
//
// The class name lookup is bounds-checked: a class id without a matching entry in
// classNames yields an empty name instead of an out-of-range access.
// NOTE(review): the first network output is assumed to have at least 3 dimensions,
// with rows laid out as cx, cy, w, h, objectness, class scores... - confirm against
// the exported model.
ErrorCode DetectorYOLOV3::Detect(const cv::Mat &srcImage, std::vector<ResultOfDetection> &resultsOfDetection, double* dprep_time, double* deval_time, double* dpostp_time)
{
    if(srcImage.empty()||srcImage.type()!=CV_8UC3)
    {
        LOG_ERROR(logFile, "image error!\n");
        return IMAGE_ERROR;
    }

    // Preprocess and convert to NCHW (scale to [0,1], resize, swap R and B)
    const double prepStart = getTickCount();
    cv::Mat inputBlob;
    blobFromImage(srcImage,
                  inputBlob,
                  1 / 255.0,
                  inputSize,
                  Scalar(0, 0, 0),
                  true,
                  false);

    // Input data
    migraphx::parameter_map inputData;
    inputData[inputName]= migraphx::argument{inputShape, (float*)inputBlob.data};
    const double prepEnd = getTickCount();

    // Inference
    const double evalStart = getTickCount();
    std::vector<migraphx::argument> inferenceResults = net.eval(inputData);
    const double evalEnd = getTickCount();

    // Fetch the inference result and wrap it in a cv::Mat (no copy)
    const double postStart = getTickCount();
    migraphx::argument result = inferenceResults[0];
    migraphx::shape outputShape = result.get_shape();
    int shape[]={static_cast<int>(outputShape.lens()[0]),static_cast<int>(outputShape.lens()[1]),static_cast<int>(outputShape.lens()[2])};
    cv::Mat out(3,shape,CV_32F,(unsigned char*)result.data());

    // Number of proposals and values per proposal
    const int numProposal = out.size[1];
    const int numOut = out.size[2];
    // View the output as one row per proposal
    cv::Mat predictions = out.reshape(0, numProposal);

    std::vector<float> confidences;
    std::vector<cv::Rect> boxes;
    std::vector<int> classIds;
    const float ratioh = (float)srcImage.rows / inputSize.height;
    const float ratiow = (float)srcImage.cols / inputSize.width;

    // Each row: cx, cy, w, h, box score, class scores...
    const float* pdata = (float*)predictions.data;
    for (int n = 0; n < numProposal; n++, pdata += numOut)
    {
        const float boxScores = pdata[4];
        if (!(boxScores > yolov3Parameter.objectThreshold))
        {
            continue;
        }
        cv::Mat scores = predictions.row(n).colRange(5, numOut);
        cv::Point classIdPoint;
        double maxClassScore;
        cv::minMaxLoc(scores, 0, &maxClassScore, 0, &classIdPoint);
        maxClassScore *= boxScores;
        if (maxClassScore > yolov3Parameter.confidenceThreshold)
        {
            const float cx = pdata[0] * ratiow;
            const float cy = pdata[1] * ratioh;
            const float w = pdata[2] * ratiow;
            const float h = pdata[3] * ratioh;
            const int left = int(cx - 0.5 * w);
            const int top = int(cy - 0.5 * h);
            confidences.push_back((float)maxClassScore);
            boxes.push_back(cv::Rect(left, top, (int)(w), (int)(h)));
            classIds.push_back(classIdPoint.x);
        }
    }

    // Non-maximum suppression removes redundant overlapping boxes
    std::vector<int> indices;
    dnn::NMSBoxes(boxes, confidences, yolov3Parameter.confidenceThreshold, yolov3Parameter.nmsThreshold, indices);
    for (size_t i = 0; i < indices.size(); ++i)
    {
        const int idx = indices[i];
        const int classID = classIds[idx];
        ResultOfDetection result;
        result.boundingBox=boxes[idx];
        result.confidence=confidences[idx];// confidence
        result.classID=classID; // label
        if (classID >= 0 && (size_t)classID < classNames.size())
        {
            result.className=classNames[classID];
        }
        else
        {
            result.className=string();
        }
        resultsOfDetection.push_back(result);
    }
    const double postEnd = getTickCount();

    // Accumulate the timings in milliseconds
    if (dprep_time != NULL)
    {
        *dprep_time += (prepEnd - prepStart) *1000 / getTickFrequency();
    }
    if (deval_time != NULL)
    {
        *deval_time += (evalEnd - evalStart) *1000 / getTickFrequency();
    }
    if (dpostp_time != NULL)
    {
        *dpostp_time += (postEnd - postStart) *1000 / getTickFrequency();
    }
    return SUCCESS;
}
#ifdef DMA
// Runs YOLOv3 detection on a frame that already resides in device memory.
//
// srcImage           : decoded frame; its pixel format selects the conversion kernel
//                      that fills preprocess_Image with planar float RGB.
// resultsOfDetection : receives the detections that survive NMS (appended).
// Returns SUCCESS, or IMAGE_ERROR when the frame cannot be processed.
//
// Changes relative to the previous version:
//  * a frame with more pixels than the model input is rejected: the kernels write
//    3 * width * height floats into preprocess_Image, which Initialize() sizes for
//    inputSize only, so a larger frame would overflow the buffer;
//  * an unsupported pixel format is rejected instead of silently running inference on
//    whatever the buffer contained;
//  * the class name lookup is bounds-checked.
// NOTE(review): the kernels do not resize, so the frame is presumably expected to match
// the model input size exactly - confirm with the decoder configuration.
// NOTE(review): with offload_copy disabled the output is read through result.data()
// directly; confirm that this pointer is host-accessible.
ErrorCode DetectorYOLOV3::Detect(DCU_Frame &srcImage, std::vector<ResultOfDetection> &resultsOfDetection)
{
    if (srcImage.width <= 0 || srcImage.height <= 0 ||
        srcImage.width * srcImage.height > inputSize.width * inputSize.height)
    {
        LOG_ERROR(logFile, "image error!\n");
        return IMAGE_ERROR;
    }

    // One thread per pixel
    int block_size = 256;
    int num_blocks = (srcImage.width * srcImage.height + block_size - 1) / block_size;
    if(srcImage.format == AV_PIX_FMT_BGRP)
    {
        convert_bgrp_to_rgb_and_normalization_yolov3<<<num_blocks, block_size>>>(srcImage.dcu_data, preprocess_Image, srcImage.width, srcImage.height);
    }
    else if(srcImage.format == AV_PIX_FMT_YUV420P)
    {
        convert_yuv420p_to_rgb_and_normalization_yolov3<<<num_blocks, block_size>>>(srcImage.dcu_data, preprocess_Image, srcImage.width, srcImage.height);
    }
    else if(srcImage.format == AV_PIX_FMT_RGBA)
    {
        convert_rgba_to_rgb_and_normalization_yolov3<<<num_blocks, block_size>>>(srcImage.dcu_data, preprocess_Image, srcImage.width, srcImage.height);
    }
    else
    {
        LOG_ERROR(logFile, "unsupported pixel format!\n");
        return IMAGE_ERROR;
    }

    // Input data
    ParameterMap[inputName] = migraphx::argument{inputShape, preprocess_Image};

    // Inference
    std::vector<migraphx::argument> inferenceResults = net.eval(ParameterMap);

    // Fetch the inference result and wrap it in a cv::Mat (no copy)
    migraphx::argument result = inferenceResults[0];
    migraphx::shape outputShape = result.get_shape();
    int shape[]={static_cast<int>(outputShape.lens()[0]), static_cast<int>(outputShape.lens()[1]), static_cast<int>(outputShape.lens()[2])};
    cv::Mat out(3, shape, CV_32F, (unsigned char*)result.data());

    // Number of proposals and values per proposal
    const int numProposal = out.size[1];
    const int numOut = out.size[2];
    // View the output as one row per proposal
    cv::Mat predictions = out.reshape(0, numProposal);

    std::vector<float> confidences;
    std::vector<cv::Rect> boxes;
    std::vector<int> classIds;
    const float ratioh = (float)srcImage.height / inputSize.height;
    const float ratiow = (float)srcImage.width / inputSize.width;

    // Each row: cx, cy, w, h, box score, class scores...
    const float* pdata = (float*)predictions.data;
    for (int n = 0; n < numProposal; n++, pdata += numOut)
    {
        const float boxScores = pdata[4];
        if (!(boxScores > yolov3Parameter.objectThreshold))
        {
            continue;
        }
        cv::Mat scores = predictions.row(n).colRange(5, numOut);
        cv::Point classIdPoint;
        double maxClassScore;
        cv::minMaxLoc(scores, 0, &maxClassScore, 0, &classIdPoint);
        maxClassScore *= boxScores;
        if (maxClassScore > yolov3Parameter.confidenceThreshold)
        {
            const float cx = pdata[0] * ratiow;
            const float cy = pdata[1] * ratioh;
            const float w = pdata[2] * ratiow;
            const float h = pdata[3] * ratioh;
            const int left = int(cx - 0.5 * w);
            const int top = int(cy - 0.5 * h);
            confidences.push_back((float)maxClassScore);
            boxes.push_back(cv::Rect(left, top, (int)(w), (int)(h)));
            classIds.push_back(classIdPoint.x);
        }
    }

    // Non-maximum suppression removes redundant overlapping boxes
    std::vector<int> indices;
    dnn::NMSBoxes(boxes, confidences, yolov3Parameter.confidenceThreshold, yolov3Parameter.nmsThreshold, indices);
    for (size_t i = 0; i < indices.size(); ++i)
    {
        const int idx = indices[i];
        const int classID = classIds[idx];
        ResultOfDetection result;
        result.boundingBox=boxes[idx];
        result.confidence=confidences[idx];
        result.classID=classID;
        if (classID >= 0 && (size_t)classID < classNames.size())
        {
            result.className=classNames[classID];
        }
        else
        {
            result.className=string();
        }
        resultsOfDetection.push_back(result);
    }
    return SUCCESS;
}
#endif
// Shared start-up work for the YOLOv3 detector: stores the initialization parameters,
// fetches the log file, opens the configuration file and makes sure a non-empty parent
// path ends with a path separator.
// Returns CONFIG_FILE_NOT_EXIST, FAIL_TO_OPEN_CONFIG_FILE or SUCCESS.
ErrorCode DetectorYOLOV3::DoCommonInitialization(InitializationParameterOfDetector initializationParameterOfDetector)
{
    initializationParameter = initializationParameterOfDetector;

    // Fetch the log file registered under the configured log name
    logFile = LogManager::GetInstance()->GetLogFile(initializationParameter.logName);

    // Load the configuration file
    std::string pathOfConfig = initializationParameter.configFilePath;
    if (Exists(pathOfConfig) == false)
    {
        LOG_ERROR(logFile, "no configuration file!\n");
        return CONFIG_FILE_NOT_EXIST;
    }
    const bool opened = configurationFile.open(pathOfConfig, FileStorage::READ);
    if (opened == false)
    {
        LOG_ERROR(logFile, "fail to open configuration file\n");
        return FAIL_TO_OPEN_CONFIG_FILE;
    }
    LOG_INFO(logFile, "succeed to open configuration file\n");

    // Append a trailing separator to the parent path when it is missing
    std::string &root = initializationParameter.parentPath;
    if (root.empty() == false && !IsPathSeparator(root[root.size() - 1]))
    {
        root += PATH_SEPARATOR;
    }
    return SUCCESS;
}
}
#include <DetectorYOLOV5.h>
#include <migraphx/onnx.hpp>
#include <migraphx/gpu/target.hpp>
#include <migraphx/gpu/hip.hpp>
#include <migraphx/generate.hpp>
#include <migraphx/quantization.hpp>
#include <opencv2/dnn.hpp>
#include <CommonUtility.h>
#include <Filesystem.h>
#include <SimpleLog.h>
#include <hip/hip_runtime.h>
#include "hip/hip_runtime.h"
using namespace cv::dnn;
namespace migraphxSamples
{
// Creates a detector with no log file attached yet; the log file is assigned later
// in DoCommonInitialization().
DetectorYOLOV5::DetectorYOLOV5()
    : logFile(nullptr)
{
}
// Releases what the detector acquired: the configuration file storage and, in DMA
// builds, the device buffer that Initialize() allocates with hipMalloc.
DetectorYOLOV5::~DetectorYOLOV5()
{
    configurationFile.release();
#ifdef DMA
    // preprocess_Image starts out as NULL and is only set by hipMalloc in Initialize()
    if (preprocess_Image != NULL)
    {
        hipFree(preprocess_Image);
        preprocess_Image = NULL;
    }
#endif
}
// Initializes the YOLOv5 detector: runs the common initialization, reads the
// "DetectorYOLOV5" settings from the configuration file, loads and compiles the ONNX
// model for the GPU, runs one inference up front, loads the class names and (in DMA
// builds) allocates the device buffer used for preprocessing.
//
// Returns the error of DoCommonInitialization(), MODEL_NOT_EXIST when the model file is
// missing, otherwise SUCCESS.
ErrorCode DetectorYOLOV5::Initialize(InitializationParameterOfDetector initializationParameterOfDetector)
{
    // Common initialization (log file, configuration file, parent path)
    ErrorCode errorCode=DoCommonInitialization(initializationParameterOfDetector);
    if(errorCode!=SUCCESS)
    {
        LOG_ERROR(logFile,"fail to DoCommonInitialization\n");
        return errorCode;
    }
    LOG_INFO(logFile,"succeed to DoCommonInitialization\n");

    // Read the detector parameters from the configuration file
    FileNode netNode = configurationFile["DetectorYOLOV5"];
    string modelPath=initializationParameter.parentPath+(string)netNode["ModelPath"];
    string pathOfClassNameFile=(string)netNode["ClassNameFile"];
    yolov5Parameter.confidenceThreshold = (float)netNode["ConfidenceThreshold"];
    yolov5Parameter.nmsThreshold = (float)netNode["NMSThreshold"];
    yolov5Parameter.objectThreshold = (float)netNode["ObjectThreshold"];
    yolov5Parameter.numberOfClasses=(int)netNode["NumberOfClasses"];
    useFP16=(bool)(int)netNode["UseFP16"];

    // Load the model
    if(Exists(modelPath)==false)
    {
        LOG_ERROR(logFile,"%s not exist!\n",modelPath.c_str());
        return MODEL_NOT_EXIST;
    }
    net = migraphx::parse_onnx(modelPath);
    LOG_INFO(logFile,"succeed to load model: %s\n",GetFileName(modelPath).c_str());

    // Take name and shape of the first model parameter as the network input (NCHW)
    std::pair<std::string, migraphx::shape> inputAttribute=*(net.get_parameter_shapes().begin());
    inputName=inputAttribute.first;
    inputShape=inputAttribute.second;
    inputSize=cv::Size(inputShape.lens()[3],inputShape.lens()[2]);// NCHW

    // Target the GPU
    migraphx::target gpuTarget = migraphx::gpu::target{};

    // Optional FP16 quantization
    if(useFP16)
    {
        migraphx::quantize_fp16(net);
    }

    // Compile the model
    migraphx::compile_options options;
    options.device_id=0; // GPU device to use; device 0 is the default (supported from version 1.2 on)
#ifdef DMA
    options.offload_copy=false; // input buffers are supplied by this class
#else
    options.offload_copy=true; // set offload_copy
#endif
    net.compile(gpuTarget,options);
    LOG_INFO(logFile,"succeed to compile model: %s\n",GetFileName(modelPath).c_str());

    // Run one inference up front
#ifdef DMA
    ParameterMap=CreateParameterMap(net);
    net.eval(ParameterMap);
#else
    migraphx::parameter_map inputData;
    inputData[inputName]=migraphx::generate_argument(inputShape);
    net.eval(inputData);
#endif

    // Read the class names, one per line
    if(!pathOfClassNameFile.empty())
    {
        ifstream classNameFile(pathOfClassNameFile);
        if(!classNameFile.is_open())
        {
            LOG_ERROR(logFile,"fail to open class name file: %s\n",pathOfClassNameFile.c_str());
        }
        string line;
        while (getline(classNameFile, line))
        {
            classNames.push_back(line);
        }
    }
    // Detect() indexes classNames by class id, so pad with empty names when the file is
    // absent, unreadable or shorter than the configured number of classes.
    if(yolov5Parameter.numberOfClasses>0 && classNames.size()<static_cast<size_t>(yolov5Parameter.numberOfClasses))
    {
        classNames.resize(yolov5Parameter.numberOfClasses);
    }

#ifdef DMA
    // Device buffer holding the planar float input tensor (3 x height x width)
    const size_t preprocessBytes = static_cast<size_t>(inputSize.height) * inputSize.width * 3 * sizeof(float);
    if(hipMalloc((void**)&preprocess_Image, preprocessBytes)!=hipSuccess)
    {
        // No dedicated ErrorCode is visible here, so the failure is reported in the log
        preprocess_Image = NULL;
        LOG_ERROR(logFile,"fail to allocate device buffer for preprocessing\n");
    }
#endif

    // log
    LOG_INFO(logFile,"InputSize:%dx%d\n",inputSize.width,inputSize.height);
    LOG_INFO(logFile,"InputName:%s\n",inputName.c_str());
    LOG_INFO(logFile,"ConfidenceThreshold:%f\n",yolov5Parameter.confidenceThreshold);
    LOG_INFO(logFile,"NMSThreshold:%f\n",yolov5Parameter.nmsThreshold);
    LOG_INFO(logFile,"objectThreshold:%f\n",yolov5Parameter.objectThreshold);
    LOG_INFO(logFile,"NumberOfClasses:%d\n",yolov5Parameter.numberOfClasses);
    return SUCCESS;
}
#ifdef DMA
// HIP kernel: reads an 8-bit planar image whose planes are stored in B, G, R order and
// writes planar R, G, B floats, each divided by 255.
// Each thread handles the pixel given by its flattened 1-D global thread index.
__global__ void convert_bgrp_to_rgb_and_normalization_yolov5(unsigned char* srcImage, float* outImage, int width, int height)
{
    const int planeSize = width * height;
    const int pixel = blockIdx.x * blockDim.x + threadIdx.x;
    if (pixel >= planeSize)
    {
        return;
    }
    const float maxValue = 255.0f;
    const unsigned char blue = srcImage[pixel];
    const unsigned char green = srcImage[pixel + planeSize];
    const unsigned char red = srcImage[pixel + planeSize * 2];
    // Output planes: 0 = R, 1 = G, 2 = B
    outImage[pixel] = red / maxValue;
    outImage[pixel + planeSize] = green / maxValue;
    outImage[pixel + planeSize * 2] = blue / maxValue;
}
// HIP kernel: converts a tightly packed YUV420P image (Y plane, then U, then V, each
// chroma plane a quarter of the luma size) into planar R, G, B floats divided by 255,
// matching the output layout of the BGRP and RGBA kernels.
//
// Fixes over the previous version:
//  - the pixel index is the flattened global thread index, so the 1-D launch used by
//    Detect() covers the whole image (a 2-D launch is covered as well);
//  - the three channels go to separate planes instead of overlapping neighbouring pixels;
//  - values are clamped to [0, 255] and normalized, as the kernel name promises.
__global__ void convert_yuv420p_to_rgb_and_normalization_yolov5(unsigned char* srcImage, float* outImage, int width, int height)
{
    const int planeSize = width * height;
    // Flatten (x, y) thread coordinates; the y term is 0 for a 1-D launch
    const int row = blockIdx.y * blockDim.y + threadIdx.y;
    const int index = row * (gridDim.x * blockDim.x) + blockIdx.x * blockDim.x + threadIdx.x;
    if (index >= planeSize)
        return;
    const int x = index % width;
    const int y = index / width;

    // One U and one V sample is shared by each 2x2 block of pixels
    const int chromaIndex = (y / 2) * (width / 2) + (x / 2);
    const int uIndex = chromaIndex + planeSize;
    const int vIndex = chromaIndex + planeSize * 5 / 4;
    const unsigned char yValue = srcImage[index];
    const unsigned char uValue = srcImage[uIndex];
    const unsigned char vValue = srcImage[vIndex];

    int r = yValue + 1.370705 * (vValue - 128);
    int g = yValue - 0.698001 * (vValue - 128) - 0.337633 * (uValue - 128);
    int b = yValue + 1.732446 * (uValue - 128);
    // The conversion can leave the 8-bit range; clamp before normalizing
    r = r < 0 ? 0 : (r > 255 ? 255 : r);
    g = g < 0 ? 0 : (g > 255 ? 255 : g);
    b = b < 0 ? 0 : (b > 255 ? 255 : b);

    const float sum = 255.0f;
    outImage[index + planeSize * 0] = r / sum;
    outImage[index + planeSize * 1] = g / sum;
    outImage[index + planeSize * 2] = b / sum;
}
// HIP kernel: reads an interleaved 8-bit image with four bytes per pixel (R, G, B and a
// fourth byte that is ignored) and writes planar R, G, B floats, each divided by 255.
// Each thread handles the pixel given by its flattened 1-D global thread index.
__global__ void convert_rgba_to_rgb_and_normalization_yolov5(unsigned char* srcImage, float* outImage, int width, int height)
{
    const int planeSize = width * height;
    const int pixel = blockIdx.x * blockDim.x + threadIdx.x;
    if (pixel >= planeSize)
    {
        return;
    }
    const float maxValue = 255.0f;
    const unsigned char* texel = srcImage + pixel * 4;
    // Output planes: 0 = R, 1 = G, 2 = B
    outImage[pixel] = texel[0] / maxValue;
    outImage[pixel + planeSize] = texel[1] / maxValue;
    outImage[pixel + planeSize * 2] = texel[2] / maxValue;
}
#endif
// Runs YOLOv5 detection on a host image.
//
// srcImage            non-empty 8-bit 3-channel image (CV_8UC3).
// resultsOfDetection  detections kept by NMS are appended (the vector is not cleared).
// Returns IMAGE_ERROR for an empty or non-CV_8UC3 image, otherwise SUCCESS.
ErrorCode DetectorYOLOV5::Detect(const cv::Mat &srcImage, std::vector<ResultOfDetection> &resultsOfDetection)
{
    if(srcImage.empty()||srcImage.type()!=CV_8UC3)
    {
        LOG_ERROR(logFile, "image error!\n");
        return IMAGE_ERROR;
    }

    // Preprocess into an NCHW blob: scale by 1/255, resize to the network input size,
    // swap the R and B channels, no cropping
    cv::Mat inputBlob;
    blobFromImage(srcImage,
                  inputBlob,
                  1 / 255.0,
                  inputSize,
                  Scalar(0, 0, 0),
                  true,
                  false);

    // Bind the blob as the network input
    migraphx::parameter_map inputData;
    inputData[inputName]= migraphx::argument{inputShape, (float*)inputBlob.data};

    // Inference
    std::vector<migraphx::argument> inferenceResults = net.eval(inputData);

    // Wrap the first output as a 3-D float cv::Mat (no copy; 'output' keeps the data alive)
    migraphx::argument output = inferenceResults[0];
    migraphx::shape outputShape = output.get_shape();
    int shape[]={static_cast<int>(outputShape.lens()[0]),static_cast<int>(outputShape.lens()[1]),static_cast<int>(outputShape.lens()[2])};
    cv::Mat out(3,shape,CV_32F,(unsigned char*)output.data());

    // Number of proposals and number of values per proposal
    const int numProposal = out.size[1];
    const int numOut = out.size[2];

    // Reshape so that every row is one proposal: cx, cy, w, h, box score, class scores...
    cv::Mat proposals = out.reshape(0, numProposal);

    // Collect the candidate boxes
    std::vector<float> confidences;
    std::vector<cv::Rect> boxes;
    std::vector<int> classIds;
    const float ratioh = (float)srcImage.rows / inputSize.height;
    const float ratiow = (float)srcImage.cols / inputSize.width;
    const float* pdata = (float*)proposals.data;
    for (int n = 0; n < numProposal; ++n, pdata += numOut)
    {
        const float boxScores = pdata[4];
        if (!(boxScores > yolov5Parameter.objectThreshold))
        {
            continue;
        }
        cv::Mat scores = proposals.row(n).colRange(5, numOut);
        cv::Point classIdPoint;
        double maxClassScore;
        cv::minMaxLoc(scores, 0, &maxClassScore, 0, &classIdPoint);
        maxClassScore *= boxScores;
        if (!(maxClassScore > yolov5Parameter.confidenceThreshold))
        {
            continue;
        }
        // Scale the box from network-input coordinates to source-image coordinates
        const float cx = pdata[0] * ratiow;
        const float cy = pdata[1] * ratioh;
        const float w = pdata[2] * ratiow;
        const float h = pdata[3] * ratioh;
        const int left = int(cx - 0.5 * w);
        const int top = int(cy - 0.5 * h);
        confidences.push_back((float)maxClassScore);
        boxes.push_back(cv::Rect(left, top, (int)(w), (int)(h)));
        classIds.push_back(classIdPoint.x);
    }

    // Non-maximum suppression removes redundant overlapping boxes
    std::vector<int> indices;
    dnn::NMSBoxes(boxes, confidences, yolov5Parameter.confidenceThreshold, yolov5Parameter.nmsThreshold, indices);
    for (size_t i = 0; i < indices.size(); ++i)
    {
        const int idx = indices[i];
        const int classID = classIds[idx];
        ResultOfDetection detection;
        detection.boundingBox = boxes[idx];
        detection.confidence = confidences[idx]; // confidence
        detection.classID = classID;             // label
        // The class-name list may be shorter than the model's class count; fall back to
        // an empty name instead of indexing out of range.
        detection.className = (classID >= 0 && static_cast<size_t>(classID) < classNames.size()) ? classNames[classID] : string();
        resultsOfDetection.push_back(detection);
    }
    return SUCCESS;
}
#ifdef DMA
// Runs YOLOv5 detection on a frame whose pixels are already in device memory (DMA build).
//
// srcImage            frame descriptor; format, width, height and dcu_data are read.
// resultsOfDetection  detections kept by NMS are appended (the vector is not cleared).
// Returns SUCCESS, or IMAGE_ERROR when the frame cannot be turned into a valid network input.
ErrorCode DetectorYOLOV5::Detect(DCU_Frame &srcImage, std::vector<ResultOfDetection> &resultsOfDetection)
{
    // The conversion kernels write three planes of width*height floats into
    // preprocess_Image, which Initialize() sizes for the network input and which is then
    // handed to the network as a tensor of inputShape. A frame of any other size would
    // overrun or mis-fill that buffer, so it is rejected.
    if (preprocess_Image == NULL || srcImage.dcu_data == NULL ||
        srcImage.width != inputSize.width || srcImage.height != inputSize.height)
    {
        LOG_ERROR(logFile, "image error!\n");
        return IMAGE_ERROR;
    }

    // One thread per pixel, launched as a 1-D grid
    const int block_size = 256;
    const int num_blocks = (srcImage.width * srcImage.height + block_size - 1) / block_size;
    if (srcImage.format == AV_PIX_FMT_BGRP)
    {
        convert_bgrp_to_rgb_and_normalization_yolov5<<<num_blocks, block_size>>>(srcImage.dcu_data, preprocess_Image, srcImage.width, srcImage.height);
    }
    else if (srcImage.format == AV_PIX_FMT_YUV420P)
    {
        convert_yuv420p_to_rgb_and_normalization_yolov5<<<num_blocks, block_size>>>(srcImage.dcu_data, preprocess_Image, srcImage.width, srcImage.height);
    }
    else if (srcImage.format == AV_PIX_FMT_RGBA)
    {
        convert_rgba_to_rgb_and_normalization_yolov5<<<num_blocks, block_size>>>(srcImage.dcu_data, preprocess_Image, srcImage.width, srcImage.height);
    }
    else
    {
        // No kernel ran, so preprocess_Image does not contain this frame
        LOG_ERROR(logFile, "image error!\n");
        return IMAGE_ERROR;
    }

    // Bind the preprocessed buffer as the network input
    ParameterMap[inputName] = migraphx::argument{inputShape, preprocess_Image};

    // Inference
    std::vector<migraphx::argument> inferenceResults = net.eval(ParameterMap);

    // Wrap the first output as a 3-D float cv::Mat (no copy; 'output' keeps the data alive)
    // NOTE(review): the values are read on the host below while the model is compiled
    // with offload_copy=false in DMA builds; confirm that the output buffer produced
    // through ParameterMap is host-accessible.
    migraphx::argument output = inferenceResults[0];
    migraphx::shape outputShape = output.get_shape();
    int shape[]={static_cast<int>(outputShape.lens()[0]), static_cast<int>(outputShape.lens()[1]), static_cast<int>(outputShape.lens()[2])};
    cv::Mat out(3, shape, CV_32F, (unsigned char*)output.data());

    // Number of proposals and number of values per proposal
    const int numProposal = out.size[1];
    const int numOut = out.size[2];

    // Reshape so that every row is one proposal: cx, cy, w, h, box score, class scores...
    cv::Mat proposals = out.reshape(0, numProposal);

    // Collect the candidate boxes
    std::vector<float> confidences;
    std::vector<cv::Rect> boxes;
    std::vector<int> classIds;
    const float ratioh = (float)srcImage.height / inputSize.height;
    const float ratiow = (float)srcImage.width / inputSize.width;
    const float* pdata = (float*)proposals.data;
    for (int n = 0; n < numProposal; ++n, pdata += numOut)
    {
        const float boxScores = pdata[4];
        if (!(boxScores > yolov5Parameter.objectThreshold))
        {
            continue;
        }
        cv::Mat scores = proposals.row(n).colRange(5, numOut);
        cv::Point classIdPoint;
        double maxClassScore;
        cv::minMaxLoc(scores, 0, &maxClassScore, 0, &classIdPoint);
        maxClassScore *= boxScores;
        if (!(maxClassScore > yolov5Parameter.confidenceThreshold))
        {
            continue;
        }
        // Scale the box from network-input coordinates to frame coordinates
        const float cx = pdata[0] * ratiow;
        const float cy = pdata[1] * ratioh;
        const float w = pdata[2] * ratiow;
        const float h = pdata[3] * ratioh;
        const int left = int(cx - 0.5 * w);
        const int top = int(cy - 0.5 * h);
        confidences.push_back((float)maxClassScore);
        boxes.push_back(cv::Rect(left, top, (int)(w), (int)(h)));
        classIds.push_back(classIdPoint.x);
    }

    // Non-maximum suppression removes redundant overlapping boxes
    std::vector<int> indices;
    dnn::NMSBoxes(boxes, confidences, yolov5Parameter.confidenceThreshold, yolov5Parameter.nmsThreshold, indices);
    for (size_t i = 0; i < indices.size(); ++i)
    {
        const int idx = indices[i];
        const int classID = classIds[idx];
        ResultOfDetection detection;
        detection.boundingBox = boxes[idx];
        detection.confidence = confidences[idx];
        detection.classID = classID;
        // The class-name list may be shorter than the model's class count; fall back to
        // an empty name instead of indexing out of range.
        detection.className = (classID >= 0 && static_cast<size_t>(classID) < classNames.size()) ? classNames[classID] : string();
        resultsOfDetection.push_back(detection);
    }
    return SUCCESS;
}
#endif
// Shared start-up work for the YOLOv5 detector: stores the initialization parameters,
// fetches the log file, opens the configuration file and makes sure a non-empty parent
// path ends with a path separator.
// Returns CONFIG_FILE_NOT_EXIST, FAIL_TO_OPEN_CONFIG_FILE or SUCCESS.
ErrorCode DetectorYOLOV5::DoCommonInitialization(InitializationParameterOfDetector initializationParameterOfDetector)
{
    initializationParameter = initializationParameterOfDetector;

    // Fetch the log file registered under the configured log name
    logFile = LogManager::GetInstance()->GetLogFile(initializationParameter.logName);

    // Load the configuration file
    std::string pathOfConfig = initializationParameter.configFilePath;
    if (Exists(pathOfConfig) == false)
    {
        LOG_ERROR(logFile, "no configuration file!\n");
        return CONFIG_FILE_NOT_EXIST;
    }
    const bool opened = configurationFile.open(pathOfConfig, FileStorage::READ);
    if (opened == false)
    {
        LOG_ERROR(logFile, "fail to open configuration file\n");
        return FAIL_TO_OPEN_CONFIG_FILE;
    }
    LOG_INFO(logFile, "succeed to open configuration file\n");

    // Append a trailing separator to the parent path when it is missing
    std::string &root = initializationParameter.parentPath;
    if (root.empty() == false && !IsPathSeparator(root[root.size() - 1]))
    {
        root += PATH_SEPARATOR;
    }
    return SUCCESS;
}
}
#include <DetectorYOLOV7.h>
#include <migraphx/onnx.hpp>
#include <migraphx/gpu/target.hpp>
#include <migraphx/gpu/hip.hpp>
#include <migraphx/generate.hpp>
#include <migraphx/quantization.hpp>
#include <opencv2/dnn.hpp>
#include <CommonUtility.h>
#include <Filesystem.h>
#include <SimpleLog.h>
#include <hip/hip_runtime.h>
#include "hip/hip_runtime.h"
using namespace cv::dnn;
namespace migraphxSamples
{
// Creates a detector with no log file attached yet; the log file is assigned later
// in DoCommonInitialization().
DetectorYOLOV7::DetectorYOLOV7()
    : logFile(nullptr)
{
}
// Destructor: closes the configuration file storage.
// NOTE(review): the device buffer that Initialize() obtains with hipMalloc in DMA builds
// (preprocess_Image) is not released here — confirm who is meant to free it.
DetectorYOLOV7::~DetectorYOLOV7()
{
    this->configurationFile.release();
}
// Initializes the YOLOv7 detector: runs the common initialization, reads the
// "DetectorYOLOV7" settings from the configuration file, loads and compiles the ONNX
// model for the GPU, runs one inference up front, loads the class names and (in DMA
// builds) allocates the device buffer used for preprocessing.
//
// Returns the error of DoCommonInitialization(), MODEL_NOT_EXIST when the model file is
// missing, otherwise SUCCESS.
ErrorCode DetectorYOLOV7::Initialize(InitializationParameterOfDetector initializationParameterOfDetector)
{
    // Common initialization (log file, configuration file, parent path)
    ErrorCode errorCode=DoCommonInitialization(initializationParameterOfDetector);
    if(errorCode!=SUCCESS)
    {
        LOG_ERROR(logFile,"fail to DoCommonInitialization\n");
        return errorCode;
    }
    LOG_INFO(logFile,"succeed to DoCommonInitialization\n");

    // Read the detector parameters from the configuration file
    FileNode netNode = configurationFile["DetectorYOLOV7"];
    string modelPath=initializationParameter.parentPath+(string)netNode["ModelPath"];
    string pathOfClassNameFile=(string)netNode["ClassNameFile"];
    yolov7Parameter.confidenceThreshold = (float)netNode["ConfidenceThreshold"];
    yolov7Parameter.nmsThreshold = (float)netNode["NMSThreshold"];
    yolov7Parameter.objectThreshold = (float)netNode["ObjectThreshold"];
    yolov7Parameter.numberOfClasses=(int)netNode["NumberOfClasses"];
    useFP16=(bool)(int)netNode["UseFP16"];

    // Load the model
    if(Exists(modelPath)==false)
    {
        LOG_ERROR(logFile,"%s not exist!\n",modelPath.c_str());
        return MODEL_NOT_EXIST;
    }
    net = migraphx::parse_onnx(modelPath);
    LOG_INFO(logFile,"succeed to load model: %s\n",GetFileName(modelPath).c_str());

    // Take name and shape of the first model parameter as the network input (NCHW)
    std::pair<std::string, migraphx::shape> inputAttribute=*(net.get_parameter_shapes().begin());
    inputName=inputAttribute.first;
    inputShape=inputAttribute.second;
    inputSize=cv::Size(inputShape.lens()[3],inputShape.lens()[2]);// NCHW

    // Target the GPU
    migraphx::target gpuTarget = migraphx::gpu::target{};

    // Optional FP16 quantization
    if(useFP16)
    {
        migraphx::quantize_fp16(net);
    }

    // Compile the model
    migraphx::compile_options options;
    options.device_id=0; // GPU device to use; device 0 is the default (supported from version 1.2 on)
#ifdef DMA
    options.offload_copy=false; // input buffers are supplied by this class
#else
    options.offload_copy=true; // set offload_copy
#endif
    net.compile(gpuTarget,options);
    LOG_INFO(logFile,"succeed to compile model: %s\n",GetFileName(modelPath).c_str());

    // Run one inference up front
#ifdef DMA
    ParameterMap=CreateParameterMap(net);
    net.eval(ParameterMap);
#else
    migraphx::parameter_map inputData;
    inputData[inputName]=migraphx::generate_argument(inputShape);
    net.eval(inputData);
#endif

    // Read the class names, one per line
    if(!pathOfClassNameFile.empty())
    {
        ifstream classNameFile(pathOfClassNameFile);
        if(!classNameFile.is_open())
        {
            LOG_ERROR(logFile,"fail to open class name file: %s\n",pathOfClassNameFile.c_str());
        }
        string line;
        while (getline(classNameFile, line))
        {
            classNames.push_back(line);
        }
    }
    // Detect() indexes classNames by class id, so pad with empty names when the file is
    // absent, unreadable or shorter than the configured number of classes.
    if(yolov7Parameter.numberOfClasses>0 && classNames.size()<static_cast<size_t>(yolov7Parameter.numberOfClasses))
    {
        classNames.resize(yolov7Parameter.numberOfClasses);
    }

#ifdef DMA
    // Device buffer holding the planar float input tensor (3 x height x width)
    const size_t preprocessBytes = static_cast<size_t>(inputSize.height) * inputSize.width * 3 * sizeof(float);
    if(hipMalloc((void**)&preprocess_Image, preprocessBytes)!=hipSuccess)
    {
        // No dedicated ErrorCode is visible here, so the failure is reported in the log
        preprocess_Image = NULL;
        LOG_ERROR(logFile,"fail to allocate device buffer for preprocessing\n");
    }
#endif

    // log
    LOG_INFO(logFile,"InputSize:%dx%d\n",inputSize.width,inputSize.height);
    LOG_INFO(logFile,"InputName:%s\n",inputName.c_str());
    LOG_INFO(logFile,"ConfidenceThreshold:%f\n",yolov7Parameter.confidenceThreshold);
    LOG_INFO(logFile,"objectThreshold:%f\n",yolov7Parameter.objectThreshold);
    LOG_INFO(logFile,"NMSThreshold:%f\n",yolov7Parameter.nmsThreshold);
    LOG_INFO(logFile,"NumberOfClasses:%d\n",yolov7Parameter.numberOfClasses);
    return SUCCESS;
}
#ifdef DMA
// HIP kernel: reads an 8-bit planar image whose planes are stored in B, G, R order and
// writes planar R, G, B floats, each divided by 255.
// Each thread handles the pixel given by its flattened 1-D global thread index.
__global__ void convert_bgrp_to_rgb_and_normalization_yolov7(unsigned char* srcImage, float* outImage, int width, int height)
{
    const int planeSize = width * height;
    const int pixel = blockIdx.x * blockDim.x + threadIdx.x;
    if (pixel >= planeSize)
    {
        return;
    }
    const float maxValue = 255.0f;
    const unsigned char blue = srcImage[pixel];
    const unsigned char green = srcImage[pixel + planeSize];
    const unsigned char red = srcImage[pixel + planeSize * 2];
    // Output planes: 0 = R, 1 = G, 2 = B
    outImage[pixel] = red / maxValue;
    outImage[pixel + planeSize] = green / maxValue;
    outImage[pixel + planeSize * 2] = blue / maxValue;
}
// HIP kernel: converts a tightly packed YUV420P image (Y plane, then U, then V, each
// chroma plane a quarter of the luma size) into planar R, G, B floats divided by 255,
// matching the output layout of the BGRP and RGBA kernels.
//
// Fixes over the previous version:
//  - the pixel index is the flattened global thread index, so the 1-D launch used by
//    Detect() covers the whole image (a 2-D launch is covered as well);
//  - the three channels go to separate planes instead of overlapping neighbouring pixels;
//  - values are clamped to [0, 255] and normalized, as the kernel name promises.
__global__ void convert_yuv420p_to_rgb_and_normalization_yolov7(unsigned char* srcImage, float* outImage, int width, int height)
{
    const int planeSize = width * height;
    // Flatten (x, y) thread coordinates; the y term is 0 for a 1-D launch
    const int row = blockIdx.y * blockDim.y + threadIdx.y;
    const int index = row * (gridDim.x * blockDim.x) + blockIdx.x * blockDim.x + threadIdx.x;
    if (index >= planeSize)
        return;
    const int x = index % width;
    const int y = index / width;

    // One U and one V sample is shared by each 2x2 block of pixels
    const int chromaIndex = (y / 2) * (width / 2) + (x / 2);
    const int uIndex = chromaIndex + planeSize;
    const int vIndex = chromaIndex + planeSize * 5 / 4;
    const unsigned char yValue = srcImage[index];
    const unsigned char uValue = srcImage[uIndex];
    const unsigned char vValue = srcImage[vIndex];

    int r = yValue + 1.370705 * (vValue - 128);
    int g = yValue - 0.698001 * (vValue - 128) - 0.337633 * (uValue - 128);
    int b = yValue + 1.732446 * (uValue - 128);
    // The conversion can leave the 8-bit range; clamp before normalizing
    r = r < 0 ? 0 : (r > 255 ? 255 : r);
    g = g < 0 ? 0 : (g > 255 ? 255 : g);
    b = b < 0 ? 0 : (b > 255 ? 255 : b);

    const float sum = 255.0f;
    outImage[index + planeSize * 0] = r / sum;
    outImage[index + planeSize * 1] = g / sum;
    outImage[index + planeSize * 2] = b / sum;
}
// HIP kernel: reads an interleaved 8-bit image with four bytes per pixel (R, G, B and a
// fourth byte that is ignored) and writes planar R, G, B floats, each divided by 255.
// Each thread handles the pixel given by its flattened 1-D global thread index.
__global__ void convert_rgba_to_rgb_and_normalization_yolov7(unsigned char* srcImage, float* outImage, int width, int height)
{
    const int planeSize = width * height;
    const int pixel = blockIdx.x * blockDim.x + threadIdx.x;
    if (pixel >= planeSize)
    {
        return;
    }
    const float maxValue = 255.0f;
    const unsigned char* texel = srcImage + pixel * 4;
    // Output planes: 0 = R, 1 = G, 2 = B
    outImage[pixel] = texel[0] / maxValue;
    outImage[pixel + planeSize] = texel[1] / maxValue;
    outImage[pixel + planeSize * 2] = texel[2] / maxValue;
}
#endif
// Runs YOLOv7 detection on a host image.
//
// srcImage            non-empty 8-bit 3-channel image (CV_8UC3).
// resultsOfDetection  detections kept by NMS are appended (the vector is not cleared).
// Returns IMAGE_ERROR for an empty or non-CV_8UC3 image, otherwise SUCCESS.
ErrorCode DetectorYOLOV7::Detect(const cv::Mat &srcImage, std::vector<ResultOfDetection> &resultsOfDetection)
{
    if(srcImage.empty()||srcImage.type()!=CV_8UC3)
    {
        LOG_ERROR(logFile, "image error!\n");
        return IMAGE_ERROR;
    }

    // Preprocess into an NCHW blob: scale by 1/255, resize to the network input size,
    // swap the R and B channels, no cropping
    cv::Mat inputBlob;
    blobFromImage(srcImage,
                  inputBlob,
                  1 / 255.0,
                  inputSize,
                  Scalar(0, 0, 0),
                  true,
                  false);

    // Bind the blob as the network input
    migraphx::parameter_map inputData;
    inputData[inputName]= migraphx::argument{inputShape, (float*)inputBlob.data};

    // Inference
    std::vector<migraphx::argument> inferenceResults = net.eval(inputData);

    // Copy the first output into a 3-D float cv::Mat
    migraphx::argument output = inferenceResults[0];
    migraphx::shape outputShape = output.get_shape();
    int shape[]={static_cast<int>(outputShape.lens()[0]),static_cast<int>(outputShape.lens()[1]),static_cast<int>(outputShape.lens()[2])};
    cv::Mat out(3,shape,CV_32F);
    memcpy(out.data,output.data(),sizeof(float)*outputShape.elements());

    // Number of proposals and number of values per proposal
    const int numProposal = out.size[1];
    const int numOut = out.size[2];

    // Reshape so that every row is one proposal: cx, cy, w, h, box score, class scores...
    cv::Mat proposals = out.reshape(0, numProposal);

    // Collect the candidate boxes
    std::vector<float> confidences;
    std::vector<cv::Rect> boxes;
    std::vector<int> classIds;
    const float ratioh = (float)srcImage.rows / inputSize.height;
    const float ratiow = (float)srcImage.cols / inputSize.width;
    const float* pdata = (float*)proposals.data;
    for (int n = 0; n < numProposal; ++n, pdata += numOut)
    {
        const float boxScores = pdata[4];
        if (!(boxScores > yolov7Parameter.objectThreshold))
        {
            continue;
        }
        cv::Mat scores = proposals.row(n).colRange(5, numOut);
        cv::Point classIdPoint;
        double maxClassScore;
        cv::minMaxLoc(scores, 0, &maxClassScore, 0, &classIdPoint);
        maxClassScore *= boxScores;
        if (!(maxClassScore > yolov7Parameter.confidenceThreshold))
        {
            continue;
        }
        // Scale the box from network-input coordinates to source-image coordinates
        const float cx = pdata[0] * ratiow;
        const float cy = pdata[1] * ratioh;
        const float w = pdata[2] * ratiow;
        const float h = pdata[3] * ratioh;
        const int left = int(cx - 0.5 * w);
        const int top = int(cy - 0.5 * h);
        confidences.push_back((float)maxClassScore);
        boxes.push_back(cv::Rect(left, top, (int)(w), (int)(h)));
        classIds.push_back(classIdPoint.x);
    }

    // Non-maximum suppression removes redundant overlapping boxes
    std::vector<int> indices;
    dnn::NMSBoxes(boxes, confidences, yolov7Parameter.confidenceThreshold, yolov7Parameter.nmsThreshold, indices);
    for (size_t i = 0; i < indices.size(); ++i)
    {
        const int idx = indices[i];
        const int classID = classIds[idx];
        ResultOfDetection detection;
        detection.boundingBox = boxes[idx];
        detection.confidence = confidences[idx]; // confidence
        detection.classID = classID;             // label
        // The class-name list may be shorter than the model's class count; fall back to
        // an empty name instead of indexing out of range.
        detection.className = (classID >= 0 && static_cast<size_t>(classID) < classNames.size()) ? classNames[classID] : string();
        resultsOfDetection.push_back(detection);
    }
    return SUCCESS;
}
#ifdef DMA
// Runs YOLOv7 detection on a frame whose pixels are already in device memory (DMA build).
//
// srcImage            frame descriptor; format, width, height and dcu_data are read.
// resultsOfDetection  detections kept by NMS are appended (the vector is not cleared).
// Returns SUCCESS, or IMAGE_ERROR when the frame cannot be turned into a valid network input.
ErrorCode DetectorYOLOV7::Detect(DCU_Frame &srcImage, std::vector<ResultOfDetection> &resultsOfDetection)
{
    // The conversion kernels write three planes of width*height floats into
    // preprocess_Image, which Initialize() sizes for the network input and which is then
    // handed to the network as a tensor of inputShape. A frame of any other size would
    // overrun or mis-fill that buffer, so it is rejected.
    if (preprocess_Image == NULL || srcImage.dcu_data == NULL ||
        srcImage.width != inputSize.width || srcImage.height != inputSize.height)
    {
        LOG_ERROR(logFile, "image error!\n");
        return IMAGE_ERROR;
    }

    // One thread per pixel, launched as a 1-D grid
    const int block_size = 256;
    const int num_blocks = (srcImage.width * srcImage.height + block_size - 1) / block_size;
    if (srcImage.format == AV_PIX_FMT_BGRP)
    {
        convert_bgrp_to_rgb_and_normalization_yolov7<<<num_blocks, block_size>>>(srcImage.dcu_data, preprocess_Image, srcImage.width, srcImage.height);
    }
    else if (srcImage.format == AV_PIX_FMT_YUV420P)
    {
        convert_yuv420p_to_rgb_and_normalization_yolov7<<<num_blocks, block_size>>>(srcImage.dcu_data, preprocess_Image, srcImage.width, srcImage.height);
    }
    else if (srcImage.format == AV_PIX_FMT_RGBA)
    {
        convert_rgba_to_rgb_and_normalization_yolov7<<<num_blocks, block_size>>>(srcImage.dcu_data, preprocess_Image, srcImage.width, srcImage.height);
    }
    else
    {
        // No kernel ran, so preprocess_Image does not contain this frame
        LOG_ERROR(logFile, "image error!\n");
        return IMAGE_ERROR;
    }

    // Bind the preprocessed buffer as the network input
    ParameterMap[inputName] = migraphx::argument{inputShape, preprocess_Image};

    // Inference
    std::vector<migraphx::argument> inferenceResults = net.eval(ParameterMap);

    // Wrap the first output as a 3-D float cv::Mat (no copy; 'output' keeps the data alive)
    // NOTE(review): the values are read on the host below while the model is compiled
    // with offload_copy=false in DMA builds; confirm that the output buffer produced
    // through ParameterMap is host-accessible.
    migraphx::argument output = inferenceResults[0];
    migraphx::shape outputShape = output.get_shape();
    int shape[]={static_cast<int>(outputShape.lens()[0]), static_cast<int>(outputShape.lens()[1]), static_cast<int>(outputShape.lens()[2])};
    cv::Mat out(3, shape, CV_32F, (unsigned char*)output.data());

    // Number of proposals and number of values per proposal
    const int numProposal = out.size[1];
    const int numOut = out.size[2];

    // Reshape so that every row is one proposal: cx, cy, w, h, box score, class scores...
    cv::Mat proposals = out.reshape(0, numProposal);

    // Collect the candidate boxes
    std::vector<float> confidences;
    std::vector<cv::Rect> boxes;
    std::vector<int> classIds;
    const float ratioh = (float)srcImage.height / inputSize.height;
    const float ratiow = (float)srcImage.width / inputSize.width;
    const float* pdata = (float*)proposals.data;
    for (int n = 0; n < numProposal; ++n, pdata += numOut)
    {
        const float boxScores = pdata[4];
        if (!(boxScores > yolov7Parameter.objectThreshold))
        {
            continue;
        }
        cv::Mat scores = proposals.row(n).colRange(5, numOut);
        cv::Point classIdPoint;
        double maxClassScore;
        cv::minMaxLoc(scores, 0, &maxClassScore, 0, &classIdPoint);
        maxClassScore *= boxScores;
        if (!(maxClassScore > yolov7Parameter.confidenceThreshold))
        {
            continue;
        }
        // Scale the box from network-input coordinates to frame coordinates
        const float cx = pdata[0] * ratiow;
        const float cy = pdata[1] * ratioh;
        const float w = pdata[2] * ratiow;
        const float h = pdata[3] * ratioh;
        const int left = int(cx - 0.5 * w);
        const int top = int(cy - 0.5 * h);
        confidences.push_back((float)maxClassScore);
        boxes.push_back(cv::Rect(left, top, (int)(w), (int)(h)));
        classIds.push_back(classIdPoint.x);
    }

    // Non-maximum suppression removes redundant overlapping boxes
    std::vector<int> indices;
    dnn::NMSBoxes(boxes, confidences, yolov7Parameter.confidenceThreshold, yolov7Parameter.nmsThreshold, indices);
    for (size_t i = 0; i < indices.size(); ++i)
    {
        const int idx = indices[i];
        const int classID = classIds[idx];
        ResultOfDetection detection;
        detection.boundingBox = boxes[idx];
        detection.confidence = confidences[idx];
        detection.classID = classID;
        // The class-name list may be shorter than the model's class count; fall back to
        // an empty name instead of indexing out of range.
        detection.className = (classID >= 0 && static_cast<size_t>(classID) < classNames.size()) ? classNames[classID] : string();
        resultsOfDetection.push_back(detection);
    }
    return SUCCESS;
}
#endif
// Shared start-up work for the YOLOv7 detector: stores the initialization parameters,
// fetches the log file, opens the configuration file and makes sure a non-empty parent
// path ends with a path separator.
// Returns CONFIG_FILE_NOT_EXIST, FAIL_TO_OPEN_CONFIG_FILE or SUCCESS.
ErrorCode DetectorYOLOV7::DoCommonInitialization(InitializationParameterOfDetector initializationParameterOfDetector)
{
    initializationParameter = initializationParameterOfDetector;

    // Fetch the log file registered under the configured log name
    logFile = LogManager::GetInstance()->GetLogFile(initializationParameter.logName);

    // Load the configuration file
    std::string pathOfConfig = initializationParameter.configFilePath;
    if (Exists(pathOfConfig) == false)
    {
        LOG_ERROR(logFile, "no configuration file!\n");
        return CONFIG_FILE_NOT_EXIST;
    }
    const bool opened = configurationFile.open(pathOfConfig, FileStorage::READ);
    if (opened == false)
    {
        LOG_ERROR(logFile, "fail to open configuration file\n");
        return FAIL_TO_OPEN_CONFIG_FILE;
    }
    LOG_INFO(logFile, "succeed to open configuration file\n");

    // Append a trailing separator to the parent path when it is missing
    std::string &root = initializationParameter.parentPath;
    if (root.empty() == false && !IsPathSeparator(root[root.size() - 1]))
    {
        root += PATH_SEPARATOR;
    }
    return SUCCESS;
}
}
#include <Sample.h>
#include <SimpleLog.h>
#include <Filesystem.h>
#include <DetectorRetinaFace.h>
#include <sys/time.h>
#include <Decoder.h>
#include <Queuethread.h>
using namespace cv;
using namespace std;
using namespace cv::dnn;
using namespace migraphx;
using namespace migraphxSamples;
static void DecoderThreadFunc(Queue* queue)
{
int ret, end = 0;
int frame_cnt = 0;
Queue* que = queue;
Decoder decoder(que->device);
InitializationParameterOfDecoder initParamOfDecoderRetinaFace;
#ifndef DMA
initParamOfDecoderRetinaFace.src_filename = "../Resource/Images/Mean.mp4";
if( que->device == _HW) {
initParamOfDecoderRetinaFace.str_devid[4] = {0};
initParamOfDecoderRetinaFace.xcoder_params = "out=hw";
initParamOfDecoderRetinaFace.dec_name = "h264_ni_quadra_dec";
initParamOfDecoderRetinaFace.filters_descr = "ni_quadra_scale=640:640:format=bgrp,hwdownload,format=bgrp";
} else if (que->device == _HW_DMA) {
LOG_ERROR(stdout, "Error program param or cmake param, not USE_P2P can`t set '--dma'!\n");
que->finish();
return;
}
#else
if( que->device == _HW_DMA) {
initParamOfDecoderRetinaFace.str_devid[4] = {0};
initParamOfDecoderRetinaFace.xcoder_params = "out=hw";
initParamOfDecoderRetinaFace.dec_name = "h264_ni_quadra_dec";
initParamOfDecoderRetinaFace.filters_descr = "ni_quadra_scale=640:640:format=rgba:is_p2p=1";
initParamOfDecoderRetinaFace.src_filename = "../Resource/Images/cr7_1920x1080.h264";
} else {
LOG_ERROR(stdout, "Error program param or cmake param, USE_P2P need set '--dma'!\n");
que->finish();
return;
}
#endif
ret = decoder.DecoderInit(initParamOfDecoderRetinaFace);
if (ret == -1)
{
que->finish();
return;
}
while(true)
{
if (av_read_frame(decoder.fmt_ctx, decoder.pkt) < 0)
{
if(end == 2)
{
que->DecodeEnd = true;
break;
}
end = 1;
}
if (decoder.pkt->stream_index == decoder.video_stream_idx) {
if(!end) {
ret = avcodec_send_packet(decoder.video_dec_ctx, decoder.pkt);
} else {
ret = avcodec_send_packet(decoder.video_dec_ctx, NULL);
}
if (ret < 0 && ret != AVERROR_EOF) {
fprintf(stderr, "Error submitting a packet for decoding\n");
que->DecodeEnd = true;
break;
}
while (ret >= 0 || end == 1)
{
ret = avcodec_receive_frame(decoder.video_dec_ctx, decoder.frame);
if (ret == AVERROR(EAGAIN)) {
break;
} else if (ret == AVERROR_EOF ) {
end = 2;
break;
} else if (ret < 0) {
av_log(NULL, AV_LOG_ERROR, "Error while receiving a frame from the decoder\n");
que->finish();
return;
}
decoder.frame->pts = decoder.frame->best_effort_timestamp;
frame_cnt++;
if (que->device == CPU)
{
cv::Mat srcImage = cv::Mat::zeros(decoder.frame->height*3/2, decoder.frame->width, CV_8UC1);
memcpy(srcImage.data, (unsigned char*)decoder.frame->data[0], decoder.frame->width * decoder.frame->height);
memcpy(srcImage.data + decoder.frame->width * decoder.frame->height, (unsigned char*)decoder.frame->data[1], decoder.frame->width * decoder.frame->height/4);
memcpy(srcImage.data + decoder.frame->width * decoder.frame->height*5/4, (unsigned char*)decoder.frame->data[2], decoder.frame->width * decoder.frame->height/4);
cvtColor(srcImage, srcImage, COLOR_YUV420p2RGB);
que->enQueue(srcImage);
}
if (que->device == _HW || que->device == _HW_DMA)
{
if (av_buffersrc_add_frame_flags(decoder.buffersrc_ctx, decoder.frame, AV_BUFFERSRC_FLAG_KEEP_REF) < 0) {
av_log(NULL, AV_LOG_ERROR, "Error while feeding the filtergraph\n");
break;
}
while (1)
{
ret = av_buffersink_get_frame(decoder.buffersink_ctx, decoder.filt_frame);
if (ret == AVERROR(EAGAIN))
{
break;
}
else if(ret == AVERROR_EOF)
{
end = 2;
break;
}
if (ret < 0)
{
que->finish();
return;
}
#ifndef DMA
if (que->device == _HW)
{
cv::Mat srcImage;
switch (decoder.filt_frame->format)
{
case AV_PIX_FMT_BGRP:
{
srcImage = cv::Mat::zeros(decoder.filt_frame->height, decoder.filt_frame->width, CV_8UC3);
cv::Mat mat_r = cv::Mat(decoder.filt_frame->height, decoder.filt_frame->width, CV_8UC1, (unsigned char*)decoder.filt_frame->data[0]);
cv::Mat mat_g = cv::Mat(decoder.filt_frame->height, decoder.filt_frame->width, CV_8UC1, (unsigned char*)decoder.filt_frame->data[1]);
cv::Mat mat_b = cv::Mat(decoder.filt_frame->height, decoder.filt_frame->width, CV_8UC1, (unsigned char*)decoder.filt_frame->data[2]);
cv::Mat Channels[3]{mat_r, mat_g, mat_b};
cv::merge(Channels, 3, srcImage);
break;
}
case AV_PIX_FMT_YUV420P:
{
srcImage = cv::Mat::zeros(decoder.filt_frame->height*3/2, decoder.filt_frame->width, CV_8UC1);
memcpy(srcImage.data, (unsigned char*)decoder.filt_frame->data[0], decoder.filt_frame->width * decoder.filt_frame->height);
memcpy(srcImage.data + decoder.filt_frame->width * decoder.filt_frame->height, (unsigned char*)decoder.filt_frame->data[1], decoder.filt_frame->width * decoder.filt_frame->height/4);
memcpy(srcImage.data + decoder.filt_frame->width * decoder.filt_frame->height*5/4, (unsigned char*)decoder.filt_frame->data[2], decoder.filt_frame->width * decoder.filt_frame->height/4);
cvtColor(srcImage, srcImage, COLOR_YUV420p2RGB);
break;
}
case AV_PIX_FMT_RGBA:
{
srcImage = cv::Mat::zeros(decoder.filt_frame->height, decoder.filt_frame->width, CV_8UC4);
memcpy(srcImage.data, (unsigned char*)decoder.filt_frame->data[0], decoder.filt_frame->width * decoder.filt_frame->height * 4);
cvtColor(srcImage, srcImage, COLOR_BGRA2RGB);
break;
}
default:
break;
}
que->enQueue(srcImage);
av_frame_unref(decoder.filt_frame);
}
#else
if (que->device == _HW_DMA)
{
DCU_Frame dcu_frame;
AVHWFramesContext *hwfc = (AVHWFramesContext *)decoder.filt_frame->hw_frames_ctx->data;
switch (hwfc->sw_format)
{
case AV_PIX_FMT_BGRP:
{
dcu_frame.format = AV_PIX_FMT_BGRP;
dcu_frame.data_len = decoder.filt_frame->width * decoder.filt_frame->height * 3;
dcu_frame.srcImage = cv::Mat::zeros(decoder.filt_frame->height, decoder.filt_frame->width, CV_8UC3);
break;
}
case AV_PIX_FMT_YUV420P:
{
dcu_frame.format = AV_PIX_FMT_YUV420P;
dcu_frame.data_len = decoder.filt_frame->width * decoder.filt_frame->height * 3 / 2;
dcu_frame.srcImage = cv::Mat::zeros(decoder.filt_frame->height*3/2, decoder.filt_frame->width, CV_8UC1);
break;
}
case AV_PIX_FMT_RGBA:
{
dcu_frame.format = AV_PIX_FMT_RGBA;
dcu_frame.data_len = decoder.filt_frame->width * decoder.filt_frame->height * 4;
dcu_frame.srcImage = cv::Mat::zeros(decoder.filt_frame->height, decoder.filt_frame->width, CV_8UC4);
break;
}
default:
break;
}
dcu_frame.width = decoder.filt_frame->width;
dcu_frame.height = decoder.filt_frame->height;
hipMalloc((void**)&(dcu_frame.dcu_data), dcu_frame.data_len * sizeof(unsigned char));
ret = decoder.retrieve_filter_frame(dcu_frame, decoder.filt_frame);
if (ret)
av_log(NULL, AV_LOG_ERROR, "Error while retrieve_filter_frame with p2p.\n");
if(dcu_frame.format == AV_PIX_FMT_BGRP)
{
cv::Mat mat_b = cv::Mat(decoder.filt_frame->height, decoder.filt_frame->width, CV_8UC1, (unsigned char*)dcu_frame.srcImage.data);
cv::Mat mat_g = cv::Mat(decoder.filt_frame->height, decoder.filt_frame->width, CV_8UC1, (unsigned char*)(dcu_frame.srcImage.data + decoder.filt_frame->height * decoder.filt_frame->width));
cv::Mat mat_r = cv::Mat(decoder.filt_frame->height, decoder.filt_frame->width, CV_8UC1, (unsigned char*)(dcu_frame.srcImage.data + decoder.filt_frame->height * decoder.filt_frame->width * 2));
cv::Mat Channels[3]{mat_r, mat_g, mat_b};
cv::merge(Channels, 3, dcu_frame.srcImage);
}
if(dcu_frame.format == AV_PIX_FMT_YUV420P)
cvtColor(dcu_frame.srcImage, dcu_frame.srcImage, COLOR_YUV420p2RGB);
if(dcu_frame.format == AV_PIX_FMT_RGBA)
cvtColor(dcu_frame.srcImage, dcu_frame.srcImage, COLOR_BGRA2RGB);
queue->enQueue(dcu_frame);
av_frame_unref(decoder.filt_frame);
}
#endif
}
}
av_frame_unref(decoder.frame);
}
}
av_packet_unref(decoder.pkt);
}
LOG_INFO(stdout, "Decoder: ####### frame count: %d\n", frame_cnt);
que->finish();
}
/**
 * Detector-thread entry point for the RetinaFace sample.
 *
 * Initialises a DetectorRetinaFace from CONFIG_FILE, then keeps taking frames
 * from the shared queue until the queue's DecodeEnd flag is set. Every
 * non-empty frame is run through the detector, the results are drawn onto the
 * frame and logged, and throughput statistics are printed at the end.
 *
 * @param que  Queue shared with the decoder thread.
 */
static void DetectorThreadFunc(Queue* que)
{
    // Build and initialise the detector; the process exits if this fails.
    DetectorRetinaFace detector;
    InitializationParameterOfDetector detectorConfig;
    detectorConfig.parentPath = "";
    detectorConfig.configFilePath = CONFIG_FILE;
    detectorConfig.logName = "";
    if (detector.Initialize(detectorConfig) != SUCCESS)
    {
        LOG_ERROR(stdout, "fail to initialize detector!\n");
        exit(-1);
    }
    LOG_INFO(stdout, "succeed to initialize detector\n");

    int processedFrames = 0;
    const double loopStart = getTickCount();
    while (!que->DecodeEnd)
    {
        // The frame type depends on the build: a DCU_Frame when DMA is defined,
        // a plain cv::Mat otherwise. `canvas` is the image that gets drawn on.
#ifdef DMA
        DCU_Frame frame;
        que->deQueue(&frame);
        cv::Mat& canvas = frame.srcImage;
#else
        cv::Mat frame;
        que->deQueue(&frame);
        cv::Mat& canvas = frame;
#endif
        if (canvas.empty())
        {
            continue;
        }

        // Run the detector and time it
        std::vector<ResultOfDetection> predictions;
        const double inferStart = getTickCount();
        detector.Detect(frame, predictions);
        const double inferStop = getTickCount();
        LOG_INFO(stdout, "inference time:%f ms\n", (inferStop - inferStart)*1000 / getTickFrequency());
        ++processedFrames;
#ifdef DMA
        // The device buffer was allocated by the decoder thread for this frame.
        hipFree(frame.dcu_data);
#endif

        // Draw and log every detection
        LOG_INFO(stdout,"////////////////Detection Results////////////////\n");
        for (const ResultOfDetection& result : predictions)
        {
            cv::rectangle(canvas, result.boundingBox, Scalar(0,255,255),2);
            cv::putText(canvas, result.className, cv::Point(result.boundingBox.x, result.boundingBox.y-20), cv::FONT_HERSHEY_PLAIN, 2.0, Scalar(0, 0, 255), 2);
            LOG_INFO(stdout,"box:%d %d %d %d,label:%d,confidence:%f\n",result.boundingBox.x,
                     result.boundingBox.y,result.boundingBox.width,result.boundingBox.height,result.classID,result.confidence);
        }

        // On-screen display is disabled because X11 is not available inside docker.
        // To re-enable it:
        //   namedWindow("video", WINDOW_NORMAL | WINDOW_KEEPRATIO);
        //   imshow("video", canvas);
        //   if (waitKey(10) == 'q') { break; }
    }
#ifdef DMA
    hipFree(detector.preprocess_Image);
#endif

    const double elapsedSeconds = (getTickCount() - loopStart)/getTickFrequency();
    fprintf(stdout, "Finish ####### frame_cnt: %d, Inference fps: %.2f, all time: %.2f ms\n", processedFrames, float(processedFrames/elapsedSeconds), elapsedSeconds*1000);
    que->finish();
}
void Sample_DetectorRetinaFace(int device)
{
Queue* queue = new Queue(1);
queue->device = device;
std::thread ThreadDecoder(DecoderThreadFunc, queue);
std::thread ThreadDetector(DetectorThreadFunc, queue);
ThreadDecoder.join();
ThreadDetector.join();
delete queue;
queue = NULL;
return;
}
#include <Sample.h>
#include <SimpleLog.h>
#include <Filesystem.h>
#include <DetectorSSD.h>
#include <sys/time.h>
#include <Decoder.h>
#include <Queuethread.h>
using namespace cv;
using namespace std;
using namespace cv::dnn;
using namespace migraphx;
using namespace migraphxSamples;
// Decoder-thread entry point for the SSD sample.
//
// Configures a Decoder according to queue->device and the DMA build flag, then
// reads packets from the input, decodes them, and pushes every resulting image
// into the queue for the detector thread:
//   - CPU:     the decoded frame is copied plane by plane into a cv::Mat and colour-converted.
//   - _HW:     the frame is pushed through the filter graph and the filtered frame is
//              repacked into a cv::Mat according to its pixel format.
//   - _HW_DMA: (DMA builds only) a device buffer is allocated with hipMalloc and filled
//              by decoder.retrieve_filter_frame(); the frame is enqueued as a DCU_Frame.
// `end` tracks end-of-stream: 0 = reading packets, 1 = input exhausted and the decoder
// is being flushed, 2 = decoder/filter reported EOF.
// queue: queue shared with the detector thread; finish() is called on every exit path
//        except the two `break`s that only set DecodeEnd, which reach the finish() at the bottom.
static void DecoderThreadFunc(Queue* queue)
{
int ret, end = 0;
int frame_cnt = 0;
Queue* que = queue;
Decoder decoder(que->device);
InitializationParameterOfDecoder initParamOfDecoderSSD;
#ifndef DMA
initParamOfDecoderSSD.src_filename = "../Resource/Images/Mean.mp4";
if( que->device == _HW) {
// NOTE(review): this assigns to element index 4 of str_devid, it does not zero the
// array — confirm the array has at least 5 elements and that this is the intent.
initParamOfDecoderSSD.str_devid[4] = {0};
initParamOfDecoderSSD.xcoder_params = "out=hw";
initParamOfDecoderSSD.dec_name = "h264_ni_quadra_dec";
initParamOfDecoderSSD.filters_descr = "ni_quadra_scale=640:480:format=bgrp,hwdownload,format=bgrp";
} else if (que->device == _HW_DMA) {
// _HW_DMA is only valid in a build with DMA defined.
LOG_ERROR(stdout, "Error program param or cmake param, not USE_P2P can`t set '--dma'!\n");
que->finish();
return;
}
#else
if( que->device == _HW_DMA) {
// NOTE(review): same index-4 assignment as in the non-DMA branch — confirm.
initParamOfDecoderSSD.str_devid[4] = {0};
initParamOfDecoderSSD.xcoder_params = "out=hw";
initParamOfDecoderSSD.dec_name = "h264_ni_quadra_dec";
initParamOfDecoderSSD.filters_descr = "ni_quadra_scale=640:480:format=rgba:is_p2p=1";
initParamOfDecoderSSD.src_filename = "../Resource/Images/cr7_1920x1080.h264";
} else {
// A DMA build only supports the _HW_DMA device.
LOG_ERROR(stdout, "Error program param or cmake param, USE_P2P need set '--dma'!\n");
que->finish();
return;
}
#endif
ret = decoder.DecoderInit(initParamOfDecoderSSD);
if (ret == -1)
{
que->finish();
return;
}
while(true)
{
// A failed read means the input is exhausted (or broken). The first failure switches
// to flush mode (end = 1); once the decoder has reported EOF (end == 2) the loop ends.
if (av_read_frame(decoder.fmt_ctx, decoder.pkt) < 0)
{
if(end == 2)
{
que->DecodeEnd = true;
break;
}
end = 1;
}
// NOTE(review): after a failed av_read_frame the packet was not filled by this read,
// yet its stream_index is still tested here — confirm the flush path is always taken.
if (decoder.pkt->stream_index == decoder.video_stream_idx) {
if(!end) {
ret = avcodec_send_packet(decoder.video_dec_ctx, decoder.pkt);
} else {
// A NULL packet puts the decoder into draining mode.
ret = avcodec_send_packet(decoder.video_dec_ctx, NULL);
}
if (ret < 0 && ret != AVERROR_EOF) {
fprintf(stderr, "Error submitting a packet for decoding\n");
que->DecodeEnd = true;
break;
}
// Pull every frame the decoder can currently deliver. `end == 1` keeps the loop
// running while draining even if the send call returned AVERROR_EOF.
while (ret >= 0 || end == 1)
{
ret = avcodec_receive_frame(decoder.video_dec_ctx, decoder.frame);
if (ret == AVERROR(EAGAIN)) {
break;
} else if (ret == AVERROR_EOF ) {
end = 2;
break;
} else if (ret < 0) {
av_log(NULL, AV_LOG_ERROR, "Error while receiving a frame from the decoder\n");
que->finish();
return;
}
decoder.frame->pts = decoder.frame->best_effort_timestamp;
frame_cnt++;
if (que->device == CPU)
{
// Pack the three planes back to back into one (height*3/2 x width) single-channel
// image, then convert to RGB.
// NOTE(review): the copies assume each plane is tightly packed (linesize == width)
// and that the frame is planar YUV 4:2:0 — confirm against the decoder output.
cv::Mat srcImage = cv::Mat::zeros(decoder.frame->height*3/2, decoder.frame->width, CV_8UC1);
memcpy(srcImage.data, (unsigned char*)decoder.frame->data[0], decoder.frame->width * decoder.frame->height);
memcpy(srcImage.data + decoder.frame->width * decoder.frame->height, (unsigned char*)decoder.frame->data[1], decoder.frame->width * decoder.frame->height/4);
memcpy(srcImage.data + decoder.frame->width * decoder.frame->height*5/4, (unsigned char*)decoder.frame->data[2], decoder.frame->width * decoder.frame->height/4);
cvtColor(srcImage, srcImage, COLOR_YUV420p2RGB);
que->enQueue(srcImage);
}
if (que->device == _HW || que->device == _HW_DMA)
{
// Feed the decoded frame into the filter graph, keeping our reference to it.
// NOTE(review): this break leaves the receive loop without av_frame_unref(decoder.frame).
if (av_buffersrc_add_frame_flags(decoder.buffersrc_ctx, decoder.frame, AV_BUFFERSRC_FLAG_KEEP_REF) < 0) {
av_log(NULL, AV_LOG_ERROR, "Error while feeding the filtergraph\n");
break;
}
// Drain every filtered frame that is currently available.
while (1)
{
ret = av_buffersink_get_frame(decoder.buffersink_ctx, decoder.filt_frame);
if (ret == AVERROR(EAGAIN))
{
break;
}
else if(ret == AVERROR_EOF)
{
end = 2;
break;
}
if (ret < 0)
{
que->finish();
return;
}
#ifndef DMA
if (que->device == _HW)
{
// Repack the filtered frame into a cv::Mat according to its pixel format.
cv::Mat srcImage;
switch (decoder.filt_frame->format)
{
case AV_PIX_FMT_BGRP:
{
// Planar frame: wrap the three planes without copying and interleave them.
// NOTE(review): the local names mat_r/mat_g/mat_b are bound to planes 0/1/2;
// whether that matches the real channel order of this format is not visible here.
srcImage = cv::Mat::zeros(decoder.filt_frame->height, decoder.filt_frame->width, CV_8UC3);
cv::Mat mat_r = cv::Mat(decoder.filt_frame->height, decoder.filt_frame->width, CV_8UC1, (unsigned char*)decoder.filt_frame->data[0]);
cv::Mat mat_g = cv::Mat(decoder.filt_frame->height, decoder.filt_frame->width, CV_8UC1, (unsigned char*)decoder.filt_frame->data[1]);
cv::Mat mat_b = cv::Mat(decoder.filt_frame->height, decoder.filt_frame->width, CV_8UC1, (unsigned char*)decoder.filt_frame->data[2]);
cv::Mat Channels[3]{mat_r, mat_g, mat_b};
cv::merge(Channels, 3, srcImage);
break;
}
case AV_PIX_FMT_YUV420P:
{
// Same plane packing as the CPU path above.
srcImage = cv::Mat::zeros(decoder.filt_frame->height*3/2, decoder.filt_frame->width, CV_8UC1);
memcpy(srcImage.data, (unsigned char*)decoder.filt_frame->data[0], decoder.filt_frame->width * decoder.filt_frame->height);
memcpy(srcImage.data + decoder.filt_frame->width * decoder.filt_frame->height, (unsigned char*)decoder.filt_frame->data[1], decoder.filt_frame->width * decoder.filt_frame->height/4);
memcpy(srcImage.data + decoder.filt_frame->width * decoder.filt_frame->height*5/4, (unsigned char*)decoder.filt_frame->data[2], decoder.filt_frame->width * decoder.filt_frame->height/4);
cvtColor(srcImage, srcImage, COLOR_YUV420p2RGB);
break;
}
case AV_PIX_FMT_RGBA:
{
// Packed 4-channel frame: copy it whole, then drop the alpha channel.
srcImage = cv::Mat::zeros(decoder.filt_frame->height, decoder.filt_frame->width, CV_8UC4);
memcpy(srcImage.data, (unsigned char*)decoder.filt_frame->data[0], decoder.filt_frame->width * decoder.filt_frame->height * 4);
cvtColor(srcImage, srcImage, COLOR_BGRA2RGB);
break;
}
default:
// Any other format leaves srcImage empty; it is still enqueued below.
break;
}
que->enQueue(srcImage);
av_frame_unref(decoder.filt_frame);
}
#else
if (que->device == _HW_DMA)
{
// Describe the frame (format, byte length, host image) from the software format
// of the hardware frames context.
DCU_Frame dcu_frame;
AVHWFramesContext *hwfc = (AVHWFramesContext *)decoder.filt_frame->hw_frames_ctx->data;
switch (hwfc->sw_format)
{
case AV_PIX_FMT_BGRP:
{
dcu_frame.format = AV_PIX_FMT_BGRP;
dcu_frame.data_len = decoder.filt_frame->width * decoder.filt_frame->height * 3;
dcu_frame.srcImage = cv::Mat::zeros(decoder.filt_frame->height, decoder.filt_frame->width, CV_8UC3);
break;
}
case AV_PIX_FMT_YUV420P:
{
dcu_frame.format = AV_PIX_FMT_YUV420P;
dcu_frame.data_len = decoder.filt_frame->width * decoder.filt_frame->height * 3 / 2;
dcu_frame.srcImage = cv::Mat::zeros(decoder.filt_frame->height*3/2, decoder.filt_frame->width, CV_8UC1);
break;
}
case AV_PIX_FMT_RGBA:
{
dcu_frame.format = AV_PIX_FMT_RGBA;
dcu_frame.data_len = decoder.filt_frame->width * decoder.filt_frame->height * 4;
dcu_frame.srcImage = cv::Mat::zeros(decoder.filt_frame->height, decoder.filt_frame->width, CV_8UC4);
break;
}
default:
// NOTE(review): format and data_len are not assigned on this path, but data_len
// is still used for hipMalloc below — confirm DCU_Frame gives them defaults.
break;
}
dcu_frame.width = decoder.filt_frame->width;
dcu_frame.height = decoder.filt_frame->height;
// Device buffer for this frame; the detector thread releases it with hipFree.
// NOTE(review): the hipMalloc result is not checked.
hipMalloc((void**)&(dcu_frame.dcu_data), dcu_frame.data_len * sizeof(unsigned char));
ret = decoder.retrieve_filter_frame(dcu_frame, decoder.filt_frame);
if (ret)
av_log(NULL, AV_LOG_ERROR, "Error while retrieve_filter_frame with p2p.\n");
// Convert the host-side image in place to the layout used for drawing.
if(dcu_frame.format == AV_PIX_FMT_BGRP)
{
// The three planes are read from consecutive height*width regions of srcImage's
// own buffer and merged back into srcImage.
// NOTE(review): source and destination share storage unless cv::merge reallocates — confirm.
cv::Mat mat_b = cv::Mat(decoder.filt_frame->height, decoder.filt_frame->width, CV_8UC1, (unsigned char*)dcu_frame.srcImage.data);
cv::Mat mat_g = cv::Mat(decoder.filt_frame->height, decoder.filt_frame->width, CV_8UC1, (unsigned char*)(dcu_frame.srcImage.data + decoder.filt_frame->height * decoder.filt_frame->width));
cv::Mat mat_r = cv::Mat(decoder.filt_frame->height, decoder.filt_frame->width, CV_8UC1, (unsigned char*)(dcu_frame.srcImage.data + decoder.filt_frame->height * decoder.filt_frame->width * 2));
cv::Mat Channels[3]{mat_r, mat_g, mat_b};
cv::merge(Channels, 3, dcu_frame.srcImage);
}
if(dcu_frame.format == AV_PIX_FMT_YUV420P)
cvtColor(dcu_frame.srcImage, dcu_frame.srcImage, COLOR_YUV420p2RGB);
if(dcu_frame.format == AV_PIX_FMT_RGBA)
cvtColor(dcu_frame.srcImage, dcu_frame.srcImage, COLOR_BGRA2RGB);
queue->enQueue(dcu_frame);
av_frame_unref(decoder.filt_frame);
}
#endif
}
}
av_frame_unref(decoder.frame);
}
}
av_packet_unref(decoder.pkt);
}
LOG_INFO(stdout, "Decoder: ####### frame count: %d\n", frame_cnt);
que->finish();
}
/**
 * Detector-thread entry point for the SSD sample.
 *
 * Initialises a DetectorSSD from CONFIG_FILE, then keeps taking frames from the
 * shared queue until the queue's DecodeEnd flag is set. Every non-empty frame
 * is run through the detector, the results are drawn onto the frame and
 * logged, and throughput statistics are printed at the end.
 *
 * @param que  Queue shared with the decoder thread.
 */
static void DetectorThreadFunc(Queue* que)
{
    // Build and initialise the detector; the process exits if this fails.
    DetectorSSD detector;
    InitializationParameterOfDetector detectorConfig;
    detectorConfig.parentPath = "";
    detectorConfig.configFilePath = CONFIG_FILE;
    detectorConfig.logName = "";
    if (detector.Initialize(detectorConfig) != SUCCESS)
    {
        LOG_ERROR(stdout, "fail to initialize detector!\n");
        exit(-1);
    }
    LOG_INFO(stdout, "succeed to initialize detector\n");

    int processedFrames = 0;
    const double loopStart = getTickCount();
    while (!que->DecodeEnd)
    {
        // The frame type depends on the build: a DCU_Frame when DMA is defined,
        // a plain cv::Mat otherwise. `canvas` is the image that gets drawn on.
#ifdef DMA
        DCU_Frame frame;
        que->deQueue(&frame);
        cv::Mat& canvas = frame.srcImage;
#else
        cv::Mat frame;
        que->deQueue(&frame);
        cv::Mat& canvas = frame;
#endif
        if (canvas.empty())
        {
            continue;
        }

        // Run the detector and time it
        std::vector<ResultOfDetection> predictions;
        const double inferStart = getTickCount();
        detector.Detect(frame, predictions);
        const double inferStop = getTickCount();
        LOG_INFO(stdout, "inference time:%f ms\n", (inferStop - inferStart)*1000 / getTickFrequency());
        ++processedFrames;
#ifdef DMA
        // The device buffer was allocated by the decoder thread for this frame.
        hipFree(frame.dcu_data);
#endif

        // Draw and log every detection
        LOG_INFO(stdout,"////////////////Detection Results////////////////\n");
        for (const ResultOfDetection& result : predictions)
        {
            cv::rectangle(canvas, result.boundingBox, Scalar(0,255,255),2);
            cv::putText(canvas, result.className, cv::Point(result.boundingBox.x, result.boundingBox.y-20), cv::FONT_HERSHEY_PLAIN, 2.0, Scalar(0, 0, 255), 2);
            LOG_INFO(stdout,"box:%d %d %d %d,label:%d,confidence:%f\n",result.boundingBox.x,
                     result.boundingBox.y,result.boundingBox.width,result.boundingBox.height,result.classID,result.confidence);
        }

        // On-screen display is disabled because X11 is not available inside docker.
        // To re-enable it:
        //   namedWindow("video", WINDOW_NORMAL | WINDOW_KEEPRATIO);
        //   imshow("video", canvas);
        //   if (waitKey(10) == 'q') { break; }
    }
#ifdef DMA
    hipFree(detector.preprocess_Image);
#endif

    const double elapsedSeconds = (getTickCount() - loopStart)/getTickFrequency();
    fprintf(stdout, "Finish ####### frame_cnt: %d, Inference fps: %.2f, all time: %.2f ms\n", processedFrames, float(processedFrames/elapsedSeconds), elapsedSeconds*1000);
    que->finish();
}
void Sample_DetectorSSD(int device)
{
Queue* queue = new Queue(1);
queue->device = device;
std::thread ThreadDecoder(DecoderThreadFunc, queue);
std::thread ThreadDetector(DetectorThreadFunc, queue);
ThreadDecoder.join();
ThreadDetector.join();
delete queue;
queue = NULL;
return;
}
#define DLLAPI_EXPORTS
#include <CommonUtility.h>
#include <assert.h>
#include <ctype.h>
#include <time.h>
#include <stdlib.h>
#include <algorithm>
#include <sstream>
#include <vector>
#ifdef _WIN32
#include <io.h>
#include <direct.h>
#include <Windows.h>
#else
#include <unistd.h>
#include <dirent.h>
#include <sys/stat.h>
#include <sys/time.h>
#endif
#include <SimpleLog.h>
namespace migraphxSamples
{
/**
 * Returns the current local time broken down into zero-padded decimal strings.
 *
 * Fields filled on every platform: year (4 digits), month, day, hour, minute,
 * second (2 digits each), millisecond (3 digits) and weekDay (as reported by
 * the platform API). The microsecond field (3 digits, the sub-millisecond part)
 * is only filled on non-Windows builds.
 *
 * On non-Windows builds the conversion uses localtime_r() so that concurrent
 * callers do not share the static buffer of localtime(); if the conversion
 * fails the date/time fields are formatted from a zeroed struct tm instead of
 * dereferencing a null pointer.
 */
_Time GetCurrentTime3()
{
    _Time currentTime;
#if (defined WIN32 || defined _WIN32)
    SYSTEMTIME systemTime;
    GetLocalTime(&systemTime);
    char temp[8] = { 0 };
    sprintf(temp, "%04d", systemTime.wYear);
    currentTime.year=string(temp);
    sprintf(temp, "%02d", systemTime.wMonth);
    currentTime.month=string(temp);
    sprintf(temp, "%02d", systemTime.wDay);
    currentTime.day=string(temp);
    sprintf(temp, "%02d", systemTime.wHour);
    currentTime.hour=string(temp);
    sprintf(temp, "%02d", systemTime.wMinute);
    currentTime.minute=string(temp);
    sprintf(temp, "%02d", systemTime.wSecond);
    currentTime.second=string(temp);
    sprintf(temp, "%03d", systemTime.wMilliseconds);
    currentTime.millisecond=string(temp);
    sprintf(temp, "%d", systemTime.wDayOfWeek);
    currentTime.weekDay=string(temp);
#else
    struct timeval tv;
    gettimeofday(&tv, NULL);

    // Thread-safe conversion into a caller-owned struct; stays zeroed on failure.
    struct tm localTime = {};
    localtime_r(&tv.tv_sec, &localTime);

    // Large enough for any int/long value, and every write is bounded by sizeof.
    char temp[24] = { 0 };
    snprintf(temp, sizeof(temp), "%04d", 1900 + localTime.tm_year);
    currentTime.year = string(temp);
    snprintf(temp, sizeof(temp), "%02d", 1 + localTime.tm_mon);
    currentTime.month = string(temp);
    snprintf(temp, sizeof(temp), "%02d", localTime.tm_mday);
    currentTime.day = string(temp);
    snprintf(temp, sizeof(temp), "%02d", localTime.tm_hour);
    currentTime.hour = string(temp);
    snprintf(temp, sizeof(temp), "%02d", localTime.tm_min);
    currentTime.minute = string(temp);
    snprintf(temp, sizeof(temp), "%02d", localTime.tm_sec);
    currentTime.second = string(temp);
    // tv_usec is not `long` on every platform, so cast to match %ld.
    snprintf(temp, sizeof(temp), "%03ld", static_cast<long>(tv.tv_usec / 1000));
    currentTime.millisecond = string(temp);
    snprintf(temp, sizeof(temp), "%03ld", static_cast<long>(tv.tv_usec % 1000));
    currentTime.microsecond = string(temp);
    snprintf(temp, sizeof(temp), "%d", localTime.tm_wday);
    currentTime.weekDay = string(temp);
#endif
    return currentTime;
}
string GetCurrentTimeString()
{
char timeString[256]={0};
_Time currentTime=GetCurrentTime3();
sprintf(timeString,"%s%s%s%s%s%s%s",currentTime.year.c_str(),currentTime.month.c_str(),
currentTime.day.c_str(),currentTime.hour.c_str(),
currentTime.minute.c_str(),currentTime.second.c_str(),currentTime.millisecond.c_str());
return timeString;
}
/**
 * Splits a string into the pieces found between occurrences of a separator.
 *
 * Empty pieces are kept: "a,,b" gives {"a", "", "b"}, "a," gives {"a", ""} and
 * an empty input gives a single empty piece.
 *
 * @param str        Text to split.
 * @param separator  Delimiter; may be longer than one character. If it is
 *                   empty the whole input is returned as the only piece.
 * @return The pieces in order of appearance; never an empty vector.
 */
std::vector<std::string> SplitString(std::string str, std::string separator)
{
    std::vector<std::string> result;

    // An empty separator matches everywhere and would never advance.
    if (separator.empty())
    {
        result.push_back(str);
        return result;
    }

    // Search the input itself, so a match can never straddle its end.
    std::string::size_type start = 0;
    for (;;)
    {
        const std::string::size_type pos = str.find(separator, start);
        if (pos == std::string::npos)
        {
            // Whatever follows the last separator (possibly nothing) is the final piece.
            result.push_back(str.substr(start));
            break;
        }
        result.push_back(str.substr(start, pos - start));
        start = pos + separator.size();
    }
    return result;
}
// Ordering predicate: true when L has a strictly higher confidence than R.
// NMS() passes it to std::sort, which therefore orders detections from the
// highest confidence to the lowest.
bool CompareConfidence(const ResultOfDetection &L,const ResultOfDetection &R)
{
return L.confidence > R.confidence;
}
// Ordering predicate: true when L's bounding box has a strictly larger area
// than R's, so sorting with it orders detections from the largest box to the smallest.
bool CompareArea(const ResultOfDetection &L,const ResultOfDetection &R)
{
return L.boundingBox.area() > R.boundingBox.area();
}
void NMS(vector<ResultOfDetection> &detections, float IOUThreshold)
{
// sort
std::sort(detections.begin(), detections.end(), CompareConfidence);
for (int i = 0; i<detections.size(); ++i)
{
if (detections[i].exist)
{
for (int j = i + 1; j<detections.size(); ++j)
{
if (detections[j].exist)
{
// compute IOU
float intersectionArea = (detections[i].boundingBox & detections[j].boundingBox).area();
float intersectionRate = intersectionArea / (detections[i].boundingBox.area() + detections[j].boundingBox.area() - intersectionArea);
if (intersectionRate>IOUThreshold)
{
detections[j].exist = false;
}
}
}
}
}
}
/**
 * Builds a parameter map for a program: for every parameter name reported by
 * p.get_parameter_shapes(), an argument is generated for its shape with
 * migraphx::generate_argument() and handed to migraphx::gpu::to_gpu().
 *
 * @param p  Program whose parameters are enumerated.
 * @return Map from parameter name to the argument returned by to_gpu().
 */
migraphx::parameter_map CreateParameterMap(migraphx::program & p)
{
    migraphx::parameter_map deviceArguments;
    for (const auto& parameter : p.get_parameter_shapes())
    {
        deviceArguments[parameter.first] = migraphx::gpu::to_gpu(migraphx::generate_argument(parameter.second));
    }
    return deviceArguments;
}
//void print_result(const std::vector<OCRPredictResult> ocr_result)
//{
// for (int i = 0; i < ocr_result.size(); i++)
// {
// std::cout << i << "\t";
// std::vector<std::vector<int>> boxes = ocr_result[i].box;
// if (boxes.size() > 0)
// {
// std::cout << "det boxes: [";
// for (int n = 0; n < boxes.size(); n++)
// {
// std::cout << '[' << boxes[n][0] << ',' << boxes[n][1] << "]";
// if (n != boxes.size() - 1)
// std::cout << ',';
// }
// std::cout << "] ";
// }
// if (ocr_result[i].score != -1.0)
// {
// std::cout << "rec text: " << ocr_result[i].text << " rec score: " << ocr_result[i].score << " ";
// }
// if (ocr_result[i].cls_label != -1)
// {
// std::cout << "cls label: " << ocr_result[i].cls_label << " cls score: " << ocr_result[i].cls_score;
// }
// }
//}
//
//void VisualizeBboxes(const cv::Mat &srcimg,
// const std::vector<OCRPredictResult> &ocr_result,
// const std::string &save_path) {
// cv::Mat img_vis;
// srcimg.copyTo(img_vis);
// for (int n = 0; n < ocr_result.size(); n++) {
// cv::Point rook_points[4];
// for (int m = 0; m < ocr_result[n].box.size(); m++) {
// rook_points[m] = cv::Point(int(ocr_result[n].box[m][0]), int(ocr_result[n].box[m][1]));
// }
//
// const cv::Point *ppt[1] = {rook_points};
// int npt[] = {4};
// cv::polylines(img_vis, ppt, npt, 1, 1, CV_RGB(0, 255, 0), 2, 8, 0);
// }
//
// cv::imwrite(save_path, img_vis);
// std::cout << "The detection visualized image saved in " + save_path << std::endl;
//}
}
#define DLLAPI_EXPORTS
#include <Filesystem.h>
#include <algorithm>
#include <sys/stat.h>
#include <sys/types.h>
#include <fstream>
#ifdef _WIN32
#include <io.h>
#include <direct.h>
#include <Windows.h>
#else
#include <unistd.h>
#include <dirent.h>
#endif
#include <CommonUtility.h>
#include <opencv2/opencv.hpp>
#include <SimpleLog.h>
using namespace cv;
// Path separator (Linux: '/', Windows: '\\')
#ifdef _WIN32
#define PATH_SEPARATOR '\\'
#else
#define PATH_SEPARATOR '/'
#endif
namespace migraphxSamples
{
#if defined _WIN32 || defined WINCE
const char dir_separators[] = "/\\";
// Windows stand-in for the POSIX dirent structure; only the entry name is provided.
struct dirent
{
// Name of the current entry; set by readdir().
const char* d_name;
};
// Windows stand-in for the POSIX DIR handle, built on the Win32
// FindFirstFileEx / FindNextFile search API (see opendir/readdir/closedir).
struct DIR
{
// Find data of the current entry (wide-character variant on WINRT).
#ifdef WINRT
WIN32_FIND_DATAW data;
#else
WIN32_FIND_DATAA data;
#endif
// Search handle returned by FindFirstFileEx*.
HANDLE handle;
// Entry handed out by readdir().
dirent ent;
#ifdef WINRT
DIR() { }
// On WINRT readdir() allocates ent.d_name with new[], so it is released here.
~DIR()
{
if (ent.d_name)
delete[] ent.d_name;
}
#endif
};
// Windows replacement for POSIX opendir(): starts a file search for "<path>\*".
// Returns a heap-allocated DIR that must be released with closedir(), or 0
// when the search could not be started.
DIR* opendir(const char* path)
{
    DIR* search = new DIR;
    search->ent.d_name = 0; // readdir() uses a null name to recognise its first call

    const string pattern = string(path) + "\\*";
#ifdef WINRT
    wchar_t widePattern[MAX_PATH];
    const size_t converted = mbstowcs(widePattern, pattern.c_str(), MAX_PATH);
    CV_Assert((converted != MAX_PATH) && (converted != (size_t)-1));
    search->handle = ::FindFirstFileExW(widePattern, FindExInfoStandard,
                                        &search->data, FindExSearchNameMatch, NULL, 0);
#else
    search->handle = ::FindFirstFileExA(pattern.c_str(),
                                        FindExInfoStandard, &search->data, FindExSearchNameMatch, NULL, 0);
#endif

    if (search->handle != INVALID_HANDLE_VALUE)
    {
        return search;
    }

    // No search handle was opened, so only the DIR object itself needs releasing.
    delete search;
    return 0;
}
// Windows replacement for POSIX readdir(): returns the next entry of the
// search started by opendir(), or 0 when there are no more entries.
//
// The first call returns the entry already found by opendir(); every later
// call advances the search first. The returned pointer and its d_name are
// owned by `dir` and are only valid until the next readdir()/closedir() call.
dirent* readdir(DIR* dir)
{
#ifdef WINRT
    // A non-null name means this is not the first call: advance the search.
    if (dir->ent.d_name != 0)
    {
        if (::FindNextFileW(dir->handle, &dir->data) != TRUE)
            return 0;
    }
    // Convert the wide-character name into a freshly allocated narrow string.
    size_t asize = wcstombs(NULL, dir->data.cFileName, 0);
    CV_Assert((asize != 0) && (asize != (size_t)-1));
    char* aname = new char[asize + 1];
    aname[asize] = 0;
    wcstombs(aname, dir->data.cFileName, asize);
    // Release the previous entry's name; without this every call leaked one buffer
    // and only the last one was freed by ~DIR(). delete[] on null is a no-op.
    delete[] dir->ent.d_name;
    dir->ent.d_name = aname;
#else
    // A non-null name means this is not the first call: advance the search.
    if (dir->ent.d_name != 0)
    {
        if (::FindNextFileA(dir->handle, &dir->data) != TRUE)
            return 0;
    }
    // The name lives inside dir->data, so nothing is allocated here.
    dir->ent.d_name = dir->data.cFileName;
#endif
    return &dir->ent;
}
// Windows replacement for POSIX closedir(): closes the search handle and
// deletes the DIR object allocated by opendir(). `dir` must not be used afterwards.
void closedir(DIR* dir)
{
::FindClose(dir->handle);
delete dir;
}
#else
# include <dirent.h>
# include <sys/stat.h>
const char dir_separators[] = "/";
#endif
// Tells whether `path` names a directory.
//
// path: path to test.
// dir:  on Windows, an open search whose current entry is the one being tested;
//       when non-null its cached attributes are used instead of querying the
//       file system. Ignored on other platforms.
// Returns false when the path cannot be queried.
static bool isDir(const std::string &path, DIR* dir)
{
#if defined _WIN32 || defined WINCE
    // Fast path: attributes of the entry the search is currently positioned on.
    if (dir)
    {
        return (dir->data.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) != 0;
    }

    WIN32_FILE_ATTRIBUTE_DATA fileInfo;
#ifdef WINRT
    wchar_t widePath[MAX_PATH];
    const size_t converted = mbstowcs(widePath, path.c_str(), MAX_PATH);
    CV_Assert((converted != MAX_PATH) && (converted != (size_t)-1));
    const BOOL queried = ::GetFileAttributesExW(widePath, GetFileExInfoStandard, &fileInfo);
#else
    const BOOL queried = ::GetFileAttributesExA(path.c_str(), GetFileExInfoStandard, &fileInfo);
#endif
    return queried && ((fileInfo.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) != 0);
#else
    (void)dir;
    struct stat info;
    const bool queried = (stat(path.c_str(), &info) == 0);
    return queried && (S_ISDIR(info.st_mode) != 0);
#endif
}
// Returns true when `path` names a directory. Forwards to isDir() without an
// open directory handle, so the file system is queried directly.
bool IsDirectory(const string &path)
{
return isDir(path, NULL);
}
// Returns true when the attributes of `path` can be queried (file or
// directory), false otherwise.
bool Exists(const std::string& path)
{
#if defined _WIN32 || defined WINCE
    WIN32_FILE_ATTRIBUTE_DATA fileInfo;
#ifdef WINRT
    wchar_t widePath[MAX_PATH];
    const size_t converted = mbstowcs(widePath, path.c_str(), MAX_PATH);
    CV_Assert((converted != MAX_PATH) && (converted != (size_t)-1));
    const BOOL queried = ::GetFileAttributesExW(widePath, GetFileExInfoStandard, &fileInfo);
#else
    const BOOL queried = ::GetFileAttributesExA(path.c_str(), GetFileExInfoStandard, &fileInfo);
#endif
    return queried != 0;
#else
    struct stat info;
    return stat(path.c_str(), &info) == 0;
#endif
}
// Returns true for either path separator character, '/' or '\\', regardless
// of the platform the code is built for.
bool IsPathSeparator(char c)
{
    switch (c)
    {
    case '/':
    case '\\':
        return true;
    default:
        return false;
    }
}
// Joins two path fragments with exactly one separator between them.
//
// If either fragment is empty the other one is returned unchanged. Otherwise:
//   - base ends with a separator and path starts with one: path's leading
//     separator is dropped;
//   - neither does: PATH_SEPARATOR is inserted;
//   - exactly one does: the fragments are simply concatenated.
string JoinPath(const string& base, const string& path)
{
    if (base.empty())
    {
        return path;
    }
    if (path.empty())
    {
        return base;
    }

    const bool baseEndsWithSeparator = IsPathSeparator(*base.rbegin());
    const bool pathStartsWithSeparator = IsPathSeparator(*path.begin());

    if (baseEndsWithSeparator != pathStartsWithSeparator)
    {
        // Exactly one separator is already present at the seam.
        return base + path;
    }
    if (baseEndsWithSeparator)
    {
        // Two separators at the seam: keep the one that belongs to base.
        return base + path.substr(1);
    }
    return base + PATH_SEPARATOR + path;
}
// Matches a NUL-terminated text against a wildcard pattern.
//
// string: text to test.
// wild:   pattern; '*' matches any run of characters (including none) and '?'
//         matches exactly one character; everything else must match literally.
// Returns true when the whole text matches the whole pattern.
static bool wildcmp(const char *string, const char *wild)
{
    const char *text = string;
    const char *pattern = wild;
    const char *retryText = 0;    // where to resume in the text after a failed attempt
    const char *retryPattern = 0; // pattern position just after the most recent '*'

    // Everything before the first '*' has to match character by character.
    for (; *text && (*pattern != '*'); ++pattern, ++text)
    {
        if ((*pattern != *text) && (*pattern != '?'))
        {
            return false;
        }
    }

    while (*text)
    {
        if (*pattern == '*')
        {
            ++pattern;
            if (*pattern == 0)
            {
                // A trailing '*' swallows the rest of the text.
                return true;
            }
            retryPattern = pattern;
            retryText = text + 1;
        }
        else if ((*pattern == *text) || (*pattern == '?'))
        {
            ++pattern;
            ++text;
        }
        else
        {
            // Mismatch: let the last '*' absorb one more character and try again.
            pattern = retryPattern;
            text = retryText;
            ++retryText;
        }
    }

    // The text is consumed; only '*' may remain in the pattern.
    while (*pattern == '*')
    {
        ++pattern;
    }
    return *pattern == 0;
}
// Collects the entries of a directory whose names match a wildcard pattern.
//
// directory:          directory to list.
// wildchart:          pattern tested with wildcmp() against each entry name;
//                     an empty pattern matches every entry.
// result:             receives the matches (appended, never cleared).
// recursive:          descend into sub-directories.
// includeDirectories: also report directories themselves when they match.
// pathPrefix:         prefix joined to each entry name to form the reported path.
//
// The entries "." and ".." and entries with an empty name are skipped. A
// directory that cannot be opened is logged and otherwise ignored.
static void glob_rec(const string &directory, const string& wildchart, std::vector<string>& result,
    bool recursive, bool includeDirectories, const string& pathPrefix)
{
    DIR *listing = opendir(directory.c_str());
    if (listing == 0)
    {
        LOG_INFO(stdout, "could not open directory: %s", directory.c_str());
        return;
    }

    try
    {
        for (struct dirent *item = readdir(listing); item != 0; item = readdir(listing))
        {
            const char* name = item->d_name;
            const bool isEmptyName = (name[0] == 0);
            const bool isCurrentDir = (name[0] == '.' && name[1] == 0);
            const bool isParentDir = (name[0] == '.' && name[1] == '.' && name[2] == 0);
            if (isEmptyName || isCurrentDir || isParentDir)
            {
                continue;
            }

            const string fullPath = JoinPath(directory, name);
            const string reportedPath = JoinPath(pathPrefix, name);
            if (isDir(fullPath, listing))
            {
                if (recursive)
                {
                    glob_rec(fullPath, wildchart, result, recursive, includeDirectories, reportedPath);
                }
                if (!includeDirectories)
                {
                    continue;
                }
            }
            if (wildchart.empty() || wildcmp(name, wildchart.c_str()))
            {
                result.push_back(reportedPath);
            }
        }
    }
    catch (...)
    {
        // Do not leak the directory handle when something below throws.
        closedir(listing);
        throw;
    }
    closedir(listing);
}
// Lists the files (never directories) below a directory that match any of the
// given wildcard patterns.
//
// directory: directory to search.
// pattern:   one or more wildcard patterns separated by ','.
// result:    cleared, then filled with the matches and sorted.
// recursive: also search sub-directories.
// addPath:   true to report each match as `directory` joined with its relative
//            path, false to report the bare file name only.
void GetFileNameList(const string &directory, const string &pattern, std::vector<string>& result, bool recursive, bool addPath)
{
    const vector<string> patterns = SplitString(pattern, ",");
    result.clear();

    for (const string& eachPattern : patterns)
    {
        // Directories are requested from glob_rec and filtered out here.
        std::vector<string> matches;
        glob_rec(directory, eachPattern, matches, recursive, true, directory);

        for (const string& match : matches)
        {
            if (IsDirectory(match))
            {
                continue;
            }
            result.push_back(addPath ? match : GetFileName(match));
        }
    }
    std::sort(result.begin(), result.end());
}
// Like GetFileNameList(), but directories are reported as well when addPath is
// true.
//
// directory: directory to search.
// pattern:   one or more wildcard patterns separated by ','.
// result:    cleared, then filled with the matches and sorted.
// recursive: also search sub-directories.
// addPath:   true to report full paths; a directory is reported with a trailing
//            '/' and with every separator normalised to '/'. false to report
//            bare file names only, in which case directories are left out.
void GetFileNameList2(const string &directory, const string &pattern, std::vector<string>& result, bool recursive, bool addPath)
{
    const vector<string> patterns = SplitString(pattern, ",");
    result.clear();

    for (const string& eachPattern : patterns)
    {
        std::vector<string> matches;
        glob_rec(directory, eachPattern, matches, recursive, true, directory);

        for (const string& match : matches)
        {
            string filePath = match;
            if (IsDirectory(filePath))
            {
                // Mark directories with a trailing '/' and use '/' throughout.
                filePath = filePath + "/";
                std::replace_if(filePath.begin(), filePath.end(), IsPathSeparator, '/');
            }

            if (addPath)
            {
                result.push_back(filePath);
            }
            else if (!IsDirectory(filePath))
            {
                result.push_back(GetFileName(filePath));
            }
        }
    }
    std::sort(result.begin(), result.end());
}
// Deletes a file, or a directory together with everything inside it.
// A path that does not exist is ignored; a failed removal is logged and the
// function carries on.
void RemoveAll(const string& path)
{
    if (!Exists(path))
    {
        return;
    }

    // Plain file: unlink it and stop.
    if (!IsDirectory(path))
    {
#ifdef _MSC_VER
        const bool removed = _unlink(path.c_str()) == 0;
#else
        const bool removed = unlink(path.c_str()) == 0;
#endif
        if (!removed)
        {
            LOG_INFO(stdout, "can't remove file: %s\n", path.c_str());
        }
        return;
    }

    // Directory: empty it first (one level at a time, recursing per entry),
    // then remove the directory itself.
    std::vector<string> entries;
    GetFileNameList2(path, string(), entries, false, true);
    for (const string& entry : entries)
    {
        RemoveAll(entry);
    }
#ifdef _MSC_VER
    const bool removed = _rmdir(path.c_str()) == 0;
#else
    const bool removed = rmdir(path.c_str()) == 0;
#endif
    if (!removed)
    {
        LOG_INFO(stdout, "can't remove directory: %s\n", path.c_str());
    }
}
// Walks `directory` recursively, removes every entry whose name matches the
// wildcard `extension` (every entry when `extension` is empty) and logs each
// removal, then removes `directory` itself with RemoveAll().
//
// directory: directory to process.
// extension: wildcard pattern tested with wildcmp() against each entry name.
//
// NOTE(review): because of the final RemoveAll(directory), the directory and all
// of its remaining content are deleted whatever `extension` is — confirm that
// this is the intended contract.
void Remove(const string &directory, const string &extension)
{
DIR *dir;
// Running total of removals; it is function-static, so it keeps counting across
// the recursive calls and across separate top-level calls.
static int numberOfFiles = 0;
if ((dir = opendir(directory.c_str())) != 0)
{
/* find all the files and directories within directory */
try
{
struct dirent *ent;
while ((ent = readdir(dir)) != 0)
{
const char* name = ent->d_name;
// Skip empty names and the "." / ".." entries.
if ((name[0] == 0) || (name[0] == '.' && name[1] == 0) || (name[0] == '.' && name[1] == '.' && name[2] == 0))
continue;
string path = JoinPath(directory, name);
if (isDir(path, dir))
{
// The recursive call ends with RemoveAll(path), so the sub-directory is
// already gone when control returns here.
Remove(path, extension);
}
// Check whether the entry name matches the requested extension pattern
if (extension.empty() || wildcmp(name, extension.c_str()))
{
// RemoveAll() returns immediately for a path that no longer exists, but the
// counter below is still incremented and the entry is still logged as deleted.
RemoveAll(path);
++numberOfFiles;
LOG_INFO(stdout, "%s deleted! number of deleted files:%d\n", path.c_str(), numberOfFiles);
}
}
}
catch (...)
{
// Do not leak the directory handle when something above throws.
closedir(dir);
throw;
}
closedir(dir);
}
else
{
LOG_INFO(stdout, "could not open directory: %s", directory.c_str());
}
// Call RemoveAll to delete the directory itself
RemoveAll(directory);
}
string GetFileName(const string &path)
{
string fileName;
int indexOfPathSeparator = -1;
for (int i = path.size() - 1; i >= 0; --i)
{
if (IsPathSeparator(path[i]))
{
fileName = path.substr(i + 1, path.size() - i - 1);
indexOfPathSeparator = i;
break;
}
}
if (indexOfPathSeparator == -1)
{
fileName = path;
}
return fileName;
}
// Returns the file name of `path` (see GetFileName) with its last extension
// removed: "dir/photo.final.jpg" gives "photo.final".
//
// A name without an extension is returned unchanged instead of being reduced
// to an empty string. A dot in first position is not treated as the start of
// an extension, so ".bashrc" is also returned unchanged.
string GetFileName_NoExtension(const string &path)
{
    const string fileName = GetFileName(path);

    const string::size_type lastDot = fileName.rfind('.');
    if (lastDot == string::npos || lastDot == 0)
    {
        // No extension to strip.
        return fileName;
    }
    return fileName.substr(0, lastDot);
}
// Returns the extension of the last path component, including the leading
// dot: "dir/archive.tar.gz" gives ".gz".
//
// Only the part after the last path separator ('/' or '\\') is examined, so a
// dot in a directory name ("../data/file", "v1.2/file") is never mistaken for
// an extension. Returns an empty string when the last component has no dot.
std::string GetExtension(const std::string &path)
{
    // Find whichever comes last: a dot or a path separator.
    const std::string::size_type pos = path.find_last_of("./\\");
    if (pos == std::string::npos || path[pos] != '.')
    {
        // No dot at all, or the last dot belongs to an earlier path component.
        return std::string();
    }
    return path.substr(pos);
}
string GetParentPath(const string &path)
{
string fileName;
for (int i = path.size() - 1; i >= 0; --i)
{
if (IsPathSeparator(path[i]))
{
fileName = path.substr(0, i+1);
break;
}
}
return fileName;
}
// Creates a single directory; the parent must already exist.
//
// Returns true when the directory was created, or when creation failed but
// `path` is a directory anyway (typically because it already existed).
// On platforms other than Windows, Linux and macOS nothing is created and the
// result only reflects whether the directory already exists.
static bool CreateDirectory(const string &path)
{
#if defined WIN32 || defined _WIN32 || defined WINCE
#ifdef WINRT
    wchar_t wpath[MAX_PATH];
    size_t copied = mbstowcs(wpath, path.c_str(), MAX_PATH);
    CV_Assert((copied != MAX_PATH) && (copied != (size_t)-1));
    // The path has been converted to wide characters, so the wide-character API
    // is required; the global qualifier keeps the call bound to the Win32 function.
    int result = ::CreateDirectoryW(wpath, NULL) ? 0 : -1;
#else
    int result = _mkdir(path.c_str());
#endif
#elif defined __linux__ || defined __APPLE__
    int result = mkdir(path.c_str(), 0777);
#else
    int result = -1;
#endif
    if (result == -1)
    {
        // Creation failed: still a success if the directory is there.
        return IsDirectory(path);
    }
    return true;
}
// Creates a directory together with any missing parent directories.
//
// Trailing path separators are ignored. An empty path and "." count as
// already existing. Returns true when the directory exists on return, false
// when it, or one of its parents, could not be created.
bool CreateDirectories(const string &directoryPath)
{
    // Drop every trailing separator.
    string path = directoryPath;
    while (!path.empty() && IsPathSeparator(path[path.length() - 1]))
    {
        path.erase(path.length() - 1);
    }

    const bool nothingToCreate = path.empty() || path == "./" || path == ".\\" || path == ".";
    if (nothingToCreate || IsDirectory(path))
    {
        return true;
    }

    // Make sure the parent exists first. The last '/' decides where the parent
    // ends; '\\' is only considered when the path contains no '/'.
    size_t parentEnd = path.rfind('/');
    if (parentEnd == string::npos)
    {
        parentEnd = path.rfind('\\');
    }
    if (parentEnd != string::npos)
    {
        const string parentDirectory = path.substr(0, parentEnd);
        if (!parentDirectory.empty() && !CreateDirectories(parentDirectory))
        {
            return false;
        }
    }
    return CreateDirectory(path);
}
// Copies the content of one file to another, byte for byte.
//
// srcPath: file to read.
// dstPath: file to write; created if missing, overwritten otherwise.
// Returns true when the whole file was copied. Returns false, after logging
// the reason, when both paths are equal, when either file cannot be opened,
// or when reading or writing fails.
//
// The destination is only opened once the paths are known to differ and the
// source is known to be readable, so a rejected call never truncates a file.
bool CopyFile(const string srcPath, const string dstPath)
{
    if(srcPath==dstPath)
    {
        LOG_ERROR(stdout, "src can not be same with dst\n");
        return false;
    }

    std::ifstream srcFile(srcPath, std::ios::binary);
    if(!srcFile.is_open())
    {
        LOG_ERROR(stdout,"can not open %s\n",srcPath.c_str());
        return false;
    }

    std::ofstream dstFile(dstPath, std::ios::binary);
    if(!dstFile.is_open())
    {
        LOG_ERROR(stdout, "can not open %s\n", dstPath.c_str());
        return false;
    }

    // Copy in fixed-size chunks; the last read is short and sets eof/fail,
    // which ends the loop after its bytes have been written.
    char buffer[2048];
    while(srcFile)
    {
        srcFile.read(buffer, sizeof(buffer));
        dstFile.write(buffer, srcFile.gcount());
    }

    // Flush before judging success so buffered write errors are not missed.
    dstFile.close();
    if(srcFile.bad() || dstFile.fail())
    {
        LOG_ERROR(stdout, "fail to copy %s to %s\n", srcPath.c_str(), dstPath.c_str());
        return false;
    }
    return true;
}
bool CopyDirectories(string srcPath, const string dstPath)
{
if(srcPath==dstPath)
{
LOG_ERROR(stdout, "src can not be same with dst\n");
return false;
}
// ȥ������·���ָ���
srcPath = srcPath.substr(0, srcPath.size() - 1);
vector<string> fileNameList;
GetFileNameList2(srcPath, "", fileNameList, true, true);
string parentPathOfSrc=GetParentPath(srcPath);
int length=parentPathOfSrc.size();
// create all directories
for(int i=0;i<fileNameList.size();++i)
{
// create directory
string srcFilePath=fileNameList[i];
string subStr=srcFilePath.substr(length,srcFilePath.size()-length);
string dstFilePath=dstPath+subStr;
string parentPathOfDst=GetParentPath(dstFilePath);
CreateDirectories(parentPathOfDst);
}
// copy file
for(int i=0;i<fileNameList.size();++i)
{
string srcFilePath=fileNameList[i];
if (IsDirectory(srcFilePath))
{
continue;
}
string subStr=srcFilePath.substr(length,srcFilePath.size()-length);
string dstFilePath=dstPath+subStr;
// copy file
CopyFile(srcFilePath,dstFilePath);
// process
double process = (1.0*(i + 1) / fileNameList.size()) * 100;
LOG_INFO(stdout, "%s done! %f% \n", GetFileName(fileNameList[i]).c_str(), process);
}
LOG_INFO(stdout, "all done!(the number of files:%zu)\n", fileNameList.size());
return true;
}
}
#include <Sample.h>
#include <SimpleLog.h>
#include <Filesystem.h>
#include <DetectorYOLOV3.h>
#include <sys/time.h>
#include <Decoder.h>
#include <Queuethread.h>
using namespace cv;
using namespace std;
using namespace cv::dnn;
using namespace migraphx;
using namespace migraphxSamples;
// Decoder thread entry point.
// Opens the input video with a Decoder configured for queue->device, decodes
// it frame by frame, converts each frame to a cv::Mat (or DCU_Frame when built
// with DMA) and pushes it into the queue. Calls queue->finish() on every exit
// path; sets queue->DecodeEnd when the stream ends or a packet cannot be
// submitted.
static void DecoderThreadFunc(Queue* queue)
{
// `end` tracks the end-of-stream state:
//   0 = still reading packets,
//   1 = av_read_frame failed, decoder is being flushed with a NULL packet,
//   2 = decoder or filter graph reported AVERROR_EOF.
int ret, end = 0;
int frame_cnt = 0;
Queue* que = queue;
Decoder decoder(que->device);
InitializationParameterOfDecoder initParamOfDecoderYOLOV3;
#ifndef DMA
// Build without DMA: decode Mean.mp4; only CPU and _HW devices are valid.
initParamOfDecoderYOLOV3.src_filename = "../Resource/Images/Mean.mp4";
if (que->device == _HW)
{
// Hardware decoder with a scale filter whose output is downloaded to host memory.
initParamOfDecoderYOLOV3.str_devid[0] = {0};
initParamOfDecoderYOLOV3.xcoder_params = "out=hw";
initParamOfDecoderYOLOV3.dec_name = "h264_ni_quadra_dec";
initParamOfDecoderYOLOV3.filters_descr = "ni_quadra_scale=416:416:format=bgrp,hwdownload,format=bgrp";
} else if (que->device == _HW_DMA) {
// _HW_DMA was requested but the binary was built without DMA support.
LOG_ERROR(stdout, "Error program param or cmake param, not USE_P2P can`t set '--dma'!\n");
que->finish();
return;
}
#else
// Build with DMA: only the _HW_DMA device is accepted.
if (que->device == _HW_DMA) {
initParamOfDecoderYOLOV3.str_devid[0] = {0};
initParamOfDecoderYOLOV3.xcoder_params = "out=hw";
initParamOfDecoderYOLOV3.dec_name = "h264_ni_quadra_dec";
initParamOfDecoderYOLOV3.filters_descr = "ni_quadra_scale=416:416:format=rgba:is_p2p=1";
initParamOfDecoderYOLOV3.src_filename = "../Resource/Images/cr7_1920x1080.h264";
} else {
LOG_ERROR(stdout, "Error program param or cmake param, USE_P2P need set '--dma'!\n");
que->finish();
return;
}
#endif
ret = decoder.DecoderInit(initParamOfDecoderYOLOV3);
if (ret == -1)
{
que->finish();
return;
}
// Main demux/decode loop: one iteration per packet read attempt.
while(true)
{
if (av_read_frame(decoder.fmt_ctx, decoder.pkt) < 0)
{
// A read failure after the decoder has already reported EOF ends the loop.
if(end == 2)
{
que->DecodeEnd = true;
break;
}
// First read failure: switch to flush mode.
end = 1;
}
// NOTE(review): after a failed av_read_frame the packet may not have been
// filled, yet its stream_index is still inspected here — confirm this is
// reliable for the flush path.
if (decoder.pkt->stream_index == decoder.video_stream_idx) {
if(!end) {
ret = avcodec_send_packet(decoder.video_dec_ctx, decoder.pkt);
} else {
// A NULL packet asks the decoder to drain its buffered frames.
ret = avcodec_send_packet(decoder.video_dec_ctx, NULL);
}
if (ret < 0 && ret != AVERROR_EOF) {
fprintf(stderr, "Error submitting a packet for decoding\n");
que->DecodeEnd = true;
break;
}
// Pull every frame the decoder can currently produce.
while (ret >= 0 || end == 1)
{
ret = avcodec_receive_frame(decoder.video_dec_ctx, decoder.frame);
if (ret == AVERROR(EAGAIN)) {
break;
} else if (ret == AVERROR_EOF ) {
end = 2;
break;
} else if (ret < 0) {
av_log(NULL, AV_LOG_ERROR, "Error while receiving a frame from the decoder\n");
que->finish();
return;
}
decoder.frame->pts = decoder.frame->best_effort_timestamp;
frame_cnt++;
if (que->device == CPU)
{
// Software path: pack the three planes into one (height*3/2 x width)
// single-channel image and convert it to RGB.
// NOTE(review): the copies use width, not linesize, so they assume the
// planes carry no row padding — TODO confirm.
cv::Mat srcImage = cv::Mat::zeros(decoder.frame->height*3/2, decoder.frame->width, CV_8UC1);
memcpy(srcImage.data, (unsigned char*)decoder.frame->data[0], decoder.frame->width * decoder.frame->height);
memcpy(srcImage.data + decoder.frame->width * decoder.frame->height, (unsigned char*)decoder.frame->data[1], decoder.frame->width * decoder.frame->height/4);
memcpy(srcImage.data + decoder.frame->width * decoder.frame->height*5/4, (unsigned char*)decoder.frame->data[2], decoder.frame->width * decoder.frame->height/4);
cvtColor(srcImage, srcImage, COLOR_YUV420p2RGB);
que->enQueue(srcImage);
}
if (que->device == _HW || que->device == _HW_DMA)
{
// Hardware path: push the decoded frame through the filter graph.
if (av_buffersrc_add_frame_flags(decoder.buffersrc_ctx, decoder.frame, AV_BUFFERSRC_FLAG_KEEP_REF) < 0) {
av_log(NULL, AV_LOG_ERROR, "Error while feeding the filtergraph\n");
// This break leaves the receive loop, so the av_frame_unref(decoder.frame)
// at the bottom of that loop is skipped for this frame.
break;
}
// Drain every frame the filter graph has ready.
while (1)
{
ret = av_buffersink_get_frame(decoder.buffersink_ctx, decoder.filt_frame);
if (ret == AVERROR(EAGAIN))
{
break;
}
else if(ret == AVERROR_EOF)
{
end = 2;
break;
}
if (ret < 0)
{
que->finish();
return;
}
#ifndef DMA
if (que->device == _HW)
{
// Convert the downloaded frame to an interleaved cv::Mat according to its
// pixel format. An unhandled format leaves srcImage empty, and that empty
// Mat is still enqueued.
cv::Mat srcImage;
switch (decoder.filt_frame->format)
{
case AV_PIX_FMT_BGRP:
{
// Planar input: wrap planes 0, 1, 2 without copying and merge them, in that
// order, into one 3-channel image.
srcImage = cv::Mat::zeros(decoder.filt_frame->height, decoder.filt_frame->width, CV_8UC3);
cv::Mat mat_r = cv::Mat(decoder.filt_frame->height, decoder.filt_frame->width, CV_8UC1, (unsigned char*)decoder.filt_frame->data[0]);
cv::Mat mat_g = cv::Mat(decoder.filt_frame->height, decoder.filt_frame->width, CV_8UC1, (unsigned char*)decoder.filt_frame->data[1]);
cv::Mat mat_b = cv::Mat(decoder.filt_frame->height, decoder.filt_frame->width, CV_8UC1, (unsigned char*)decoder.filt_frame->data[2]);
cv::Mat Channels[3]{mat_r, mat_g, mat_b};
cv::merge(Channels, 3, srcImage);
break;
}
case AV_PIX_FMT_YUV420P:
{
// Same plane packing and conversion as the CPU path above.
srcImage = cv::Mat::zeros(decoder.filt_frame->height*3/2, decoder.filt_frame->width, CV_8UC1);
memcpy(srcImage.data, (unsigned char*)decoder.filt_frame->data[0], decoder.filt_frame->width * decoder.filt_frame->height);
memcpy(srcImage.data + decoder.filt_frame->width * decoder.filt_frame->height, (unsigned char*)decoder.filt_frame->data[1], decoder.filt_frame->width * decoder.filt_frame->height/4);
memcpy(srcImage.data + decoder.filt_frame->width * decoder.filt_frame->height*5/4, (unsigned char*)decoder.filt_frame->data[2], decoder.filt_frame->width * decoder.filt_frame->height/4);
cvtColor(srcImage, srcImage, COLOR_YUV420p2RGB);
break;
}
case AV_PIX_FMT_RGBA:
{
// Packed 4-channel input: copy, then drop the alpha channel.
srcImage = cv::Mat::zeros(decoder.filt_frame->height, decoder.filt_frame->width, CV_8UC4);
memcpy(srcImage.data, (unsigned char*)decoder.filt_frame->data[0], decoder.filt_frame->width * decoder.filt_frame->height * 4);
cvtColor(srcImage, srcImage, COLOR_BGRA2RGB);
break;
}
default:
break;
}
que->enQueue(srcImage);
av_frame_unref(decoder.filt_frame);
}
#else
if (que->device == _HW_DMA)
{
// DMA path: the frame stays in hardware memory; describe it in a DCU_Frame,
// allocate a device buffer of data_len bytes and let the decoder fill it.
DCU_Frame dcu_frame;
AVHWFramesContext *hwfc = (AVHWFramesContext *)decoder.filt_frame->hw_frames_ctx->data;
switch (hwfc->sw_format)
{
case AV_PIX_FMT_BGRP:
{
dcu_frame.format = AV_PIX_FMT_BGRP;
dcu_frame.data_len = decoder.filt_frame->width * decoder.filt_frame->height * 3;
dcu_frame.srcImage = cv::Mat::zeros(decoder.filt_frame->height, decoder.filt_frame->width, CV_8UC3);
break;
}
case AV_PIX_FMT_YUV420P:
{
dcu_frame.format = AV_PIX_FMT_YUV420P;
dcu_frame.data_len = decoder.filt_frame->width * decoder.filt_frame->height * 3 / 2;
dcu_frame.srcImage = cv::Mat::zeros(decoder.filt_frame->height*3/2, decoder.filt_frame->width, CV_8UC1);
break;
}
case AV_PIX_FMT_RGBA:
{
dcu_frame.format = AV_PIX_FMT_RGBA;
dcu_frame.data_len = decoder.filt_frame->width * decoder.filt_frame->height * 4;
dcu_frame.srcImage = cv::Mat::zeros(decoder.filt_frame->height, decoder.filt_frame->width, CV_8UC4);
break;
}
default:
// NOTE(review): format and data_len are not assigned here, yet data_len is
// used by hipMalloc below — confirm DCU_Frame initializes them.
break;
}
dcu_frame.width = decoder.filt_frame->width;
dcu_frame.height = decoder.filt_frame->height;
// The hipMalloc result is not checked. The buffer is released with hipFree
// in DetectorThreadFunc after detection.
hipMalloc((void**)&(dcu_frame.dcu_data), dcu_frame.data_len * sizeof(unsigned char));
ret = decoder.retrieve_filter_frame(dcu_frame, decoder.filt_frame);
if (ret)
av_log(NULL, AV_LOG_ERROR, "Error while retrieve_filter_frame with p2p.\n");
// Build the host-side image in dcu_frame.srcImage according to the format.
if(dcu_frame.format == AV_PIX_FMT_BGRP)
{
// Treat srcImage's buffer as three consecutive planes and merge them.
// NOTE(review): cv::merge writes into the same Mat whose buffer backs the
// three plane views — confirm the views stay valid during the merge.
cv::Mat mat_b = cv::Mat(decoder.filt_frame->height, decoder.filt_frame->width, CV_8UC1, (unsigned char*)dcu_frame.srcImage.data);
cv::Mat mat_g = cv::Mat(decoder.filt_frame->height, decoder.filt_frame->width, CV_8UC1, (unsigned char*)(dcu_frame.srcImage.data + decoder.filt_frame->height * decoder.filt_frame->width));
cv::Mat mat_r = cv::Mat(decoder.filt_frame->height, decoder.filt_frame->width, CV_8UC1, (unsigned char*)(dcu_frame.srcImage.data + decoder.filt_frame->height * decoder.filt_frame->width * 2));
cv::Mat Channels[3]{mat_r, mat_g, mat_b};
cv::merge(Channels, 3, dcu_frame.srcImage);
}
if(dcu_frame.format == AV_PIX_FMT_YUV420P)
cvtColor(dcu_frame.srcImage, dcu_frame.srcImage, COLOR_YUV420p2RGB);
if(dcu_frame.format == AV_PIX_FMT_RGBA)
cvtColor(dcu_frame.srcImage, dcu_frame.srcImage, COLOR_BGRA2RGB);
// `queue` is the function parameter; it is the same object as `que`.
queue->enQueue(dcu_frame);
av_frame_unref(decoder.filt_frame);
}
#endif
}
}
av_frame_unref(decoder.frame);
}
}
av_packet_unref(decoder.pkt);
}
LOG_INFO(stdout, "Decoder: ####### frame count: %d\n", frame_cnt);
que->finish();
}
// Detector thread entry point.
// Initializes a DetectorYOLOV3 from CONFIG_FILE, then repeatedly takes frames
// from the queue, runs detection, draws and logs the results, and finally
// prints throughput statistics. Terminates the whole process with exit(-1)
// when the detector cannot be initialized.
static void DetectorThreadFunc(Queue* que)
{
Queue* queue = que;
// DetectorYOLOV3 Init
DetectorYOLOV3 detector;
InitializationParameterOfDetector initParamOfDetectorYOLOV3;
initParamOfDetectorYOLOV3.parentPath = "";
initParamOfDetectorYOLOV3.configFilePath = CONFIG_FILE;
initParamOfDetectorYOLOV3.logName = "";
ErrorCode errorCode=detector.Initialize(initParamOfDetectorYOLOV3);
if(errorCode!=SUCCESS)
{
LOG_ERROR(stdout, "fail to initialize detector!\n");
exit(-1);
}
LOG_INFO(stdout, "succeed to initialize detector\n");
int frame_cnt = 0;
// dprep_time / deval_time / dpostp_time are passed to Detect() in the non-DMA
// build and stay 0 in the DMA build. dpostpTime accumulates the time spent
// drawing and logging results. dpropTime is never updated in this function,
// so it is always reported as 0.
double dprep_time = 0, deval_time = 0, dpostp_time = 0;
double start_time = getTickCount(), dpropTime = 0, dpostpTime = 0;
// Runs until the decoder thread sets DecodeEnd.
// NOTE(review): the flag is tested before dequeuing, so frames still queued
// when it becomes true appear to be dropped — confirm this is intended.
while (!queue->DecodeEnd) {
#ifdef DMA
DCU_Frame dcu_frame;
queue->deQueue(&dcu_frame);
// Skip iterations where the dequeued frame carries no host image.
if(dcu_frame.srcImage.empty()) {
// NOTE(review): dcu_frame.dcu_data is not freed on this path — confirm an
// empty srcImage implies no device buffer was allocated.
continue;
}
#else
cv::Mat InferImage;
queue->deQueue(&InferImage);
// Skip iterations where the dequeued image is empty.
if (InferImage.empty()) {
continue;
}
#endif
// detect
std::vector<ResultOfDetection> predictions;
double time1 = getTickCount();
#ifdef DMA
detector.Detect(dcu_frame, predictions);
#else
detector.Detect(InferImage, predictions, &dprep_time, &deval_time, &dpostp_time);
#endif
double time2 = getTickCount();
// Wall-clock duration of the Detect() call in milliseconds.
double elapsedTime = (time2 - time1)*1000 / getTickFrequency();
LOG_INFO(stdout, "inference time:%f ms\n", elapsedTime);
frame_cnt++;
#ifdef DMA
// Release the device buffer allocated for this frame by the decoder thread.
hipFree(dcu_frame.dcu_data);
#endif
// process result
double time0 = getTickCount();
LOG_INFO(stdout,"////////////////Detection Results////////////////\n");
for( int i = 0; i < predictions.size(); ++i)
{
ResultOfDetection result = predictions[i];
// Draw the bounding box and the class name (20 px above the box) on the frame.
#ifdef DMA
cv::rectangle(dcu_frame.srcImage, result.boundingBox, Scalar(0,255,255),2);
cv::putText(dcu_frame.srcImage, result.className, cv::Point(result.boundingBox.x, result.boundingBox.y-20), cv::FONT_HERSHEY_PLAIN, 2.0, Scalar(0, 0, 255), 2);
#else
cv::rectangle(InferImage, result.boundingBox, Scalar(0,255,255),2);
cv::putText(InferImage, result.className, cv::Point(result.boundingBox.x, result.boundingBox.y-20), cv::FONT_HERSHEY_PLAIN, 2.0, Scalar(0, 0, 255), 2);
#endif
LOG_INFO(stdout,"box:%d %d %d %d,label:%d,confidence:%f\n",result.boundingBox.x,
result.boundingBox.y,result.boundingBox.width,result.boundingBox.height,result.classID,result.confidence);
}
double time5 = getTickCount();
dpostpTime += (time5 - time0) * 1000 / getTickFrequency();
// X11 display can`t support in docker.
/*namedWindow("video", WINDOW_NORMAL | WINDOW_KEEPRATIO);
#ifdef DMA
imshow("video", dcu_frame.srcImage);
#else
imshow("video", InferImage);
#endif
if (waitKey(10) == 'q') {
break;
}*/
}
#ifdef DMA
// Release the detector's preprocess_Image buffer.
hipFree(detector.preprocess_Image);
#endif
double end_time = getTickCount();
// Summary: frame count, frames per second over the whole loop, total time and
// the per-stage timers, all times in milliseconds.
fprintf(stdout, "Finish ####### frame_cnt: %d, Inference fps: %.2f, all time: %.2f ms, dpropTime: %.2f ms, dprep_time: %.2f ms, deval_time: %.2f ms, dpostp_time: %.2f ms, dpostpTime: %.2f ms\n", frame_cnt, float(frame_cnt/((end_time - start_time)/getTickFrequency())), (end_time - start_time)/getTickFrequency()*1000, dpropTime, dprep_time, deval_time, dpostp_time, dpostpTime);
queue->finish();
}
// Runs the YOLOV3 sample on `device`: a decoder thread and a detector thread
// share one Queue (constructed with argument 1); the function returns after
// both threads have finished.
void Sample_DetectorYOLOV3(int device)
{
    // Automatic storage instead of new/delete: the queue's lifetime is exactly
    // this scope, and both threads are joined before it ends.
    Queue queue(1);
    queue.device = device;

    std::thread ThreadDecoder(DecoderThreadFunc, &queue);
    std::thread ThreadDetector(DetectorThreadFunc, &queue);
    ThreadDecoder.join();
    ThreadDetector.join();
}
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment