Commit a08a6cb4 authored by liucong

Simplify code

parent 6878b65f
@@ -12,7 +12,7 @@ set(CMAKE_BUILD_TYPE release)
 # add include directories
 set(INCLUDE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/Src/
     ${CMAKE_CURRENT_SOURCE_DIR}/Src/Utility/
-    ${CMAKE_CURRENT_SOURCE_DIR}/Src/PaddleOCR/VLPR/
+    ${CMAKE_CURRENT_SOURCE_DIR}/Src/PaddleOCR/
     $ENV{DTKROOT}/include/
     ${CMAKE_CURRENT_SOURCE_DIR}/depend/include/)
 include_directories(${INCLUDE_PATH})
@@ -37,12 +37,11 @@ link_libraries(${LIBRARY})
 # add source files
 set(SOURCE_FILES ${CMAKE_CURRENT_SOURCE_DIR}/Src/main.cpp
-    # ${CMAKE_CURRENT_SOURCE_DIR}/Src/Sample.cpp
-    ${CMAKE_CURRENT_SOURCE_DIR}/Src/PaddleOCR/VLPR/clipper.cpp
-    ${CMAKE_CURRENT_SOURCE_DIR}/Src/PaddleOCR/VLPR/OcrDB.cpp
-    ${CMAKE_CURRENT_SOURCE_DIR}/Src/PaddleOCR/VLPR/utility.cpp
-    ${CMAKE_CURRENT_SOURCE_DIR}/Src/PaddleOCR/VLPR/OcrSVTR.cpp
-    ${CMAKE_CURRENT_SOURCE_DIR}/Src/PaddleOCR/VLPR/VLPR.cpp
+    ${CMAKE_CURRENT_SOURCE_DIR}/Src/PaddleOCR/clipper.cpp
+    ${CMAKE_CURRENT_SOURCE_DIR}/Src/PaddleOCR/OcrDB.cpp
+    ${CMAKE_CURRENT_SOURCE_DIR}/Src/PaddleOCR/utility.cpp
+    ${CMAKE_CURRENT_SOURCE_DIR}/Src/PaddleOCR/OcrSVTR.cpp
+    ${CMAKE_CURRENT_SOURCE_DIR}/Src/PaddleOCR/VLPR.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/Src/Utility/CommonUtility.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/Src/Utility/Filesystem.cpp)
...
# PaddleOCR License Plate Recognition
Vehicle License Plate Recognition (VLPR) is an application of computer vision and image recognition to vehicle license plates. A VLPR system must extract and recognize the plates of moving vehicles against complex backgrounds, and the technique is widely used in highway vehicle management, parking-lot management, and urban traffic. This document describes how to build a MIGraphX C++ inference example on top of Baidu's open-source PaddleOCR license plate recognition models.
## Model Overview
PaddleOCR license plate recognition consists of two parts, text detection and text recognition: DBNet is used as the text detection model and SVTR as the text recognition model. DBNet is a segmentation-based text detection method. Whereas traditional segmentation methods rely on a fixed threshold, DBNet inserts the binarization step into the segmentation network and optimizes them jointly, so the network learns to predict an adaptive threshold for every pixel and can detect text of varying shapes in natural scenes at the pixel level. SVTR is an end-to-end text recognition model that handles both feature extraction and text transcription with a single vision model while keeping inference fast. The Baidu PaddleOCR project provides pretrained license plate models; this example runs inference with the blue/green/yellow plate models released by PaddleOCR. The recognition pipeline is: input -> image preprocessing -> text detection -> text recognition -> output.
Pretrained PaddleOCR license plate models can be downloaded from: https://pan.baidu.com/s/1aeIZpgOSnh52RlztGAHctw (extraction code: hs6u)
## Model Conversion
MIGraphX only accepts ONNX models as input, so this section describes how to convert the PaddleOCR models to ONNX.
1. Download the PaddleOCR code
```
git clone -b release/2.6 https://github.com/PaddlePaddle/PaddleOCR.git
cd PaddleOCR && python3.7 setup.py install
```
2. Install the DTK build of PaddlePaddle, download from: https://cancon.hpccube.com:65024/4/main/paddle
3. Install Paddle2ONNX
```
python3.7 -m pip install paddle2onnx
```
4. Place the downloaded license plate model archive into the `inference` folder and extract it there
```
mkdir inference
tar -xvzf CCPD.tar -C ./inference/
```
5. Convert the models
```
paddle2onnx --model_dir ./inference/CCPD/det/infer \
--model_filename inference.pdmodel \
--params_filename inference.pdiparams \
--save_file ./inference/CCPD/det_onnx/model.onnx \
--opset_version 10 \
--input_shape_dict="{'x':[-1,3,-1,-1]}" \
--enable_onnx_checker True
paddle2onnx --model_dir ./inference/CCPD/rec/infer \
--model_filename inference.pdmodel \
--params_filename inference.pdiparams \
--save_file ./inference/CCPD/rec_onnx/model.onnx \
--opset_version 10 \
--input_shape_dict="{'x':[-1,3,-1,-1]}" \
--enable_onnx_checker True
```
After the commands above finish, the ONNX models are saved in `./inference/CCPD/det_onnx/` and `./inference/CCPD/rec_onnx/`.
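As an optional sanity check (not part of the original tutorial), the exported files can be validated with the `onnx` Python package before handing them to MIGraphX; the paths below assume the default output locations used above.
```python
# Optional check of the exported models (paths as produced by the commands above).
import onnx

for path in ["./inference/CCPD/det_onnx/model.onnx",
             "./inference/CCPD/rec_onnx/model.onnx"]:
    model = onnx.load(path)              # parse the ONNX file
    onnx.checker.check_model(model)      # raises if the graph is malformed
    for inp in model.graph.input:        # print the (possibly dynamic) input dims
        dims = [d.dim_value if d.dim_value > 0 else "dynamic"
                for d in inp.type.tensor_type.shape.dim]
        print(path, inp.name, dims)
```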
## Preprocessing
### DBNet Preprocessing
Before a license plate image is fed to the DBNet model, the following preprocessing is applied:
- Normalization: every pixel value is multiplied by the scale factor, the mean is subtracted, and the result is divided by the standard deviation.
- The data layout is converted to NCHW.
- The image is resized so that its height and width are multiples of 32.
The DBNet preprocessing is implemented in the DB::Infer() function:
```
ErrorCode DB::Infer(const cv::Mat &img, std::vector<cv::Mat> &imgList)
{
...
cv::Mat srcImage;
cv::Mat resizeImg;
img.copyTo(srcImage);
int w = srcImage.cols;
int h = srcImage.rows;
float ratio = 1.f;
int maxWH = std::max(h, w);
if (maxWH > dbParameter.LimitSideLen)
{
if (h > w)
{
ratio = float(dbParameter.LimitSideLen) / float(h);
}
else
{
ratio = float(dbParameter.LimitSideLen) / float(w);
}
}
int resizeH = int(float(h) * ratio);
int resizeW = int(float(w) * ratio);
resizeH = std::max(int(round(float(resizeH) / 32) * 32), 32);
resizeW = std::max(int(round(float(resizeW) / 32) * 32), 32);
cv::resize(srcImage, resizeImg, cv::Size(resizeW, resizeH));
float ratioH = float(resizeH) / float(h);
float ratioW = float(resizeW) / float(w);
resizeImg.convertTo(resizeImg, CV_32FC3, 1.0/255.0);
std::vector<cv::Mat> bgrChannels(3);
cv::split(resizeImg, bgrChannels);
std::vector<float> mean = {0.485f, 0.456f, 0.406f};
std::vector<float> scale = {1 / 0.229f, 1 / 0.224f, 1 / 0.225f};
for (auto i = 0; i < bgrChannels.size(); i++)
{
bgrChannels[i].convertTo(bgrChannels[i], CV_32FC1, 1.0 * scale[i],
(0.0 - mean[i]) * scale[i]);
}
cv::merge(bgrChannels, resizeImg);
int rh = resizeImg.rows;
int rw = resizeImg.cols;
cv::Mat inputBlob;
inputBlob = cv::dnn::blobFromImage(resizeImg);
...
```
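The two convertTo calls above implement the scale/mean/std normalization as one affine transform per channel: after scaling to [0, 1], each channel value x becomes x * (1/std) - mean/std, which is exactly (x - mean)/std. A short NumPy sketch (not from the original code) that checks this equivalence:
```python
import numpy as np

mean = np.array([0.485, 0.456, 0.406], dtype=np.float32)
std  = np.array([0.229, 0.224, 0.225], dtype=np.float32)

# a random image already scaled to [0, 1], as after convertTo(CV_32FC3, 1.0/255.0)
img = np.random.randint(0, 256, (32, 32, 3)).astype(np.float32) / 255.0

direct = (img - mean) / std                              # textbook form
affine = img * (1.0 / std) + (0.0 - mean) * (1.0 / std)  # convertTo(alpha, beta) form

assert np.allclose(direct, affine, atol=1e-6)
```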
### SVTR Preprocessing
The input to the SVTR model is the plate region cropped out of the DBNet detection result. Before the crop is fed to the recognition model, the following preprocessing is applied:
- The data layout is converted to NCHW.
- The image is resized to [1,3,48,imgW].
- Normalization: every pixel value is multiplied by the scale factor, the mean is subtracted, and the result is divided by the standard deviation.
The SVTR input preprocessing is implemented in the SVTR::Infer() function:
```
ErrorCode SVTR::Infer(cv::Mat &img, std::string &resultsChar, float &resultsdScore, float &maxWHRatio)
{
...
cv::Mat srcImage;
cv::Mat resizeImg;
img.copyTo(srcImage);
float ratio = 1.f;
int imgC = 3, imgH = 48;
int resizeW;
int imgW = int((48 * maxWHRatio));
ratio = float(srcImage.cols) / float(srcImage.rows);
if (ceil(imgH * ratio) > imgW)
{
resizeW = imgW;
}
else
{
resizeW = int(ceil(imgH * ratio));
}
cv::resize(srcImage, resizeImg, cv::Size(resizeW, imgH));
cv::copyMakeBorder(resizeImg, resizeImg, 0, 0, 0,
int(imgW - resizeImg.cols), cv::BORDER_CONSTANT,
{127, 127, 127});
resizeImg.convertTo(resizeImg, CV_32FC3, 1.0/255.0);
std::vector<cv::Mat> bgrChannels(3);
cv::split(resizeImg, bgrChannels);
std::vector<float> mean = {0.485f, 0.456f, 0.406f};
std::vector<float> scale = {1 / 0.229f, 1 / 0.224f, 1 / 0.225f};
for (auto i = 0; i < bgrChannels.size(); i++)
{
bgrChannels[i].convertTo(bgrChannels[i], CV_32FC1, 1.0 * scale[i],
(0.0 - mean[i]) * scale[i]);
}
cv::merge(bgrChannels, resizeImg);
cv::Mat inputBlob = cv::dnn::blobFromImage(resizeImg);
...
```
## Inference
### DBNet Inference
The DBNet model runs with dynamic shapes: each input image has shape {1,3,rh,rw}. After inference, the result is copied into a one-dimensional vector.
```
ErrorCode DB::Infer(const cv::Mat &img, std::vector<cv::Mat> &imgList)
{
...
std::vector<std::size_t> inputShapeOfInfer={1,3,rh,rw};
// input data
std::unordered_map<std::string, migraphx::argument> inputData;
inputData[inputName]= migraphx::argument{migraphx::shape(inputShape.type(),inputShapeOfInfer), (float*)inputBlob.data};
// run inference
std::vector<migraphx::argument> inferenceResults = net.eval(inputData);
// get the inference result
migraphx::argument result = inferenceResults[0];
// convert the result to a vector
migraphx::shape outputShape = result.get_shape();
int shape[]={outputShape.lens()[0],outputShape.lens()[1],outputShape.lens()[2],outputShape.lens()[3]};
int n2 = outputShape.lens()[2];
int n3 = outputShape.lens()[3];
int n = n2 * n3;
std::vector<float> out(n);
memcpy(out.data(),result.data(),sizeof(float)*outputShape.elements());
out.resize(n);
...
}
```
To obtain the cropped plate images, a series of postprocessing steps is required. First the inference result is binarized to produce the binary image bitMap; then the plate-region boxes are extracted from the binary image. Each box stores the corner coordinates of a plate rectangle; this step is implemented in the DBPostProcessor class (see the BoxesFromBitmap() function). Finally the boxes are filtered in FilterTagDetRes(): only the boxes that satisfy the constraints are kept, and the original image is cropped according to their coordinates.
```
ErrorCode DB::Infer(const cv::Mat &img, std::vector<cv::Mat> &imgList)
{
...
std::vector<float> pred(n, 0.0);
std::vector<unsigned char> cbuf(n, ' ');
for (int i = 0; i < n; i++)
{
pred[i] = (float)(out[i]);
cbuf[i] = (unsigned char)((out[i]) * 255);
}
cv::Mat cbufMap(n2, n3, CV_8UC1, (unsigned char *)cbuf.data());
cv::Mat predMap(n2, n3, CV_32F, (float *)pred.data());
const double threshold = dbParameter.BinaryThreshold * 255;
const double maxvalue = 255;
cv::Mat bitMap;
cv::threshold(cbufMap, bitMap, threshold, maxvalue, cv::THRESH_BINARY);
cv::imwrite("./det_box.jpg", bitMap);
std::vector<std::vector<std::vector<int>>> boxes;
DBPostProcessor postProcessor;
boxes = postProcessor.BoxesFromBitmap(predMap, bitMap, dbParameter.BoxThreshold, dbParameter.UnclipRatio, dbParameter.ScoreMode);
boxes = postProcessor.FilterTagDetRes(boxes, ratioH, ratioW, srcImage);
std::vector<migraphxSamples::OCRPredictResult> ocrResults;
for (int i = 0; i < boxes.size(); i++)
{
OCRPredictResult res;
res.box = boxes[i];
ocrResults.push_back(res);
}
Utility::sorted_boxes(ocrResults);
for (int j = 0; j < ocrResults.size(); j++)
{
cv::Mat cropImg;
cropImg = Utility::GetRotateCropImage(img, ocrResults[j].box);
imgList.push_back(cropImg);
}
}
```
### SVTR Inference
As the SVTR preprocessing shows, the input of the recognition model does not have a fixed size, so SVTR, like DBNet, runs with dynamic shapes and its result is stored in a one-dimensional vector. To obtain the final recognition result, the out data must be postprocessed in two steps:
- First, for each position take the index argmaxIdx of the class with the highest probability together with that probability maxValue.
- Second, map the indices to characters, removing duplicates and the special character #.
The postprocessing code is shown below:
```
ErrorCode SVTR::Infer(cv::Mat &img, std::string &resultsChar, float &resultsdScore, float &maxWHRatio)
{
...
int argmaxIdx;
int lastIndex = 0;
float score = 0.f;
int count = 0;
float maxValue = 0.0f;
for (int j = 0; j < n2; j++)
{
argmaxIdx = int(std::distance(&out[(j) * n3],
std::max_element(&out[(j) * n3], &out[(j + 1) * n3])));
maxValue = float(*std::max_element(&out[(j) * n3],
&out[(j + 1) * n3]));
if (argmaxIdx > 0 && (!(n > 0 && argmaxIdx == lastIndex)))
{
score += maxValue;
count += 1;
resultsChar += charactorDict[argmaxIdx];
}
lastIndex = argmaxIdx;
}
resultsdScore = score / count;
return SUCCESS;
}
```
# PaddleOCR License Plate Recognition
Vehicle License Plate Recognition (VLPR) is an application of computer vision and image recognition to vehicle license plates. A VLPR system must extract and recognize the plates of moving vehicles against complex backgrounds, and the technique is widely used in highway vehicle management, parking-lot management, and urban traffic. This document describes how to build a MIGraphX Python inference example on top of Baidu's open-source PaddleOCR license plate recognition models.
## Model Overview
PaddleOCR license plate recognition consists of two parts, text detection and text recognition: DBNet is used as the text detection model and SVTR as the text recognition model. DBNet is a segmentation-based text detection method. Whereas traditional segmentation methods rely on a fixed threshold, DBNet inserts the binarization step into the segmentation network and optimizes them jointly, so the network learns to predict an adaptive threshold for every pixel and can detect text of varying shapes in natural scenes at the pixel level. SVTR is an end-to-end text recognition model that handles both feature extraction and text transcription with a single vision model while keeping inference fast. The Baidu PaddleOCR project provides pretrained license plate models; this example runs inference with the blue/green/yellow plate models released by PaddleOCR. The recognition pipeline is: input -> image preprocessing -> text detection -> text recognition -> output.
Pretrained PaddleOCR license plate models can be downloaded from: https://pan.baidu.com/s/1aeIZpgOSnh52RlztGAHctw (extraction code: hs6u)
## Model Conversion
MIGraphX only accepts ONNX models as input, so this section describes how to convert the PaddleOCR models to ONNX.
1. Download the PaddleOCR code
```
git clone -b release/2.6 https://github.com/PaddlePaddle/PaddleOCR.git
cd PaddleOCR && python3.7 setup.py install
```
2. Install the DTK build of PaddlePaddle, download from: https://cancon.hpccube.com:65024/4/main/paddle
3. Install Paddle2ONNX
```
python3.7 -m pip install paddle2onnx
```
4. Place the downloaded license plate model archive into the `inference` folder and extract it there
```
mkdir inference
tar -xvzf CCPD.tar -C ./inference/
```
5. Convert the models
```
paddle2onnx --model_dir ./inference/CCPD/det/infer \
--model_filename inference.pdmodel \
--params_filename inference.pdiparams \
--save_file ./inference/CCPD/det_onnx/model.onnx \
--opset_version 10 \
--input_shape_dict="{'x':[-1,3,-1,-1]}" \
--enable_onnx_checker True
paddle2onnx --model_dir ./inference/CCPD/rec/infer \
--model_filename inference.pdmodel \
--params_filename inference.pdiparams \
--save_file ./inference/CCPD/rec_onnx/model.onnx \
--opset_version 10 \
--input_shape_dict="{'x':[-1,3,-1,-1]}" \
--enable_onnx_checker True
```
After the commands above finish, the ONNX models are saved in `./inference/CCPD/det_onnx/` and `./inference/CCPD/rec_onnx/`.
Note: the environment used for model conversion is the same as the environment the program runs in, so only MIGraphX needs to be installed in addition when running the example.
## Model Initialization
Model initialization mainly parses the models, queries their input attributes, and compiles them. Because this example performs license plate recognition with dynamic-shape inference, the maximum input shape must be specified when each model is parsed: the maximum input shape of the DB detection model is set to [1,3,2496,2496] and that of the SVTR recognition model to [1,3,48,320].
```
class det_rec_functions(object):
...
# parse the detection model
detInput = {"x":[1,3,2496,2496]}
self.modelDet = migraphx.parse_onnx(self.det_file, map_input_dims=detInput)
self.inputName = self.modelDet.get_parameter_names()[0]
self.inputShape = self.modelDet.get_parameter_shapes()[self.inputName].lens()
print("DB inputName:{0} \nDB inputShape:{1}".format(self.inputName, self.inputShape))
# compile the model
self.modelDet.compile(t=migraphx.get_target("gpu"), device_id=0) # device_id: GPU device to use, default is device 0
print("Success to compile DB")
# parse the recognition model
recInput = {"x":[1,3,48,320]}
self.modelRec = migraphx.parse_onnx(self.rec_file, map_input_dims=recInput)
self.inputName = self.modelRec.get_parameter_names()[0]
self.inputShape = self.modelRec.get_parameter_shapes()[self.inputName].lens()
print("SVTR inputName:{0} \nSVTR inputShape:{1}".format(self.inputName, self.inputShape))
# compile the model
self.modelRec.compile(t=migraphx.get_target("gpu"), device_id=0) # device_id: GPU device to use, default is device 0
print("Success to compile SVTR")
...
```
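The tutorial does not show the call that actually feeds data to the compiled programs. A minimal sketch of what that step could look like is given below; the helper name is made up, and it assumes the MIGraphX Python API's program.run() accepting a dict that maps the input name to a migraphx.argument built from a NumPy array (check against the API of your installed MIGraphX version):
```python
import numpy as np
import migraphx  # available in the runtime image used by this example

# Hypothetical helper (not part of the tutorial): run the compiled detection
# program on a preprocessed NCHW float32 blob and return the probability map.
def run_detection(model, input_name, blob):
    # Assumption: program.run takes {name: argument} and returns a list of
    # output arguments; np.array() copies the first output into NumPy.
    result = model.run({input_name: migraphx.argument(blob.astype(np.float32))})[0]
    prob = np.array(result)                      # roughly shape (1, 1, H, W)
    return prob.reshape(prob.shape[-2], prob.shape[-1])

# usage sketch, with `ocr = det_rec_functions(image)` constructed as above:
# prob_map = run_detection(ocr.modelDet, "x", blob)
```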
## Preprocessing
### DBNet Preprocessing
Before a license plate image is fed to the DBNet model, the following preprocessing is applied:
- Normalization: every pixel value is multiplied by the scale factor, the mean is subtracted, and the result is divided by the standard deviation.
- The data layout is converted to NCHW.
- The image is resized so that its height and width are multiples of 32.
These steps are implemented in the NormalizeImage, ToCHWImage, and DetResizeForTest classes; the resizing code is shown below:
```
class DetResizeForTest(object):
...
def resize_image_type0(self, img):
"""
resize image to a size multiple of 32 which is required by the network
args:
img(array): array with shape [h, w, c]
return(tuple):
img, (ratio_h, ratio_w)
"""
limit_side_len = self.limit_side_len
h, w, _ = img.shape
# limit the max side
if max(h, w) > limit_side_len:
if h > w:
ratio = float(limit_side_len) / h
else:
ratio = float(limit_side_len) / w
else:
ratio = 1.
resize_h = int(h * ratio)
resize_w = int(w * ratio)
resize_h = int(round(resize_h / 32) * 32)
resize_w = int(round(resize_w / 32) * 32)
try:
if int(resize_w) <= 0 or int(resize_h) <= 0:
return None, (None, None)
img = cv2.resize(img, (int(resize_w), int(resize_h)))
except:
print(img.shape, resize_w, resize_h)
sys.exit(0)
ratio_h = resize_h / float(h)
ratio_w = resize_w / float(w)
return img, [ratio_h, ratio_w]
```
The limit_side_len parameter caps the length of the longest side of the input image.
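As a concrete illustration of the arithmetic above (the image size and limit below are chosen for the example, not taken from the tutorial), a 1160x720 image with limit_side_len = 960 is first scaled by 960/1160 and then snapped to multiples of 32:
```python
# Worked example of resize_image_type0's size computation (hypothetical input).
h, w, limit_side_len = 1160, 720, 960      # portrait image, longest side 1160

if max(h, w) > limit_side_len:
    ratio = float(limit_side_len) / h if h > w else float(limit_side_len) / w
else:
    ratio = 1.0

resize_h = int(round(int(h * ratio) / 32) * 32)   # 960
resize_w = int(round(int(w * ratio) / 32) * 32)   # 608
print(resize_h, resize_w, resize_h / float(h), resize_w / float(w))
```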
### SVTR Preprocessing
The input to the SVTR model is the plate region cropped out of the DB detection result. Before the crop is fed to the recognition model, the following preprocessing is applied:
- Pixel values are normalized to [-1, 1].
- The data layout is converted to NCHW.
- The image is resized to [1,3,48,imgW].
The width imgW of the resized image is determined by the max_wh_ratio parameter, and W is also the dynamic-shape dimension of the SVTR inference.
```
class det_rec_functions(object):
...
def resize_norm_img(self, img, max_wh_ratio):
imgC, imgH, imgW = [int(v) for v in "3, 48, 320".split(",")]
assert imgC == img.shape[2]
imgW = int((48 * max_wh_ratio))
h, w = img.shape[:2]
ratio = w / float(h)
if math.ceil(imgH * ratio) > imgW:
resized_w = imgW
else:
resized_w = int(math.ceil(imgH * ratio))
resized_image = cv2.resize(img, (resized_w, imgH))
resized_image = resized_image.astype('float32')
resized_image = resized_image.transpose((2, 0, 1)) / 255
resized_image -= 0.5
resized_image /= 0.5
padding_im = np.zeros((imgC, imgH, imgW), dtype=np.float32)
padding_im[:, :, 0:resized_w] = resized_image
return padding_im
...
```
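A short usage sketch of resize_norm_img (the crop size below is made up for illustration, and `ocr` stands for an initialized det_rec_functions instance as defined earlier):
```python
import numpy as np

# hypothetical plate crop returned by the detection stage: h=40, w=120, BGR uint8
crop = np.zeros((40, 120, 3), dtype=np.uint8)
max_wh_ratio = crop.shape[1] / float(crop.shape[0])   # 3.0
print(int(48 * max_wh_ratio))                         # 144: target width imgW

# ocr = det_rec_functions(image)                      # as constructed in the tutorial
# blob = ocr.resize_norm_img(crop, max_wh_ratio)
# blob.shape == (3, 48, 144): the crop is resized to height 48, padded on the
# right with zeros when resized_w < imgW, and its values lie in [-1, 1].
```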
## Inference
### DBNet Inference
After the MIGraphX inference result is obtained, the following postprocessing produces the text boxes:
- Step 1: binarize the probability map with the DBPostProcess binarization threshold thresh=0.3 to obtain the binary map bitmap, then use cv2.findContours() to find the contours of the plate regions in the binary map; the max_candidates threshold limits the number of contours.
- Step 2: use get_mini_boxes() to compute the minimum bounding rectangle of each plate contour, returning its corner coordinates points and its shortest side sside; rectangles whose sside is smaller than min_size are discarded.
- Step 3: for the remaining rectangles, compute the prediction score with box_score_fast() and drop rectangles whose score is below the box_thresh threshold.
- Step 4: apply the inverse shrink (unclip) operation to the surviving rectangles, repeat step 2, and map the coordinates back to the original image. These steps yield the set of plate-region boxes in the input image.
The code is shown below:
```
# postprocessing of the detection result
class DBPostProcess(object):
...
def boxes_from_bitmap(self, pred, _bitmap, dest_width, dest_height):
'''
_bitmap: single map with shape (1, H, W),
whose values are binarized as {0, 1}
'''
bitmap = _bitmap
height, width = bitmap.shape
outs = cv2.findContours((bitmap * 255).astype(np.uint8), cv2.RETR_LIST,
cv2.CHAIN_APPROX_SIMPLE)
if len(outs) == 3:
img, contours, _ = outs[0], outs[1], outs[2]
elif len(outs) == 2:
contours, _ = outs[0], outs[1]
num_contours = min(len(contours), self.max_candidates)
boxes = []
scores = []
for index in range(num_contours):
contour = contours[index]
points, sside = self.get_mini_boxes(contour)
if sside < self.min_size:
continue
points = np.array(points)
score = self.box_score_fast(pred, points.reshape(-1, 2))
if self.box_thresh > score:
continue
box = self.unclip(points).reshape(-1, 1, 2)
box, sside = self.get_mini_boxes(box)
if sside < self.min_size + 2:
continue
box = np.array(box)
box[:, 0] = np.clip(
np.round(box[:, 0] / width * dest_width), 0, dest_width)
box[:, 1] = np.clip(
np.round(box[:, 1] / height * dest_height), 0, dest_height)
boxes.append(box.astype(np.int16))
scores.append(score)
return np.array(boxes, dtype=np.int16), scores
...
```
After the predicted text boxes are obtained, the next step is to filter them. The main steps are:
- Sort the corner points of each box clockwise.
- Clip the corner coordinates so that they do not fall outside the image.
- Compute the L2 norm, i.e. the straight-line distance between corner points, to obtain the box width and height.
- Discard boxes whose width or height is 3 pixels or less.
```
class det_rec_functions(object):
...
def filter_tag_det_res(self, dt_boxes, image_shape):
img_height, img_width = image_shape[0:2]
dt_boxes_new = []
for box in dt_boxes:
box = self.order_points_clockwise(box)
box = self.clip_det_res(box, img_height, img_width)
rect_width = int(np.linalg.norm(box[0] - box[1]))
rect_height = int(np.linalg.norm(box[0] - box[3]))
if rect_width <= 3 or rect_height <= 3:
continue
dt_boxes_new.append(box)
dt_boxes = np.array(dt_boxes_new)
return dt_boxes
def sorted_boxes(self, dt_boxes):
"""
Sort text boxes in order from top to bottom, left to right
args:
dt_boxes(array):detected text boxes with shape [4, 2]
return:
sorted boxes(array) with shape [4, 2]
"""
num_boxes = dt_boxes.shape[0]
sorted_boxes = sorted(dt_boxes, key=lambda x: (x[0][1], x[0][0]))
_boxes = list(sorted_boxes)
for i in range(num_boxes - 1):
if abs(_boxes[i + 1][0][1] - _boxes[i][0][1]) < 10 and \
(_boxes[i + 1][0][0] < _boxes[i][0][0]):
tmp = _boxes[i]
_boxes[i] = _boxes[i + 1]
_boxes[i + 1] = tmp
return _boxes
...
```
### SVTR Postprocessing
The SVTR postprocessing decodes the inference result into the recognized characters. The character classes predicted by the recognition model are stored in the ppocr_keys_v1.txt file, 6625 characters in total, with # as the first character and a space as the last; the postprocessing is implemented in the process_pred class. The MIGraphX result pred has three dimensions: the first is the batch dimension, the second is the number of positions predicted for the current image (including repeated characters and blanks), and the third holds the class probabilities. The postprocessing therefore first takes, for each position, the index of the most probable class and its probability, and then maps the indices to characters while removing repeats and the special character, which yields the recognition result for the image.
```
class process_pred(object):
...
def decode(self, text_index, text_prob=None, is_remove_duplicate=False):
result_list = []
ignored_tokens = [0]
batch_size = len(text_index)
for batch_idx in range(batch_size):
char_list = []
conf_list = []
for idx in range(len(text_index[batch_idx])):
if text_index[batch_idx][idx] in ignored_tokens:
continue
if is_remove_duplicate:
if idx > 0 and text_index[batch_idx][idx - 1] == text_index[batch_idx][idx]:
continue
char_list.append(self.character[int(text_index[batch_idx][idx])])
if text_prob is not None:
conf_list.append(text_prob[batch_idx][idx])
else:
conf_list.append(1)
text = ''.join(char_list)
result_list.append((text, np.mean(conf_list)))
return result_list
def __call__(self, preds, label=None):
if not isinstance(preds, np.ndarray):
preds = np.array(preds)
preds_idx = preds.argmax(axis=2)
preds_prob = preds.max(axis=2)
text = self.decode(preds_idx, preds_prob, is_remove_duplicate=True)
if label is None:
return text
label = self.decode(label)
return text, label
```
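To make the two decoding steps concrete, here is a small self-contained example with a toy character dictionary (the dictionary and scores are invented for illustration; the real dictionary is ppocr_keys_v1.txt as described above):
```python
import numpy as np

# toy dictionary: index 0 is the ignored blank '#', as in the tutorial
character = ['#', '皖', 'A', 'D', '1', '9', '0', '6']

# toy per-position class probabilities, shape (batch=1, positions=6, classes=8)
preds = np.zeros((1, 6, 8), dtype=np.float32)
for pos, cls in enumerate([1, 2, 2, 0, 3, 4]):    # 皖 A A # D 1
    preds[0, pos, cls] = 0.9

preds_idx = preds.argmax(axis=2)                  # step 1: most probable class per position
preds_prob = preds.max(axis=2)

text, confs = [], []
for i, idx in enumerate(preds_idx[0]):            # step 2: drop blanks and repeats
    if idx == 0:
        continue
    if i > 0 and preds_idx[0][i - 1] == idx:
        continue
    text.append(character[idx])
    confs.append(preds_prob[0][i])
print(''.join(text), float(np.mean(confs)))       # 皖AD1 0.9
```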
@@ -290,8 +290,8 @@ class det_rec_functions(object):
     def __init__(self, image):
         self.img = image.copy()
-        self.det_file = '../Resource/Models/PaddleOCR/VLPR/db.onnx'
-        self.rec_file = '../Resource/Models/PaddleOCR/VLPR/svtr.onnx'
+        self.det_file = '../Resource/Models/db.onnx'
+        self.rec_file = '../Resource/Models/svtr.onnx'
         # parse the detection model
         detInput = {"x":[1,3,2496,2496]}
@@ -318,7 +318,7 @@ class det_rec_functions(object):
         print("Success to compile SVTR")
         self.infer_before_process_op, self.det_re_process_op = self.get_process()
-        self.postprocess_op = process_pred('../Resource/Models/PaddleOCR/VLPR/ppocr_keys_v1.txt', 'ch', True)
+        self.postprocess_op = process_pred('../Resource/Models/ppocr_keys_v1.txt', 'ch', True)
     # image preprocessing
     def transform(self, data, ops=None):
...
@@ -8,7 +8,9 @@
 DBNet is a segmentation-based text detection method. Whereas traditional segmentation methods rely on a fixed threshold, DBNet inserts the binarization step into the segmentation network and optimizes them jointly, so the network learns to predict an adaptive threshold for every pixel and can detect text of varying shapes in natural scenes at the pixel level. SVTR is an end-to-end text recognition model that handles both feature extraction and text transcription with a single vision model while keeping inference fast. The Baidu PaddleOCR project provides pretrained license plate models; this example runs inference with the blue/green/yellow plate models released by PaddleOCR.
-## C++ Inference
+## Python Inference
+The following describes how to run the Python example; the inference code itself is explained in detail in Doc/Tutorial_Python.md.
 ### Pull the Image
@@ -18,6 +20,40 @@
 docker pull image.sourcefind.cn:5000/dcu/admin/base/custom:ort1.14.0_migraphx3.0.0-dtk22.10.1
 ```
+### Inference Example
+Install the Python dependencies:
+```
+# enter the Python example directory
+cd ./Python
+# install the dependencies
+pip install -r requirements.txt
+```
+Once the dependencies are installed, run the example from the current directory:
+```
+# enable the environment variable
+export MIGRAPHX_DYNAMIC_SHAPE=1
+# run the example program
+python PaddleOCR_infer_migraphx.py
+```
+The PaddleOCR recognition result is:
+```
+皖AD19906
+```
+## C++ Inference
+The following describes how to run the C++ example; the inference code itself is explained in detail in Doc/Tutorial_Cpp.md.
+Refer to the setup in the Python inference section above and pull the inference docker image from SourceFind.
 ### Install the OpenCV Dependency
 ```python
@@ -61,11 +97,17 @@ export LD_LIBRARY_PATH=<path_to_migraphx_samples>/depend/lib/:$LD_LIBRARY_PATH
 source ~/.bashrc
 ```
-### Run the Example
-After the PaddleOCR license plate recognition project has been built successfully, run the example from the build directory with the following commands:
+### Inference Example
+After the PaddleOCR license plate recognition project has been built successfully, run the example with the following commands:
 ```
+# enter the migraphx samples project root
+cd <path_to_migraphx_samples>
+# enter the build directory
+cd ./build/
 # enable the environment variable
 export MIGRAPHX_DYNAMIC_SHAPE=1
@@ -79,36 +121,6 @@ The PaddleOCR recognition result is:
 皖AD19906
 ```
-## Python Inference
-The PaddleOCR project also provides a Python inference example.
-### Environment Setup
-It is recommended to use the same image as the C++ example. Install the Python dependencies:
-```
-# enter the Python example directory
-cd ./Python
-# install the dependencies
-pip install -r requirements.txt
-```
-### Run the Example
-After the environment is set up, run the example from the current directory:
-```
-python PaddleOCR_infer_migraphx.py
-```
-The PaddleOCR recognition result is:
-```
-皖AD19906
-```
 ## Version History
  https://developer.hpccube.com/codes/modelzoo/paddleocr_migraphx
...
@@ -3,7 +3,7 @@
 <!-- PaddleOCR plate detection -->
 <OcrDB>
-    <ModelPath>"../Resource/Models/PaddleOCR/VLPR/db.onnx"</ModelPath>
+    <ModelPath>"../Resource/Models/db.onnx"</ModelPath>
     <BinaryThreshold>0.3</BinaryThreshold>
     <BoxThreshold>0.5</BoxThreshold>
     <UnclipRatio>1.6</UnclipRatio>
@@ -13,7 +13,7 @@
 <!-- PaddleOCR plate recognition -->
 <OcrSVTR>
-    <ModelPath>"../Resource/Models/PaddleOCR/VLPR/svtr.onnx"</ModelPath>
-    <DictPath>"../Resource/Models/PaddleOCR/VLPR/ppocr_keys_v1.txt"</DictPath>
+    <ModelPath>"../Resource/Models/svtr.onnx"</ModelPath>
+    <DictPath>"../Resource/Models/ppocr_keys_v1.txt"</DictPath>
 </OcrSVTR>
 </opencv_storage>
#include <OcrDB.h> #include <OcrDB.h>
#include <migraphx/onnx.hpp> #include <migraphx/onnx.hpp>
#include <migraphx/gpu/target.hpp> #include <migraphx/gpu/target.hpp>
#include <migraphx/gpu/hip.hpp>
#include <migraphx/generate.hpp>
#include <migraphx/quantization.hpp>
#include <migraphx/reshape2.hpp>
#include <opencv2/dnn.hpp>
#include <CommonUtility.h>
#include <Filesystem.h> #include <Filesystem.h>
#include <SimpleLog.h> #include <SimpleLog.h>
...@@ -15,7 +11,7 @@ using namespace cv::dnn; ...@@ -15,7 +11,7 @@ using namespace cv::dnn;
namespace migraphxSamples namespace migraphxSamples
{ {
DB::DB():logFile(NULL) DB::DB()
{ {
} }
...@@ -29,18 +25,23 @@ DB::~DB() ...@@ -29,18 +25,23 @@ DB::~DB()
ErrorCode DB::Initialize(InitializationParameterOfDB InitializationParameterOfDB) ErrorCode DB::Initialize(InitializationParameterOfDB InitializationParameterOfDB)
{ {
// 初始化(获取日志文件,加载配置文件等) // 读取配置文件
ErrorCode errorCode = DoCommonInitialization(InitializationParameterOfDB); std::string configFilePath=InitializationParameterOfDB.configFilePath;
if (errorCode!=SUCCESS) if(Exists(configFilePath)==false)
{ {
LOG_ERROR(logFile, "fail to DoCommonInitialization\n"); LOG_ERROR(stdout, "no configuration file!\n");
return errorCode; return CONFIG_FILE_NOT_EXIST;
} }
LOG_INFO(logFile, "success to DoCommonInitialization\n"); if(!configurationFile.open(configFilePath, cv::FileStorage::READ))
{
LOG_ERROR(stdout, "fail to open configuration file\n");
return FAIL_TO_OPEN_CONFIG_FILE;
}
LOG_INFO(stdout, "succeed to open configuration file\n");
// 获取配置文件参数 // 获取配置文件参数
FileNode netNode = configurationFile["OcrDB"]; cv::FileNode netNode = configurationFile["OcrDB"];
string modelPath = initializationParameter.parentPath + (string)netNode["ModelPath"]; std::string modelPath = (string)netNode["ModelPath"];
dbParameter.BinaryThreshold = (float)netNode["BinaryThreshold"]; dbParameter.BinaryThreshold = (float)netNode["BinaryThreshold"];
dbParameter.BoxThreshold = (float)netNode["BoxThreshold"]; dbParameter.BoxThreshold = (float)netNode["BoxThreshold"];
dbParameter.UnclipRatio = (float)netNode["UnclipRatio"]; dbParameter.UnclipRatio = (float)netNode["UnclipRatio"];
...@@ -50,20 +51,23 @@ ErrorCode DB::Initialize(InitializationParameterOfDB InitializationParameterOfDB ...@@ -50,20 +51,23 @@ ErrorCode DB::Initialize(InitializationParameterOfDB InitializationParameterOfDB
// 加载模型 // 加载模型
if(Exists(modelPath)==false) if(Exists(modelPath)==false)
{ {
LOG_ERROR(logFile,"%s not exist!\n",modelPath.c_str()); LOG_ERROR(stdout,"%s not exist!\n",modelPath.c_str());
return MODEL_NOT_EXIST; return MODEL_NOT_EXIST;
} }
migraphx::onnx_options onnx_options; migraphx::onnx_options onnx_options;
onnx_options.map_input_dims["x"]={1,3,2496,2496}; onnx_options.map_input_dims["x"]={1,3,2496,2496}; // 设置最大shape
net = migraphx::parse_onnx(modelPath, onnx_options); net = migraphx::parse_onnx(modelPath, onnx_options);
LOG_INFO(logFile,"succeed to load model: %s\n",GetFileName(modelPath).c_str()); LOG_INFO(stdout,"succeed to load model: %s\n",GetFileName(modelPath).c_str());
// 获取模型输入属性 // 获取模型输入属性
std::pair<std::string, migraphx::shape> inputAttribute=*(net.get_parameter_shapes().begin()); std::unordered_map<std::string, migraphx::shape> inputMap=net.get_parameter_shapes();
inputName=inputAttribute.first; inputName=inputMap.begin()->first;
inputShape=inputAttribute.second; inputShape=inputMap.begin()->second;
inputSize=cv::Size(inputShape.lens()[3],inputShape.lens()[2]); int N=inputShape.lens()[0];
int C=inputShape.lens()[1];
int H=inputShape.lens()[2];
int W=inputShape.lens()[3];
inputSize=cv::Size(W,H);
// 设置模型为GPU模式 // 设置模型为GPU模式
migraphx::target gpuTarget = migraphx::gpu::target{}; migraphx::target gpuTarget = migraphx::gpu::target{};
...@@ -71,18 +75,18 @@ ErrorCode DB::Initialize(InitializationParameterOfDB InitializationParameterOfDB ...@@ -71,18 +75,18 @@ ErrorCode DB::Initialize(InitializationParameterOfDB InitializationParameterOfDB
// 编译模型 // 编译模型
migraphx::compile_options options; migraphx::compile_options options;
options.device_id=0; // 设置GPU设备,默认为0号设备 options.device_id=0; // 设置GPU设备,默认为0号设备
options.offload_copy=true; // 设置offload_copy options.offload_copy=true;
net.compile(gpuTarget,options); net.compile(gpuTarget,options);
LOG_INFO(logFile,"succeed to compile model: %s\n",GetFileName(modelPath).c_str()); LOG_INFO(stdout,"succeed to compile model: %s\n",GetFileName(modelPath).c_str());
// Run once by itself // warm up
migraphx::parameter_map inputData; std::unordered_map<std::string, migraphx::argument> inputData;
inputData[inputName]=migraphx::generate_argument(inputShape); inputData[inputName]=migraphx::argument{inputShape};
net.eval(inputData); net.eval(inputData);
// log // log
LOG_INFO(logFile,"InputMaxSize:%dx%d\n",inputSize.width,inputSize.height); LOG_INFO(stdout,"InputMaxSize:%dx%d\n",inputSize.width,inputSize.height);
LOG_INFO(logFile,"InputName:%s\n",inputName.c_str()); LOG_INFO(stdout,"InputName:%s\n",inputName.c_str());
return SUCCESS; return SUCCESS;
} }
...@@ -91,7 +95,7 @@ ErrorCode DB::Infer(const cv::Mat &img, std::vector<cv::Mat> &imgList) ...@@ -91,7 +95,7 @@ ErrorCode DB::Infer(const cv::Mat &img, std::vector<cv::Mat> &imgList)
{ {
if(img.empty()||img.type()!=CV_8UC3) if(img.empty()||img.type()!=CV_8UC3)
{ {
LOG_ERROR(logFile, "image error!\n"); LOG_ERROR(stdout, "image error!\n");
return IMAGE_ERROR; return IMAGE_ERROR;
} }
...@@ -140,11 +144,10 @@ ErrorCode DB::Infer(const cv::Mat &img, std::vector<cv::Mat> &imgList) ...@@ -140,11 +144,10 @@ ErrorCode DB::Infer(const cv::Mat &img, std::vector<cv::Mat> &imgList)
int rw = resizeImg.cols; int rw = resizeImg.cols;
cv::Mat inputBlob; cv::Mat inputBlob;
inputBlob = cv::dnn::blobFromImage(resizeImg); inputBlob = cv::dnn::blobFromImage(resizeImg);
std::vector<std::size_t> inputShapeOfInfer={1,3,rh,rw}; std::vector<std::size_t> inputShapeOfInfer={1,3,rh,rw};
// 输入数据 // 创建输入数据
migraphx::parameter_map inputData; std::unordered_map<std::string, migraphx::argument> inputData;
inputData[inputName]= migraphx::argument{migraphx::shape(inputShape.type(),inputShapeOfInfer), (float*)inputBlob.data}; inputData[inputName]= migraphx::argument{migraphx::shape(inputShape.type(),inputShapeOfInfer), (float*)inputBlob.data};
// 推理 // 推理
...@@ -534,39 +537,4 @@ std::vector<std::vector<std::vector<int>>> DBPostProcessor::FilterTagDetRes( ...@@ -534,39 +537,4 @@ std::vector<std::vector<std::vector<int>>> DBPostProcessor::FilterTagDetRes(
return root_points; return root_points;
} }
ErrorCode DB::DoCommonInitialization(InitializationParameterOfDB InitializationParameterOfDB)
{
initializationParameter=InitializationParameterOfDB;
// 获取日志文件
logFile=LogManager::GetInstance()->GetLogFile(initializationParameter.logName);
// 加载配置文件
std::string configFilePath=initializationParameter.configFilePath;
if(!Exists(configFilePath))
{
LOG_ERROR(logFile, "no configuration file!\n");
return CONFIG_FILE_NOT_EXIST;
}
if(!configurationFile.open(configFilePath, FileStorage::READ))
{
LOG_ERROR(logFile, "fail to open configuration file\n");
return FAIL_TO_OPEN_CONFIG_FILE;
}
LOG_INFO(logFile, "succeed to open configuration file\n");
// 修改父路径
std::string &parentPath = initializationParameter.parentPath;
if (!parentPath.empty())
{
if(!IsPathSeparator(parentPath[parentPath.size() - 1]))
{
parentPath+=PATH_SEPARATOR;
}
}
return SUCCESS;
}
} }
\ No newline at end of file
...@@ -3,17 +3,12 @@ ...@@ -3,17 +3,12 @@
#ifndef __OCR_DB_H__ #ifndef __OCR_DB_H__
#define __OCR_DB_H__ #define __OCR_DB_H__
#include <string>
#include <migraphx/program.hpp> #include <migraphx/program.hpp>
#include <opencv2/opencv.hpp>
#include <CommonDefinition.h> #include <CommonDefinition.h>
#include <clipper.h> #include <clipper.h>
#include <utility.h> #include <utility.h>
using namespace std;
using namespace cv;
using namespace migraphx;
namespace migraphxSamples namespace migraphxSamples
{ {
...@@ -24,7 +19,7 @@ typedef struct _DBParameter ...@@ -24,7 +19,7 @@ typedef struct _DBParameter
float BoxThreshold; float BoxThreshold;
float UnclipRatio; float UnclipRatio;
int LimitSideLen; int LimitSideLen;
string ScoreMode; std::string ScoreMode;
}DBParameter; }DBParameter;
...@@ -39,17 +34,12 @@ public: ...@@ -39,17 +34,12 @@ public:
ErrorCode Infer(const cv::Mat &img, std::vector<cv::Mat> &imgList); ErrorCode Infer(const cv::Mat &img, std::vector<cv::Mat> &imgList);
private:
ErrorCode DoCommonInitialization(InitializationParameterOfDB InitializationParameterOfDB);
private: private:
cv::FileStorage configurationFile; cv::FileStorage configurationFile;
InitializationParameterOfDB initializationParameter;
FILE *logFile;
migraphx::program net; migraphx::program net;
cv::Size inputSize; cv::Size inputSize;
string inputName; std::string inputName;
migraphx::shape inputShape; migraphx::shape inputShape;
DBParameter dbParameter; DBParameter dbParameter;
......
#include <OcrSVTR.h> #include <OcrSVTR.h>
#include <migraphx/onnx.hpp> #include <migraphx/onnx.hpp>
#include <migraphx/gpu/target.hpp> #include <migraphx/gpu/target.hpp>
#include <migraphx/gpu/hip.hpp>
#include <migraphx/generate.hpp>
#include <migraphx/quantization.hpp>
#include <migraphx/reshape2.hpp>
#include <opencv2/dnn.hpp>
#include <CommonUtility.h>
#include <Filesystem.h> #include <Filesystem.h>
#include <SimpleLog.h> #include <SimpleLog.h>
using namespace cv::dnn;
namespace migraphxSamples namespace migraphxSamples
{ {
SVTR::SVTR():logFile(NULL) SVTR::SVTR()
{ {
} }
...@@ -28,37 +22,45 @@ SVTR::~SVTR() ...@@ -28,37 +22,45 @@ SVTR::~SVTR()
ErrorCode SVTR::Initialize(InitializationParameterOfSVTR InitializationParameterOfSVTR) ErrorCode SVTR::Initialize(InitializationParameterOfSVTR InitializationParameterOfSVTR)
{ {
// 初始化(获取日志文件,加载配置文件等) // 读取配置文件
ErrorCode errorCode = DoCommonInitialization(InitializationParameterOfSVTR); std::string configFilePath=InitializationParameterOfSVTR.configFilePath;
if (errorCode!=SUCCESS) if(Exists(configFilePath)==false)
{
LOG_ERROR(stdout, "no configuration file!\n");
return CONFIG_FILE_NOT_EXIST;
}
if(!configurationFile.open(configFilePath, cv::FileStorage::READ))
{ {
LOG_ERROR(logFile, "fail to DoCommonInitialization\n"); LOG_ERROR(stdout, "fail to open configuration file\n");
return errorCode; return FAIL_TO_OPEN_CONFIG_FILE;
} }
LOG_INFO(logFile, "success to DoCommonInitialization\n"); LOG_INFO(stdout, "succeed to open configuration file\n");
// 获取配置文件参数 // 获取配置文件参数
FileNode netNode = configurationFile["OcrSVTR"]; cv::FileNode netNode = configurationFile["OcrSVTR"];
string modelPath = initializationParameter.parentPath + (string)netNode["ModelPath"]; std::string modelPath = (std::string)netNode["ModelPath"];
string dictPath = (string)netNode["DictPath"]; std::string dictPath = (std::string)netNode["DictPath"];
// 加载模型 // 加载模型
if(Exists(modelPath)==false) if(Exists(modelPath)==false)
{ {
LOG_ERROR(logFile,"%s not exist!\n",modelPath.c_str()); LOG_ERROR(stdout,"%s not exist!\n",modelPath.c_str());
return MODEL_NOT_EXIST; return MODEL_NOT_EXIST;
} }
migraphx::onnx_options onnx_options; migraphx::onnx_options onnx_options;
onnx_options.map_input_dims["x"]={1,3,48,320}; onnx_options.map_input_dims["x"]={1,3,48,320}; // 设置最大shape
net = migraphx::parse_onnx(modelPath, onnx_options); net = migraphx::parse_onnx(modelPath, onnx_options);
LOG_INFO(logFile,"succeed to load model: %s\n",GetFileName(modelPath).c_str()); LOG_INFO(stdout,"succeed to load model: %s\n",GetFileName(modelPath).c_str());
// 获取模型输入属性 // 获取模型输入属性
std::pair<std::string, migraphx::shape> inputAttribute=*(net.get_parameter_shapes().begin()); std::unordered_map<std::string, migraphx::shape> inputMap=net.get_parameter_shapes();
inputName=inputAttribute.first; inputName=inputMap.begin()->first;
inputShape=inputAttribute.second; inputShape=inputMap.begin()->second;
inputSize=cv::Size(inputShape.lens()[3],inputShape.lens()[2]); int N=inputShape.lens()[0];
int C=inputShape.lens()[1];
int H=inputShape.lens()[2];
int W=inputShape.lens()[3];
inputSize=cv::Size(W,H);
// 设置模型为GPU模式 // 设置模型为GPU模式
migraphx::target gpuTarget = migraphx::gpu::target{}; migraphx::target gpuTarget = migraphx::gpu::target{};
...@@ -66,14 +68,14 @@ ErrorCode SVTR::Initialize(InitializationParameterOfSVTR InitializationParameter ...@@ -66,14 +68,14 @@ ErrorCode SVTR::Initialize(InitializationParameterOfSVTR InitializationParameter
// 编译模型 // 编译模型
migraphx::compile_options options; migraphx::compile_options options;
options.device_id=0; // 设置GPU设备,默认为0号设备 options.device_id=0; // 设置GPU设备,默认为0号设备
options.offload_copy=true; // 设置offload_copy options.offload_copy=true;
net.compile(gpuTarget,options); net.compile(gpuTarget,options);
LOG_INFO(logFile,"succeed to compile model: %s\n",GetFileName(modelPath).c_str()); LOG_INFO(stdout,"succeed to compile model: %s\n",GetFileName(modelPath).c_str());
// Run once by itself // warm up
migraphx::parameter_map inputData; std::unordered_map<std::string, migraphx::argument> inputData;
inputData[inputName]=migraphx::generate_argument(inputShape); inputData[inputName]=migraphx::argument{inputShape};
net.eval(inputData); net.eval(inputData);
std::ifstream in(dictPath); std::ifstream in(dictPath);
std::string line; std::string line;
...@@ -93,8 +95,8 @@ ErrorCode SVTR::Initialize(InitializationParameterOfSVTR InitializationParameter ...@@ -93,8 +95,8 @@ ErrorCode SVTR::Initialize(InitializationParameterOfSVTR InitializationParameter
} }
// log // log
LOG_INFO(logFile,"InputMaxSize:%dx%d\n",inputSize.width,inputSize.height); LOG_INFO(stdout,"InputMaxSize:%dx%d\n",inputSize.width,inputSize.height);
LOG_INFO(logFile,"InputName:%s\n",inputName.c_str()); LOG_INFO(stdout,"InputName:%s\n",inputName.c_str());
return SUCCESS; return SUCCESS;
} }
...@@ -103,7 +105,7 @@ ErrorCode SVTR::Infer(cv::Mat &img, std::string &resultsChar, float &resultsdSco ...@@ -103,7 +105,7 @@ ErrorCode SVTR::Infer(cv::Mat &img, std::string &resultsChar, float &resultsdSco
{ {
if(img.empty()||img.type()!=CV_8UC3) if(img.empty()||img.type()!=CV_8UC3)
{ {
LOG_ERROR(logFile, "image error!\n"); LOG_ERROR(stdout, "image error!\n");
return IMAGE_ERROR; return IMAGE_ERROR;
} }
...@@ -141,11 +143,10 @@ ErrorCode SVTR::Infer(cv::Mat &img, std::string &resultsChar, float &resultsdSco ...@@ -141,11 +143,10 @@ ErrorCode SVTR::Infer(cv::Mat &img, std::string &resultsChar, float &resultsdSco
} }
cv::merge(bgrChannels, resizeImg); cv::merge(bgrChannels, resizeImg);
cv::Mat inputBlob = cv::dnn::blobFromImage(resizeImg); cv::Mat inputBlob = cv::dnn::blobFromImage(resizeImg);
std::vector<std::size_t> inputShapeOfInfer={1,3,48,resizeW}; std::vector<std::size_t> inputShapeOfInfer={1,3,48,resizeW};
// 输入数据 // 创建输入数据
migraphx::parameter_map inputData; std::unordered_map<std::string, migraphx::argument> inputData;
inputData[inputName]= migraphx::argument{migraphx::shape(inputShape.type(),inputShapeOfInfer), (float*)inputBlob.data}; inputData[inputName]= migraphx::argument{migraphx::shape(inputShape.type(),inputShapeOfInfer), (float*)inputBlob.data};
// 推理 // 推理
...@@ -186,40 +187,4 @@ ErrorCode SVTR::Infer(cv::Mat &img, std::string &resultsChar, float &resultsdSco ...@@ -186,40 +187,4 @@ ErrorCode SVTR::Infer(cv::Mat &img, std::string &resultsChar, float &resultsdSco
return SUCCESS; return SUCCESS;
} }
ErrorCode SVTR::DoCommonInitialization(InitializationParameterOfSVTR InitializationParameterOfSVTR)
{
initializationParameter = InitializationParameterOfSVTR;
// 获取日志文件
logFile=LogManager::GetInstance()->GetLogFile(initializationParameter.logName);
// 加载配置文件
std::string configFilePath=initializationParameter.configFilePath;
if(!Exists(configFilePath))
{
LOG_ERROR(logFile, "no configuration file!\n");
return CONFIG_FILE_NOT_EXIST;
}
if(!configurationFile.open(configFilePath, FileStorage::READ))
{
LOG_ERROR(logFile, "fail to open configuration file\n");
return FAIL_TO_OPEN_CONFIG_FILE;
}
LOG_INFO(logFile, "succeed to open configuration file\n");
// 修改父路径
std::string &parentPath = initializationParameter.parentPath;
if (!parentPath.empty())
{
if(!IsPathSeparator(parentPath[parentPath.size() - 1]))
{
parentPath+=PATH_SEPARATOR;
}
}
return SUCCESS;
}
} }
...@@ -3,14 +3,9 @@ ...@@ -3,14 +3,9 @@
#ifndef __OCR_SVTR_H__ #ifndef __OCR_SVTR_H__
#define __OCR_SVTR_H__ #define __OCR_SVTR_H__
#include <string>
#include <migraphx/program.hpp> #include <migraphx/program.hpp>
#include <opencv2/opencv.hpp>
#include <CommonDefinition.h>
using namespace std; #include <CommonDefinition.h>
using namespace cv;
using namespace migraphx;
namespace migraphxSamples namespace migraphxSamples
{ {
...@@ -26,19 +21,14 @@ public: ...@@ -26,19 +21,14 @@ public:
ErrorCode Infer(cv::Mat &img, std::string &resultsChar, float &resultsdScore, float &maxWHRatio); ErrorCode Infer(cv::Mat &img, std::string &resultsChar, float &resultsdScore, float &maxWHRatio);
private:
ErrorCode DoCommonInitialization(InitializationParameterOfSVTR InitializationParameterOfSVTR);
private: private:
cv::FileStorage configurationFile; cv::FileStorage configurationFile;
InitializationParameterOfSVTR initializationParameter;
FILE *logFile;
migraphx::program net; migraphx::program net;
cv::Size inputSize; cv::Size inputSize;
string inputName; std::string inputName;
migraphx::shape inputShape; migraphx::shape inputShape;
string dictPath; std::string dictPath;
std::vector<std::string> charactorDict; std::vector<std::string> charactorDict;
}; };
......
...@@ -3,7 +3,7 @@ ...@@ -3,7 +3,7 @@
namespace migraphxSamples namespace migraphxSamples
{ {
VLPR::VLPR():logFile(NULL) VLPR::VLPR()
{ {
} }
...@@ -18,9 +18,7 @@ VLPR::~VLPR() ...@@ -18,9 +18,7 @@ VLPR::~VLPR()
ErrorCode VLPR::Initialize(InitializationParameterOfDB initParamOfDB, InitializationParameterOfSVTR initParamOfSVTR) ErrorCode VLPR::Initialize(InitializationParameterOfDB initParamOfDB, InitializationParameterOfSVTR initParamOfSVTR)
{ {
// 初始化DB // 初始化DB
initParamOfDB.parentPath = "";
initParamOfDB.configFilePath = CONFIG_FILE; initParamOfDB.configFilePath = CONFIG_FILE;
initParamOfDB.logName = "";
ErrorCode errorCode=db.Initialize(initParamOfDB); ErrorCode errorCode=db.Initialize(initParamOfDB);
if(errorCode!=SUCCESS) if(errorCode!=SUCCESS)
{ {
...@@ -30,9 +28,7 @@ ErrorCode VLPR::Initialize(InitializationParameterOfDB initParamOfDB, Initializa ...@@ -30,9 +28,7 @@ ErrorCode VLPR::Initialize(InitializationParameterOfDB initParamOfDB, Initializa
LOG_INFO(stdout, "succeed to initialize db\n"); LOG_INFO(stdout, "succeed to initialize db\n");
// 初始化SVTR // 初始化SVTR
initParamOfSVTR.parentPath = "";
initParamOfSVTR.configFilePath = CONFIG_FILE; initParamOfSVTR.configFilePath = CONFIG_FILE;
initParamOfSVTR.logName = "";
errorCode=svtr.Initialize(initParamOfSVTR); errorCode=svtr.Initialize(initParamOfSVTR);
if(errorCode!=SUCCESS) if(errorCode!=SUCCESS)
{ {
......
...@@ -24,7 +24,6 @@ public: ...@@ -24,7 +24,6 @@ public:
private: private:
DB db; DB db;
SVTR svtr; SVTR svtr;
FILE *logFile;
cv::FileStorage configurationFile; cv::FileStorage configurationFile;
std::vector<cv::Mat> imgLists; std::vector<cv::Mat> imgLists;
std::string recText; std::string recText;
......
// 常用数据类型和宏定义 // 常用定义
#ifndef __COMMON_DEFINITION_H__ #ifndef __COMMON_DEFINITION_H__
#define __COMMON_DEFINITION_H__ #define __COMMON_DEFINITION_H__
#include <string>
#include <opencv2/opencv.hpp> #include <opencv2/opencv.hpp>
using namespace std;
using namespace cv;
namespace migraphxSamples namespace migraphxSamples
{ {
...@@ -21,20 +17,7 @@ namespace migraphxSamples ...@@ -21,20 +17,7 @@ namespace migraphxSamples
#define CONFIG_FILE "../Resource/Configuration.xml" #define CONFIG_FILE "../Resource/Configuration.xml"
typedef struct __Time typedef enum _ErrorCode
{
string year;
string month;
string day;
string hour;
string minute;
string second;
string millisecond; // ms
string microsecond; // us
string weekDay;
}_Time;
typedef enum _ErrorCode
{ {
SUCCESS=0, // 0 SUCCESS=0, // 0
MODEL_NOT_EXIST, // 模型不存在 MODEL_NOT_EXIST, // 模型不存在
...@@ -44,7 +27,7 @@ typedef enum _ErrorCode ...@@ -44,7 +27,7 @@ typedef enum _ErrorCode
IMAGE_ERROR, // 图像错误 IMAGE_ERROR, // 图像错误
}ErrorCode; }ErrorCode;
typedef struct _ResultOfPrediction typedef struct _ResultOfPrediction
{ {
float confidence; float confidence;
int label; int label;
...@@ -52,26 +35,29 @@ typedef struct _ResultOfPrediction ...@@ -52,26 +35,29 @@ typedef struct _ResultOfPrediction
}ResultOfPrediction; }ResultOfPrediction;
typedef struct _ResultOfDetection typedef struct _ResultOfDetection
{ {
Rect boundingBox; cv::Rect boundingBox;
float confidence; float confidence;
int classID; int classID;
string className; std::string className;
bool exist; bool exist;
_ResultOfDetection():confidence(0.0f),classID(0),exist(true){} _ResultOfDetection():confidence(0.0f),classID(0),exist(true){}
}ResultOfDetection; }ResultOfDetection;
typedef struct _InitializationParameterOfDetector typedef struct _InitializationParameterOfDetector
{ {
std::string parentPath; std::string parentPath;
std::string configFilePath; std::string configFilePath;
cv::Size inputSize;
std::string logName;
}InitializationParameterOfDetector; }InitializationParameterOfDetector;
typedef struct _InitializationParameterOfDetector InitializationParameterOfClassifier;
typedef struct _InitializationParameterOfDetector InitializationParameterOfSuperresolution;
typedef struct _InitializationParameterOfDetector InitializationParameterOfSegmentation;
typedef struct _InitializationParameterOfDetector InitializationParameterOfNLP;
typedef struct _InitializationParameterOfDetector InitializationParameterOfOcr;
typedef struct _InitializationParameterOfDetector InitializationParameterOfDB; typedef struct _InitializationParameterOfDetector InitializationParameterOfDB;
typedef struct _InitializationParameterOfDetector InitializationParameterOfSVTR; typedef struct _InitializationParameterOfDetector InitializationParameterOfSVTR;
......