Commit fccfdfa5 authored by dlyrm

update code

parent dcc7bf4f
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "include/keypoint_postprocess.h"
#define PI 3.1415926535
#define HALF_CIRCLE_DEGREE 180
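// Build a third point for cv::getAffineTransform by rotating the vector
// (a - b) 90 degrees around a; three non-collinear point pairs determine
// the affine transform.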
cv::Point2f get_3rd_point(cv::Point2f& a, cv::Point2f& b) {
cv::Point2f direct{a.x - b.x, a.y - b.y};
return cv::Point2f(a.x - direct.y, a.y + direct.x);
}
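// Rotate the point (src_point_x, src_point_y) by rot_rad radians.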
std::vector<float> get_dir(float src_point_x,
float src_point_y,
float rot_rad) {
float sn = sin(rot_rad);
float cs = cos(rot_rad);
std::vector<float> src_result{0.0, 0.0};
src_result[0] = src_point_x * cs - src_point_y * sn;
src_result[1] = src_point_x * sn + src_point_y * cs;
return src_result;
}
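// Apply the 2x3 affine matrix `trans` to (pt_x, pt_y) and write the result
// for joint p; preds is laid out as [score, x, y] per joint.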
void affine_tranform(
float pt_x, float pt_y, cv::Mat& trans, std::vector<float>& preds, int p) {
double new1[3] = {pt_x, pt_y, 1.0};
cv::Mat new_pt(3, 1, trans.type(), new1);
cv::Mat w = trans * new_pt;
preds[p * 3 + 1] = static_cast<float>(w.at<double>(0, 0));
preds[p * 3 + 2] = static_cast<float>(w.at<double>(1, 0));
}
void get_affine_transform(std::vector<float>& center,
std::vector<float>& scale,
float rot,
std::vector<int>& output_size,
cv::Mat& trans,
int inv) {
float src_w = scale[0];
float dst_w = static_cast<float>(output_size[0]);
float dst_h = static_cast<float>(output_size[1]);
float rot_rad = rot * PI / HALF_CIRCLE_DEGREE;
std::vector<float> src_dir = get_dir(-0.5 * src_w, 0, rot_rad);
std::vector<float> dst_dir{static_cast<float>(-0.5) * dst_w, 0.0};
cv::Point2f srcPoint2f[3], dstPoint2f[3];
srcPoint2f[0] = cv::Point2f(center[0], center[1]);
srcPoint2f[1] = cv::Point2f(center[0] + src_dir[0], center[1] + src_dir[1]);
srcPoint2f[2] = get_3rd_point(srcPoint2f[0], srcPoint2f[1]);
dstPoint2f[0] = cv::Point2f(dst_w * 0.5, dst_h * 0.5);
dstPoint2f[1] =
cv::Point2f(dst_w * 0.5 + dst_dir[0], dst_h * 0.5 + dst_dir[1]);
dstPoint2f[2] = get_3rd_point(dstPoint2f[0], dstPoint2f[1]);
if (inv == 0) {
trans = cv::getAffineTransform(srcPoint2f, dstPoint2f);
} else {
trans = cv::getAffineTransform(dstPoint2f, srcPoint2f);
}
}
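// Map joint coordinates from heatmap space back to the original image, either
// through the inverse affine transform or with a plain scale-and-offset.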
void transform_preds(std::vector<float>& coords,
std::vector<float>& center,
std::vector<float>& scale,
std::vector<int>& output_size,
std::vector<int64_t>& dim,
std::vector<float>& target_coords,
bool affine=false) {
if (affine) {
cv::Mat trans(2, 3, CV_64FC1);
get_affine_transform(center, scale, 0, output_size, trans, 1);
for (int p = 0; p < dim[1]; ++p) {
affine_tranform(
coords[p * 2], coords[p * 2 + 1], trans, target_coords, p);
}
} else {
float heat_w = static_cast<float>(output_size[0]);
float heat_h = static_cast<float>(output_size[1]);
float x_scale = scale[0] / heat_w;
float y_scale = scale[1] / heat_h;
float offset_x = center[0] - scale[0] / 2.;
float offset_y = center[1] - scale[1] / 2.;
for (int i = 0; i < dim[1]; i++) {
target_coords[i * 3 + 1] = x_scale * coords[i * 2] + offset_x;
target_coords[i * 3 + 2] = y_scale * coords[i * 2 + 1] + offset_y;
}
}
}
// only for batchsize == 1
void get_max_preds(std::vector<float>& heatmap,
std::vector<int>& dim,
std::vector<float>& preds,
std::vector<float>& maxvals,
int batchid,
int joint_idx) {
int num_joints = dim[1];
int width = dim[3];
for (int j = 0; j < dim[1]; j++) {
float* index = &(
heatmap[batchid * num_joints * dim[2] * dim[3] + j * dim[2] * dim[3]]);
float* end = index + dim[2] * dim[3];
float* max_dis = std::max_element(index, end);
auto max_id = std::distance(index, max_dis);
maxvals[j] = *max_dis;
if (*max_dis > 0) {
preds[j * 2] = static_cast<float>(max_id % width);
preds[j * 2 + 1] = static_cast<float>(max_id / width);
}
}
}
void dark_parse(std::vector<float>& heatmap,
std::vector<int64_t>& dim,
std::vector<float>& coords,
int px,
int py,
int index,
int ch){
/* DARK postprocessing, Zhang et al., "Distribution-Aware Coordinate
Representation for Human Pose Estimation" (CVPR 2020).
1) offset = -hessian.inv() * derivative
2) dx = (heatmap[x+1] - heatmap[x-1]) / 2
3) dxx = (dx[x+1] - dx[x-1]) / 2
4) derivative = Mat([dx, dy])
5) hessian = Mat([[dxx, dxy], [dxy, dyy]])
*/
std::vector<float>::const_iterator first1 = heatmap.begin() + index;
std::vector<float>::const_iterator last1 = heatmap.begin() + index + dim[2] * dim[3];
std::vector<float> heatmap_ch(first1, last1);
cv::Mat heatmap_mat = cv::Mat(heatmap_ch).reshape(0,dim[2]);
heatmap_mat.convertTo(heatmap_mat, CV_32FC1);
cv::GaussianBlur(heatmap_mat, heatmap_mat, cv::Size(3, 3), 0, 0);
heatmap_mat = heatmap_mat.reshape(1,1);
heatmap_ch = std::vector<float>(heatmap_mat.reshape(1,1));
float epsilon = 1e-10;
// Sample log-heatmap values around the target location.
float xy = log(fmax(heatmap_ch[py * dim[3] + px], epsilon));
float xr = log(fmax(heatmap_ch[py * dim[3] + px + 1], epsilon));
float xl = log(fmax(heatmap_ch[py * dim[3] + px - 1], epsilon));
float xr2 = log(fmax(heatmap_ch[py * dim[3] + px + 2], epsilon));
float xl2 = log(fmax(heatmap_ch[py * dim[3] + px - 2], epsilon));
float yu = log(fmax(heatmap_ch[(py + 1) * dim[3] + px], epsilon));
float yd = log(fmax(heatmap_ch[(py - 1) * dim[3] + px], epsilon));
float yu2 = log(fmax(heatmap_ch[(py + 2) * dim[3] + px], epsilon));
float yd2 = log(fmax(heatmap_ch[(py - 2) * dim[3] + px], epsilon));
float xryu = log(fmax(heatmap_ch[(py + 1) * dim[3] + px + 1], epsilon));
float xryd = log(fmax(heatmap_ch[(py - 1) * dim[3] + px + 1], epsilon));
float xlyu = log(fmax(heatmap_ch[(py + 1) * dim[3] + px - 1], epsilon));
float xlyd = log(fmax(heatmap_ch[(py - 1) * dim[3] + px - 1], epsilon));
// Compute first and second derivatives from the sampled values.
float dx = 0.5 * (xr - xl);
float dy = 0.5 * (yu - yd);
float dxx = 0.25 * (xr2 - 2*xy + xl2);
float dxy = 0.25 * (xryu - xryd - xlyu + xlyd);
float dyy = 0.25 * (yu2 - 2*xy + yd2);
// Solve offset = -hessian.inv() * derivative for the sub-pixel shift.
if (dxx * dyy - dxy * dxy != 0) {
float M[2][2] = {{dxx, dxy}, {dxy, dyy}};
float D[2] = {dx, dy};
cv::Mat hessian(2, 2, CV_32F, M);
cv::Mat derivative(2, 1, CV_32F, D);
cv::Mat offset = -hessian.inv() * derivative;
coords[ch * 2] += offset.at<float>(0, 0);
coords[ch * 2 + 1] += offset.at<float>(1, 0);
}
}
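// Decode the final keypoints from the heatmap argmax indices in idxout:
// optionally refine with DARK, otherwise nudge a quarter pixel toward the
// higher neighbor, then map back to image coordinates.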
void get_final_preds(std::vector<float>& heatmap,
std::vector<int64_t>& dim,
std::vector<int64_t>& idxout,
std::vector<int64_t>& idxdim,
std::vector<float>& center,
std::vector<float> scale,
std::vector<float>& preds,
int batchid,
bool DARK) {
std::vector<float> coords;
coords.resize(dim[1] * 2);
int heatmap_height = dim[2];
int heatmap_width = dim[3];
for (int j = 0; j < dim[1]; ++j) {
int index = (batchid * dim[1] + j) * dim[2] * dim[3];
int idx = idxout[batchid * dim[1] + j];
preds[j * 3] = heatmap[index + idx];
coords[j * 2] = idx % heatmap_width;
coords[j * 2 + 1] = idx / heatmap_width;
int px = int(coords[j * 2] + 0.5);
int py = int(coords[j * 2 + 1] + 0.5);
// DARK samples px +- 2 and py +- 2, so both axes must be in range.
if (DARK && px > 1 && px < heatmap_width - 2 && py > 1 &&
py < heatmap_height - 2) {
dark_parse(heatmap, dim, coords, px, py, index, j);
} else {
if (px > 0 && px < heatmap_width - 1) {
float diff_x = heatmap[index + py * dim[3] + px + 1] -
heatmap[index + py * dim[3] + px - 1];
coords[j * 2] += diff_x > 0 ? 0.25 : -0.25;
}
if (py > 0 && py < heatmap_height - 1) {
float diff_y = heatmap[index + (py + 1) * dim[3] + px] -
heatmap[index + (py - 1) * dim[3] + px];
coords[j * 2 + 1] += diff_y > 0 ? 0.25 : -0.25;
}
}
}
std::vector<int> img_size{heatmap_width, heatmap_height};
transform_preds(coords, center, scale, img_size, dim, preds);
}
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <math.h>
#include <stdarg.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <algorithm>
#include <iostream>
#include <numeric>
#include <string>
#include <vector>
#include "include/config_parser.h"
#include "include/keypoint_detector.h"
#include "include/object_detector.h"
#include "include/preprocess_op.h"
#include "json/json.h"
Json::Value RT_Config;
void PrintBenchmarkLog(std::vector<double> det_time, int img_num) {
std::cout << "----------------------- Config info -----------------------"
<< std::endl;
std::cout << "num_threads: " << RT_Config["cpu_threads"].as<int>()
<< std::endl;
std::cout << "----------------------- Data info -----------------------"
<< std::endl;
std::cout << "batch_size_det: " << RT_Config["batch_size_det"].as<int>()
<< std::endl;
std::cout << "----------------------- Model info -----------------------"
<< std::endl;
RT_Config["model_dir_det"].as<std::string>().erase(
RT_Config["model_dir_det"].as<std::string>().find_last_not_of("/") + 1);
std::cout << "detection model_name: "
<< RT_Config["model_dir_det"].as<std::string>() << std::endl;
std::cout << "----------------------- Perf info ------------------------"
<< std::endl;
std::cout << "Total number of predicted data: " << img_num
<< " and total time spent(ms): "
<< std::accumulate(det_time.begin(), det_time.end(), 0.)
<< std::endl;
img_num = std::max(1, img_num);
std::cout << "preproce_time(ms): " << det_time[0] / img_num
<< ", inference_time(ms): " << det_time[1] / img_num
<< ", postprocess_time(ms): " << det_time[2] / img_num << std::endl;
}
void PrintKptsBenchmarkLog(std::vector<double> det_time, int img_num) {
std::cout << "----------------------- Data info -----------------------"
<< std::endl;
std::cout << "batch_size_keypoint: "
<< RT_Config["batch_size_keypoint"].as<int>() << std::endl;
std::cout << "----------------------- Model info -----------------------"
<< std::endl;
RT_Config["model_dir_keypoint"].as<std::string>().erase(
RT_Config["model_dir_keypoint"].as<std::string>().find_last_not_of("/") +
1);
std::cout << "keypoint model_name: "
<< RT_Config["model_dir_keypoint"].as<std::string>() << std::endl;
std::cout << "----------------------- Perf info ------------------------"
<< std::endl;
std::cout << "Total number of predicted data: " << img_num
<< " and total time spent(ms): "
<< std::accumulate(det_time.begin(), det_time.end(), 0.)
<< std::endl;
img_num = std::max(1, img_num);
std::cout << "Average time cost per person:" << std::endl
<< "preproce_time(ms): " << det_time[0] / img_num
<< ", inference_time(ms): " << det_time[1] / img_num
<< ", postprocess_time(ms): " << det_time[2] / img_num << std::endl;
}
void PrintTotalTimeLog(double det_time,
double keypoint_time,
double crop_time) {
std::cout << "----------------------- Time info ------------------------"
<< std::endl;
std::cout << "Total Pipeline time(ms) per image: "
<< det_time + keypoint_time + crop_time << std::endl;
std::cout << "Average det time(ms) per image: " << det_time
<< ", average keypoint time(ms) per image: " << keypoint_time
<< ", average crop time(ms) per image: " << crop_time << std::endl;
}
static std::string DirName(const std::string& filepath) {
auto pos = filepath.rfind(OS_PATH_SEP);
if (pos == std::string::npos) {
return "";
}
return filepath.substr(0, pos);
}
static bool PathExists(const std::string& path) {
struct stat buffer;
return (stat(path.c_str(), &buffer) == 0);
}
static void MkDir(const std::string& path) {
if (PathExists(path)) return;
int ret = 0;
ret = mkdir(path.c_str(), 0755);
if (ret != 0) {
std::string path_error(path);
path_error += " mkdir failed!";
throw std::runtime_error(path_error);
}
}
static void MkDirs(const std::string& path) {
if (path.empty()) return;
if (PathExists(path)) return;
MkDirs(DirName(path));
MkDir(path);
}
void PredictImage(const std::vector<std::string> all_img_paths,
const int batch_size_det,
const double threshold_det,
const bool run_benchmark,
PaddleDetection::ObjectDetector* det,
PaddleDetection::KeyPointDetector* keypoint,
const std::string& output_dir = "output") {
std::vector<double> det_t = {0, 0, 0};
int steps = ceil(static_cast<float>(all_img_paths.size()) / batch_size_det);
int kpts_imgs = 0;
std::vector<double> keypoint_t = {0, 0, 0};
double midtimecost = 0;
for (int idx = 0; idx < steps; idx++) {
std::vector<cv::Mat> batch_imgs;
int left_image_cnt = all_img_paths.size() - idx * batch_size_det;
if (left_image_cnt > batch_size_det) {
left_image_cnt = batch_size_det;
}
for (int bs = 0; bs < left_image_cnt; bs++) {
std::string image_file_path = all_img_paths.at(idx * batch_size_det + bs);
cv::Mat im = cv::imread(image_file_path, 1);
batch_imgs.push_back(im);
}
// Store all detected result
std::vector<PaddleDetection::ObjectResult> result;
std::vector<int> bbox_num;
std::vector<double> det_times;
// Store keypoint results
std::vector<PaddleDetection::KeyPointResult> result_kpts;
std::vector<cv::Mat> imgs_kpts;
std::vector<std::vector<float>> center_bs;
std::vector<std::vector<float>> scale_bs;
std::vector<int> colormap_kpts = PaddleDetection::GenerateColorMap(20);
bool is_rbox = false;
if (run_benchmark) {
det->Predict(
batch_imgs, threshold_det, 50, 50, &result, &bbox_num, &det_times);
} else {
det->Predict(
batch_imgs, threshold_det, 0, 1, &result, &bbox_num, &det_times);
}
// get labels and colormap
auto labels = det->GetLabelList();
auto colormap = PaddleDetection::GenerateColorMap(labels.size());
int item_start_idx = 0;
for (int i = 0; i < left_image_cnt; i++) {
cv::Mat im = batch_imgs[i];
std::vector<PaddleDetection::ObjectResult> im_result;
int detect_num = 0;
for (int j = 0; j < bbox_num[i]; j++) {
PaddleDetection::ObjectResult item = result[item_start_idx + j];
if (item.confidence < threshold_det || item.class_id == -1) {
continue;
}
detect_num += 1;
im_result.push_back(item);
if (item.rect.size() > 6) {
is_rbox = true;
printf("class=%d confidence=%.4f rect=[%d %d %d %d %d %d %d %d]\n",
item.class_id,
item.confidence,
item.rect[0],
item.rect[1],
item.rect[2],
item.rect[3],
item.rect[4],
item.rect[5],
item.rect[6],
item.rect[7]);
} else {
printf("class=%d confidence=%.4f rect=[%d %d %d %d]\n",
item.class_id,
item.confidence,
item.rect[0],
item.rect[1],
item.rect[2],
item.rect[3]);
}
}
std::cout << all_img_paths.at(idx * batch_size_det + i)
<< " The number of detected box: " << detect_num << std::endl;
item_start_idx = item_start_idx + bbox_num[i];
std::vector<int> compression_params;
compression_params.push_back(cv::IMWRITE_JPEG_QUALITY);
compression_params.push_back(95);
std::string output_path(output_dir);
if (output_dir.rfind(OS_PATH_SEP) != output_dir.size() - 1) {
output_path += OS_PATH_SEP;
}
std::string image_file_path = all_img_paths.at(idx * batch_size_det + i);
if (keypoint) {
int imsize = im_result.size();
for (int i = 0; i < imsize; i++) {
auto keypoint_start_time = std::chrono::steady_clock::now();
auto item = im_result[i];
cv::Mat crop_img;
std::vector<double> keypoint_times;
std::vector<int> rect = {
item.rect[0], item.rect[1], item.rect[2], item.rect[3]};
std::vector<float> center;
std::vector<float> scale;
if (item.class_id == 0) {
PaddleDetection::CropImg(im, crop_img, rect, center, scale);
center_bs.emplace_back(center);
scale_bs.emplace_back(scale);
imgs_kpts.emplace_back(crop_img);
kpts_imgs += 1;
}
auto keypoint_crop_time = std::chrono::steady_clock::now();
std::chrono::duration<float> midtimediff =
keypoint_crop_time - keypoint_start_time;
midtimecost += static_cast<double>(midtimediff.count() * 1000);
if (imgs_kpts.size() == RT_Config["batch_size_keypoint"].as<int>() ||
((i == imsize - 1) && !imgs_kpts.empty())) {
if (run_benchmark) {
keypoint->Predict(imgs_kpts,
center_bs,
scale_bs,
10,
10,
&result_kpts,
&keypoint_times);
} else {
keypoint->Predict(imgs_kpts,
center_bs,
scale_bs,
0,
1,
&result_kpts,
&keypoint_times);
}
imgs_kpts.clear();
center_bs.clear();
scale_bs.clear();
keypoint_t[0] += keypoint_times[0];
keypoint_t[1] += keypoint_times[1];
keypoint_t[2] += keypoint_times[2];
}
}
std::string kpts_savepath =
output_path + "keypoint_" +
image_file_path.substr(image_file_path.find_last_of('/') + 1);
cv::Mat kpts_vis_img = VisualizeKptsResult(
im, result_kpts, colormap_kpts, keypoint->get_threshold());
cv::imwrite(kpts_savepath, kpts_vis_img, compression_params);
printf("Visualized output saved as %s\n", kpts_savepath.c_str());
} else {
// Visualization result
cv::Mat vis_img = PaddleDetection::VisualizeResult(
im, im_result, labels, colormap, is_rbox);
std::string det_savepath =
output_path + "result_" +
image_file_path.substr(image_file_path.find_last_of('/') + 1);
cv::imwrite(det_savepath, vis_img, compression_params);
printf("Visualized output saved as %s\n", det_savepath.c_str());
}
}
det_t[0] += det_times[0];
det_t[1] += det_times[1];
det_t[2] += det_times[2];
}
PrintBenchmarkLog(det_t, all_img_paths.size());
if (keypoint) {
PrintKptsBenchmarkLog(keypoint_t, kpts_imgs);
PrintTotalTimeLog(
(det_t[0] + det_t[1] + det_t[2]) / all_img_paths.size(),
(keypoint_t[0] + keypoint_t[1] + keypoint_t[2]) / all_img_paths.size(),
midtimecost / all_img_paths.size());
}
}
int main(int argc, char** argv) {
std::cout << "Usage: " << argv[0] << " [config_path] [image_dir](option)\n";
if (argc < 2) {
std::cout << "Usage: ./main det_runtime_config.json" << std::endl;
return -1;
}
std::string config_path = argv[1];
std::string img_path = "";
if (argc >= 3) {
img_path = argv[2];
}
// Parsing command-line
PaddleDetection::load_jsonf(config_path, RT_Config);
if (RT_Config["model_dir_det"].as<std::string>().empty()) {
std::cout << "Please set [model_det_dir] in " << config_path << std::endl;
return -1;
}
if (RT_Config["image_file"].as<std::string>().empty() &&
RT_Config["image_dir"].as<std::string>().empty() && img_path.empty()) {
std::cout << "Please set [image_file] or [image_dir] in " << config_path
<< " Or use command: <" << argv[0] << " [image_dir]>"
<< std::endl;
return -1;
}
if (!img_path.empty()) {
std::cout << "Use image_dir in command line overide the path in config file"
<< std::endl;
RT_Config["image_dir"] = img_path;
RT_Config["image_file"] = "";
}
// Load model and create a object detector
PaddleDetection::ObjectDetector det(
RT_Config["model_dir_det"].as<std::string>(),
RT_Config["cpu_threads"].as<int>(),
RT_Config["batch_size_det"].as<int>());
PaddleDetection::KeyPointDetector* keypoint = nullptr;
if (!RT_Config["model_dir_keypoint"].as<std::string>().empty()) {
keypoint = new PaddleDetection::KeyPointDetector(
RT_Config["model_dir_keypoint"].as<std::string>(),
RT_Config["cpu_threads"].as<int>(),
RT_Config["batch_size_keypoint"].as<int>(),
RT_Config["use_dark_decode"].as<bool>());
RT_Config["batch_size_det"] = 1;
printf(
"batch_size_det is forced to 1 when the keypoint model is not "
"empty\n");
}
// Do inference on input image
if (!RT_Config["image_file"].as<std::string>().empty() ||
!RT_Config["image_dir"].as<std::string>().empty()) {
if (!PathExists(RT_Config["output_dir"].as<std::string>())) {
MkDirs(RT_Config["output_dir"].as<std::string>());
}
std::vector<std::string> all_img_paths;
std::vector<cv::String> cv_all_img_paths;
if (!RT_Config["image_file"].as<std::string>().empty()) {
all_img_paths.push_back(RT_Config["image_file"].as<std::string>());
if (RT_Config["batch_size_det"].as<int>() > 1) {
std::cout << "batch_size_det should be 1, when set `image_file`."
<< std::endl;
return -1;
}
} else {
cv::glob(RT_Config["image_dir"].as<std::string>(), cv_all_img_paths);
for (const auto& img_path : cv_all_img_paths) {
all_img_paths.push_back(img_path);
}
}
PredictImage(all_img_paths,
RT_Config["batch_size_det"].as<int>(),
RT_Config["threshold_det"].as<float>(),
RT_Config["run_benchmark"].as<bool>(),
&det,
keypoint,
RT_Config["output_dir"].as<std::string>());
}
delete keypoint;
keypoint = nullptr;
return 0;
}
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <sstream>
// for setprecision
#include <chrono>
#include <iomanip>
#include "include/object_detector.h"
namespace PaddleDetection {
// Load Model and create model predictor
void ObjectDetector::LoadModel(std::string model_file, int num_threads) {
MobileConfig config;
config.set_threads(num_threads);
config.set_model_from_file(model_file + "/model.nb");
config.set_power_mode(LITE_POWER_HIGH);
predictor_ = CreatePaddlePredictor<MobileConfig>(config);
}
// Visualize detection results
cv::Mat VisualizeResult(const cv::Mat& img,
const std::vector<PaddleDetection::ObjectResult>& results,
const std::vector<std::string>& labels,
const std::vector<int>& colormap,
const bool is_rbox = false) {
cv::Mat vis_img = img.clone();
for (int i = 0; i < results.size(); ++i) {
// Configure color and text size
std::ostringstream oss;
oss << std::setiosflags(std::ios::fixed) << std::setprecision(4);
oss << labels[results[i].class_id] << " ";
oss << results[i].confidence;
std::string text = oss.str();
int c1 = colormap[3 * results[i].class_id + 0];
int c2 = colormap[3 * results[i].class_id + 1];
int c3 = colormap[3 * results[i].class_id + 2];
cv::Scalar roi_color = cv::Scalar(c1, c2, c3);
int font_face = cv::FONT_HERSHEY_COMPLEX_SMALL;
double font_scale = 0.5f;
float thickness = 0.5;
cv::Size text_size =
cv::getTextSize(text, font_face, font_scale, thickness, nullptr);
cv::Point origin;
if (is_rbox) {
// Draw object, text, and background
for (int k = 0; k < 4; k++) {
cv::Point pt1 = cv::Point(results[i].rect[(k * 2) % 8],
results[i].rect[(k * 2 + 1) % 8]);
cv::Point pt2 = cv::Point(results[i].rect[(k * 2 + 2) % 8],
results[i].rect[(k * 2 + 3) % 8]);
cv::line(vis_img, pt1, pt2, roi_color, 2);
}
} else {
int w = results[i].rect[2] - results[i].rect[0];
int h = results[i].rect[3] - results[i].rect[1];
cv::Rect roi = cv::Rect(results[i].rect[0], results[i].rect[1], w, h);
// Draw roi object, text, and background
cv::rectangle(vis_img, roi, roi_color, 2);
}
origin.x = results[i].rect[0];
origin.y = results[i].rect[1];
// Configure text background
cv::Rect text_back = cv::Rect(results[i].rect[0],
results[i].rect[1] - text_size.height,
text_size.width,
text_size.height);
// Draw text, and background
cv::rectangle(vis_img, text_back, roi_color, -1);
cv::putText(vis_img,
text,
origin,
font_face,
font_scale,
cv::Scalar(255, 255, 255),
thickness);
}
return vis_img;
}
void ObjectDetector::Preprocess(const cv::Mat& ori_im) {
// Clone the image : keep the original mat for postprocess
cv::Mat im = ori_im.clone();
cv::cvtColor(im, im, cv::COLOR_BGR2RGB);
preprocessor_.Run(&im, &inputs_);
}
void ObjectDetector::Postprocess(const std::vector<cv::Mat> mats,
std::vector<PaddleDetection::ObjectResult>* result,
std::vector<int> bbox_num,
bool is_rbox = false) {
result->clear();
int start_idx = 0;
for (int im_id = 0; im_id < mats.size(); im_id++) {
cv::Mat raw_mat = mats[im_id];
int rh = 1;
int rw = 1;
if (config_.arch_ == "Face") {
rh = raw_mat.rows;
rw = raw_mat.cols;
}
for (int j = start_idx; j < start_idx + bbox_num[im_id]; j++) {
if (is_rbox) {
// Class id
int class_id = static_cast<int>(round(output_data_[0 + j * 10]));
// Confidence score
float score = output_data_[1 + j * 10];
int x1 = (output_data_[2 + j * 10] * rw);
int y1 = (output_data_[3 + j * 10] * rh);
int x2 = (output_data_[4 + j * 10] * rw);
int y2 = (output_data_[5 + j * 10] * rh);
int x3 = (output_data_[6 + j * 10] * rw);
int y3 = (output_data_[7 + j * 10] * rh);
int x4 = (output_data_[8 + j * 10] * rw);
int y4 = (output_data_[9 + j * 10] * rh);
PaddleDetection::ObjectResult result_item;
result_item.rect = {x1, y1, x2, y2, x3, y3, x4, y4};
result_item.class_id = class_id;
result_item.confidence = score;
result->push_back(result_item);
} else {
// Class id
int class_id = static_cast<int>(round(output_data_[0 + j * 6]));
// Confidence score
float score = output_data_[1 + j * 6];
int xmin = (output_data_[2 + j * 6] * rw);
int ymin = (output_data_[3 + j * 6] * rh);
int xmax = (output_data_[4 + j * 6] * rw);
int ymax = (output_data_[5 + j * 6] * rh);
int wd = xmax - xmin;
int hd = ymax - ymin;
PaddleDetection::ObjectResult result_item;
result_item.rect = {xmin, ymin, xmax, ymax};
result_item.class_id = class_id;
result_item.confidence = score;
result->push_back(result_item);
}
}
start_idx += bbox_num[im_id];
}
}
void ObjectDetector::Predict(const std::vector<cv::Mat>& imgs,
const double threshold,
const int warmup,
const int repeats,
std::vector<PaddleDetection::ObjectResult>* result,
std::vector<int>* bbox_num,
std::vector<double>* times) {
auto preprocess_start = std::chrono::steady_clock::now();
int batch_size = imgs.size();
// in_data_batch
std::vector<float> in_data_all;
std::vector<float> im_shape_all(batch_size * 2);
std::vector<float> scale_factor_all(batch_size * 2);
// Preprocess image
for (int bs_idx = 0; bs_idx < batch_size; bs_idx++) {
cv::Mat im = imgs.at(bs_idx);
Preprocess(im);
im_shape_all[bs_idx * 2] = inputs_.im_shape_[0];
im_shape_all[bs_idx * 2 + 1] = inputs_.im_shape_[1];
scale_factor_all[bs_idx * 2] = inputs_.scale_factor_[0];
scale_factor_all[bs_idx * 2 + 1] = inputs_.scale_factor_[1];
// TODO: reduce cost time
in_data_all.insert(
in_data_all.end(), inputs_.im_data_.begin(), inputs_.im_data_.end());
}
auto preprocess_end = std::chrono::steady_clock::now();
std::vector<const float *> output_data_list_;
// Prepare input tensor
auto input_names = predictor_->GetInputNames();
for (const auto& tensor_name : input_names) {
auto in_tensor = predictor_->GetInputByName(tensor_name);
if (tensor_name == "image") {
int rh = inputs_.in_net_shape_[0];
int rw = inputs_.in_net_shape_[1];
in_tensor->Resize({batch_size, 3, rh, rw});
auto* inptr = in_tensor->mutable_data<float>();
std::copy_n(in_data_all.data(), in_data_all.size(), inptr);
} else if (tensor_name == "im_shape") {
in_tensor->Resize({batch_size, 2});
auto* inptr = in_tensor->mutable_data<float>();
std::copy_n(im_shape_all.data(), im_shape_all.size(), inptr);
} else if (tensor_name == "scale_factor") {
in_tensor->Resize({batch_size, 2});
auto* inptr = in_tensor->mutable_data<float>();
std::copy_n(scale_factor_all.data(), scale_factor_all.size(), inptr);
}
}
// Run predictor
// warmup
for (int i = 0; i < warmup; i++) {
predictor_->Run();
}
bool is_rbox = false;
auto inference_start = std::chrono::steady_clock::now();
for (int i = 0; i < repeats; i++) {
predictor_->Run();
}
auto inference_end = std::chrono::steady_clock::now();
auto postprocess_start = std::chrono::steady_clock::now();
// Get output tensor
output_data_list_.clear();
int num_class = 80;
int reg_max = 7;
auto output_names = predictor_->GetOutputNames();
// TODO: Unified model output.
if (config_.arch_ == "PicoDet") {
for (int i = 0; i < output_names.size(); i++) {
auto output_tensor = predictor_->GetTensor(output_names[i]);
const float* outptr = output_tensor->data<float>();
std::vector<int64_t> output_shape = output_tensor->shape();
if (i == 0) {
num_class = output_shape[2];
}
if (i == config_.fpn_stride_.size()) {
reg_max = output_shape[2] / 4 - 1;
}
output_data_list_.push_back(outptr);
}
} else {
auto output_tensor = predictor_->GetTensor(output_names[0]);
auto output_shape = output_tensor->shape();
auto out_bbox_num = predictor_->GetTensor(output_names[1]);
auto out_bbox_num_shape = out_bbox_num->shape();
// Calculate output length
int output_size = 1;
for (int j = 0; j < output_shape.size(); ++j) {
output_size *= output_shape[j];
}
is_rbox = output_shape[output_shape.size() - 1] % 10 == 0;
if (output_size < 6) {
std::cerr << "[WARNING] No object detected." << std::endl;
}
output_data_.resize(output_size);
std::copy_n(
output_tensor->mutable_data<float>(), output_size, output_data_.data());
int out_bbox_num_size = 1;
for (int j = 0; j < out_bbox_num_shape.size(); ++j) {
out_bbox_num_size *= out_bbox_num_shape[j];
}
out_bbox_num_data_.resize(out_bbox_num_size);
std::copy_n(out_bbox_num->mutable_data<int>(),
out_bbox_num_size,
out_bbox_num_data_.data());
}
// Postprocessing result
result->clear();
if (config_.arch_ == "PicoDet") {
PaddleDetection::PicoDetPostProcess(
result, output_data_list_, config_.fpn_stride_,
inputs_.im_shape_, inputs_.scale_factor_,
config_.nms_info_["score_threshold"].as<float>(),
config_.nms_info_["nms_threshold"].as<float>(), num_class, reg_max);
bbox_num->push_back(result->size());
} else {
Postprocess(imgs, result, out_bbox_num_data_, is_rbox);
bbox_num->clear();
for (int k = 0; k < out_bbox_num_data_.size(); k++) {
int tmp = out_bbox_num_data_[k];
bbox_num->push_back(tmp);
}
}
auto postprocess_end = std::chrono::steady_clock::now();
std::chrono::duration<float> preprocess_diff =
preprocess_end - preprocess_start;
times->push_back(double(preprocess_diff.count() * 1000));
std::chrono::duration<float> inference_diff = inference_end - inference_start;
times->push_back(double(inference_diff.count() / repeats * 1000));
std::chrono::duration<float> postprocess_diff =
postprocess_end - postprocess_start;
times->push_back(double(postprocess_diff.count() * 1000));
}
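// Pascal VOC style color map: the bits of each label id are spread across the
// R/G/B channels, one bit per round, from the most significant bit downward.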
std::vector<int> GenerateColorMap(int num_class) {
auto colormap = std::vector<int>(3 * num_class, 0);
for (int i = 0; i < num_class; ++i) {
int j = 0;
int lab = i;
while (lab) {
colormap[i * 3] |= (((lab >> 0) & 1) << (7 - j));
colormap[i * 3 + 1] |= (((lab >> 1) & 1) << (7 - j));
colormap[i * 3 + 2] |= (((lab >> 2) & 1) << (7 - j));
++j;
lab >>= 3;
}
}
return colormap;
}
} // namespace PaddleDetection
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// The code is based on:
// https://github.com/RangiLyu/nanodet/blob/main/demo_mnn/nanodet_mnn.cpp
#include "include/picodet_postprocess.h"
namespace PaddleDetection {
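// Fast approximate exp(): a Schraudolph-style bit trick that writes the
// IEEE-754 bit pattern directly, scaling x by 1/ln(2) = 1.4426950409.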
float fast_exp(float x) {
union {
uint32_t i;
float f;
} v{};
v.i = (1 << 23) * (1.4426950409 * x + 126.93490512f);
return v.f;
}
template <typename _Tp>
int activation_function_softmax(const _Tp *src, _Tp *dst, int length) {
const _Tp alpha = *std::max_element(src, src + length);
_Tp denominator{0};
for (int i = 0; i < length; ++i) {
dst[i] = fast_exp(src[i] - alpha);
denominator += dst[i];
}
for (int i = 0; i < length; ++i) {
dst[i] /= denominator;
}
return 0;
}
// PicoDet decode
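// disPred2Bbox decodes one cell's distribution (DFL) regression into a box:
// each of the four sides has reg_max+1 logits; softmax followed by the
// expectation gives the distance from the cell center, in units of stride.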
PaddleDetection::ObjectResult
disPred2Bbox(const float *&dfl_det, int label, float score, int x, int y,
int stride, std::vector<float> im_shape, int reg_max) {
float ct_x = (x + 0.5) * stride;
float ct_y = (y + 0.5) * stride;
std::vector<float> dis_pred;
dis_pred.resize(4);
for (int i = 0; i < 4; i++) {
float dis = 0;
float *dis_after_sm = new float[reg_max + 1];
activation_function_softmax(dfl_det + i * (reg_max + 1), dis_after_sm,
reg_max + 1);
for (int j = 0; j < reg_max + 1; j++) {
dis += j * dis_after_sm[j];
}
dis *= stride;
dis_pred[i] = dis;
delete[] dis_after_sm;
}
int xmin = (int)(std::max)(ct_x - dis_pred[0], .0f);
int ymin = (int)(std::max)(ct_y - dis_pred[1], .0f);
int xmax = (int)(std::min)(ct_x + dis_pred[2], (float)im_shape[0]);
int ymax = (int)(std::min)(ct_y + dis_pred[3], (float)im_shape[1]);
PaddleDetection::ObjectResult result_item;
result_item.rect = {xmin, ymin, xmax, ymax};
result_item.class_id = label;
result_item.confidence = score;
return result_item;
}
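// Decode all FPN levels: pick the best class per feature-map cell, turn cells
// above score_threshold into boxes via disPred2Bbox, then apply per-class NMS
// and rescale from network-input coordinates back to the original image.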
void PicoDetPostProcess(std::vector<PaddleDetection::ObjectResult> *results,
std::vector<const float *> outs,
std::vector<int> fpn_stride,
std::vector<float> im_shape,
std::vector<float> scale_factor, float score_threshold,
float nms_threshold, int num_class, int reg_max) {
std::vector<std::vector<PaddleDetection::ObjectResult>> bbox_results;
bbox_results.resize(num_class);
int in_h = im_shape[0], in_w = im_shape[1];
for (int i = 0; i < fpn_stride.size(); ++i) {
int feature_h = ceil((float)in_h / fpn_stride[i]);
int feature_w = ceil((float)in_w / fpn_stride[i]);
for (int idx = 0; idx < feature_h * feature_w; idx++) {
const float *scores = outs[i] + (idx * num_class);
int row = idx / feature_w;
int col = idx % feature_w;
float score = 0;
int cur_label = 0;
for (int label = 0; label < num_class; label++) {
if (scores[label] > score) {
score = scores[label];
cur_label = label;
}
}
if (score > score_threshold) {
const float *bbox_pred =
outs[i + fpn_stride.size()] + (idx * 4 * (reg_max + 1));
bbox_results[cur_label].push_back(
disPred2Bbox(bbox_pred, cur_label, score, col, row, fpn_stride[i],
im_shape, reg_max));
}
}
}
for (int i = 0; i < (int)bbox_results.size(); i++) {
PaddleDetection::nms(bbox_results[i], nms_threshold);
for (auto box : bbox_results[i]) {
box.rect[0] = box.rect[0] / scale_factor[1];
box.rect[2] = box.rect[2] / scale_factor[1];
box.rect[1] = box.rect[1] / scale_factor[0];
box.rect[3] = box.rect[3] / scale_factor[0];
results->push_back(box);
}
}
}
} // namespace PaddleDetection
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <string>
#include <thread>
#include <vector>
#include "include/preprocess_op.h"
namespace PaddleDetection {
void InitInfo::Run(cv::Mat* im, ImageBlob* data) {
data->im_shape_ = {static_cast<float>(im->rows),
static_cast<float>(im->cols)};
data->scale_factor_ = {1., 1.};
data->in_net_shape_ = {static_cast<float>(im->rows),
static_cast<float>(im->cols)};
}
void NormalizeImage::Run(cv::Mat* im, ImageBlob* data) {
double e = 1.0;
if (is_scale_) {
e *= 1./255.0;
}
(*im).convertTo(*im, CV_32FC3, e);
for (int h = 0; h < im->rows; h++) {
for (int w = 0; w < im->cols; w++) {
im->at<cv::Vec3f>(h, w)[0] =
(im->at<cv::Vec3f>(h, w)[0] - mean_[0]) / scale_[0];
im->at<cv::Vec3f>(h, w)[1] =
(im->at<cv::Vec3f>(h, w)[1] - mean_[1]) / scale_[1];
im->at<cv::Vec3f>(h, w)[2] =
(im->at<cv::Vec3f>(h, w)[2] - mean_[2]) / scale_[2];
}
}
}
void Permute::Run(cv::Mat* im, ImageBlob* data) {
(*im).convertTo(*im, CV_32FC3);
int rh = im->rows;
int rw = im->cols;
int rc = im->channels();
(data->im_data_).resize(rc * rh * rw);
float* base = (data->im_data_).data();
for (int i = 0; i < rc; ++i) {
cv::extractChannel(*im, cv::Mat(rh, rw, CV_32FC1, base + i * rh * rw), i);
}
}
void Resize::Run(cv::Mat* im, ImageBlob* data) {
auto resize_scale = GenerateScale(*im);
data->in_net_shape_ = {static_cast<float>(im->cols * resize_scale.first),
static_cast<float>(im->rows * resize_scale.second)};
cv::resize(
*im, *im, cv::Size(), resize_scale.first, resize_scale.second, interp_);
data->im_shape_ = {
static_cast<float>(im->rows), static_cast<float>(im->cols),
};
data->scale_factor_ = {
resize_scale.second, resize_scale.first,
};
}
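// Compute (x, y) resize ratios. With keep_ratio_, a single uniform ratio is
// chosen so the short side reaches its target without the long side exceeding
// its own target.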
std::pair<float, float> Resize::GenerateScale(const cv::Mat& im) {
std::pair<float, float> resize_scale;
int origin_w = im.cols;
int origin_h = im.rows;
if (keep_ratio_) {
int im_size_max = std::max(origin_w, origin_h);
int im_size_min = std::min(origin_w, origin_h);
int target_size_max =
*std::max_element(target_size_.begin(), target_size_.end());
int target_size_min =
*std::min_element(target_size_.begin(), target_size_.end());
float scale_min =
static_cast<float>(target_size_min) / static_cast<float>(im_size_min);
float scale_max =
static_cast<float>(target_size_max) / static_cast<float>(im_size_max);
float scale_ratio = std::min(scale_min, scale_max);
resize_scale = {scale_ratio, scale_ratio};
} else {
resize_scale.first =
static_cast<float>(target_size_[1]) / static_cast<float>(origin_w);
resize_scale.second =
static_cast<float>(target_size_[0]) / static_cast<float>(origin_h);
}
return resize_scale;
}
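// Zero-pad the bottom/right edges so height and width become multiples of
// stride_.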
void PadStride::Run(cv::Mat* im, ImageBlob* data) {
if (stride_ <= 0) {
return;
}
int rc = im->channels();
int rh = im->rows;
int rw = im->cols;
int nh = (rh / stride_) * stride_ + (rh % stride_ != 0) * stride_;
int nw = (rw / stride_) * stride_ + (rw % stride_ != 0) * stride_;
cv::copyMakeBorder(
*im, *im, 0, nh - rh, 0, nw - rw, cv::BORDER_CONSTANT, cv::Scalar(0));
data->in_net_shape_ = {
static_cast<float>(im->rows), static_cast<float>(im->cols),
};
}
void TopDownEvalAffine::Run(cv::Mat* im, ImageBlob* data) {
cv::resize(*im, *im, cv::Size(trainsize_[0], trainsize_[1]), 0, 0, interp_);
// todo: Simd::ResizeBilinear();
data->in_net_shape_ = {
static_cast<float>(trainsize_[1]), static_cast<float>(trainsize_[0]),
};
}
// Preprocessor op running order
const std::vector<std::string> Preprocessor::RUN_ORDER = {"InitInfo",
"TopDownEvalAffine",
"Resize",
"NormalizeImage",
"PadStride",
"Permute"};
void Preprocessor::Run(cv::Mat* im, ImageBlob* data) {
for (const auto& name : RUN_ORDER) {
if (ops_.find(name) != ops_.end()) {
ops_[name]->Run(im, data);
}
}
}
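// Crop a detected box from img, expand it to a 3:4 width:height aspect ratio
// plus an expandratio margin, and return the crop together with its center
// and scale for the top-down keypoint model.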
void CropImg(cv::Mat& img,
cv::Mat& crop_img,
std::vector<int>& area,
std::vector<float>& center,
std::vector<float>& scale,
float expandratio) {
int crop_x1 = std::max(0, area[0]);
int crop_y1 = std::max(0, area[1]);
int crop_x2 = std::min(img.cols - 1, area[2]);
int crop_y2 = std::min(img.rows - 1, area[3]);
int center_x = (crop_x1 + crop_x2) / 2.;
int center_y = (crop_y1 + crop_y2) / 2.;
int half_h = (crop_y2 - crop_y1) / 2.;
int half_w = (crop_x2 - crop_x1) / 2.;
if (half_h * 3 > half_w * 4) {
half_w = static_cast<int>(half_h * 0.75);
} else {
half_h = static_cast<int>(half_w * 4 / 3);
}
crop_x1 =
std::max(0, center_x - static_cast<int>(half_w * (1 + expandratio)));
crop_y1 =
std::max(0, center_y - static_cast<int>(half_h * (1 + expandratio)));
crop_x2 = std::min(img.cols - 1,
static_cast<int>(center_x + half_w * (1 + expandratio)));
crop_y2 = std::min(img.rows - 1,
static_cast<int>(center_y + half_h * (1 + expandratio)));
crop_img =
img(cv::Range(crop_y1, crop_y2 + 1), cv::Range(crop_x1, crop_x2 + 1));
center.clear();
center.emplace_back((crop_x1 + crop_x2) / 2);
center.emplace_back((crop_y1 + crop_y2) / 2);
scale.clear();
scale.emplace_back((crop_x2 - crop_x1));
scale.emplace_back((crop_y2 - crop_y1));
}
} // namespace PaddleDetection
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "include/utils.h"
namespace PaddleDetection {
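// Greedy hard-NMS: sort boxes by confidence, then drop any box whose IoU with
// an already-kept box is at least nms_threshold.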
void nms(std::vector<ObjectResult> &input_boxes, float nms_threshold) {
std::sort(input_boxes.begin(),
input_boxes.end(),
[](ObjectResult a, ObjectResult b) { return a.confidence > b.confidence; });
std::vector<float> vArea(input_boxes.size());
for (int i = 0; i < int(input_boxes.size()); ++i) {
vArea[i] = (input_boxes.at(i).rect[2] - input_boxes.at(i).rect[0] + 1)
* (input_boxes.at(i).rect[3] - input_boxes.at(i).rect[1] + 1);
}
for (int i = 0; i < int(input_boxes.size()); ++i) {
for (int j = i + 1; j < int(input_boxes.size());) {
float xx1 = (std::max)(input_boxes[i].rect[0], input_boxes[j].rect[0]);
float yy1 = (std::max)(input_boxes[i].rect[1], input_boxes[j].rect[1]);
float xx2 = (std::min)(input_boxes[i].rect[2], input_boxes[j].rect[2]);
float yy2 = (std::min)(input_boxes[i].rect[3], input_boxes[j].rect[3]);
float w = (std::max)(float(0), xx2 - xx1 + 1);
float h = (std::max)(float(0), yy2 - yy1 + 1);
float inter = w * h;
float ovr = inter / (vArea[i] + vArea[j] - inter);
if (ovr >= nms_threshold) {
input_boxes.erase(input_boxes.begin() + j);
vArea.erase(vArea.begin() + j);
}
else {
j++;
}
}
}
}
} // namespace PaddleDetection
# Python Inference Deployment

In PaddlePaddle, the inference engine and the training engine are optimized differently under the hood. The inference engine uses AnalysisPredictor, which is optimized specifically for inference: it is the Python interface to the [C++ inference library](https://www.paddlepaddle.org.cn/documentation/docs/zh/advanced_guide/inference_deployment/inference/native_infer.html), applies a series of graph optimizations to the model, and removes unnecessary memory copies. For users with strict performance requirements when deploying trained models, we provide inference scripts that are independent of PaddleDetection and easy to integrate.

Python deployment consists of two steps:
- Export the inference model
- Run inference with Python

## 1. Export the inference model

A PaddleDetection training checkpoint contains both the forward network and optimizer-related parameters; deployment only needs the forward parameters. See [Export model](../EXPORT_MODEL.md) for details. For example:
```bash
# Export a YOLOv3 detection model
python tools/export_model.py -c configs/yolov3/yolov3_darknet53_270e_coco.yml --output_dir=./inference_model \
 -o weights=https://paddledet.bj.bcebos.com/models/yolov3_darknet53_270e_coco.pdparams
```
The export directory contains four files: `infer_cfg.yml`, `model.pdiparams`, `model.pdiparams.info`, and `model.pdmodel`.

## 2. Inference with Python

### 2.1 General detection

Run the following command in a terminal to predict:
```bash
python deploy/python/infer.py --model_dir=./output_inference/yolov3_darknet53_270e_coco --image_file=./demo/000000014439.jpg --device=GPU
```

### Parameter description

| Parameter | Required | Description |
|-------|-------|---------------------------------------------------------------------------------------------|
| --model_dir | Yes | Path of the exported model described above |
| --image_file | Option | Image to predict |
| --image_dir | Option | Directory of images to predict |
| --video_file | Option | Video to predict |
| --camera_id | Option | ID of the camera used for prediction, default -1 (camera disabled; may be set to 0 through number of cameras - 1). Press `q` in the visualization window to quit and write the prediction result to output/output.mp4 |
| --device | Option | Runtime device, one of `CPU/GPU/XPU`, default `CPU` |
| --run_mode | Option | Inference mode when using GPU, default paddle; options: paddle/trt_fp32/trt_fp16/trt_int8 |
| --batch_size | Option | Batch size for prediction, effective when `image_dir` is set, default 1 |
| --threshold | Option | Score threshold for predictions, default 0.5 |
| --output_dir | Option | Root directory for saving visualized results, default output/ |
| --run_benchmark | Option | Whether to run a benchmark; requires `--image_file` or `--image_dir`, default False |
| --enable_mkldnn | Option | Whether to enable MKLDNN acceleration for CPU prediction, default False |
| --cpu_threads | Option | Number of CPU threads, default 1 |
| --trt_calib_mode | Option | Whether TensorRT runs in calibration mode, default False. Set to True when using TensorRT int8; set to False for models quantized with PaddleSlim |
| --save_images | Option | Whether to save visualized results |
| --save_results | Option | Whether to save per-image prediction results as JSON in the output directory |

Notes:
- Parameter precedence: `camera_id` > `video_file` > `image_dir` > `image_file`
- run_mode: paddle runs AnalysisPredictor at float32 precision; the other values run AnalysisPredictor with TensorRT at the corresponding precision.
- If the installed PaddlePaddle does not support TensorRT-based inference, you need to build it yourself; see the [inference library build guide](https://paddleinference.paddlepaddle.org.cn/user_guides/source_compile.html)
- If --run_benchmark is set to True, install the dependencies with `pip install pynvml psutil GPUtil`
- To evaluate an exported model on the COCO dataset, add `--save_results` and `--use_coco_category` at inference time to save the JSON files required for COCO evaluation
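
As a concrete example, a CPU benchmark run combining the options above might look like the following; the model directory and image folder are placeholder paths for illustration:
```bash
# Placeholder paths; substitute your own exported model and images.
python deploy/python/infer.py \
    --model_dir=./output_inference/yolov3_darknet53_270e_coco \
    --image_dir=./demo \
    --device=CPU \
    --enable_mkldnn=True \
    --cpu_threads=4 \
    --run_benchmark=True
```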
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import logging
import paddle
import paddle.inference as paddle_infer
from pathlib import Path
CUR_DIR = os.path.dirname(os.path.abspath(__file__))
LOG_PATH_ROOT = f"{CUR_DIR}/../../output"
class PaddleInferBenchmark(object):
def __init__(self,
config,
model_info: dict={},
data_info: dict={},
perf_info: dict={},
resource_info: dict={},
**kwargs):
"""
Construct PaddleInferBenchmark Class to format logs.
args:
config(paddle.inference.Config): paddle inference config
model_info(dict): basic model info
{'model_name': 'resnet50'
'precision': 'fp32'}
data_info(dict): input data info
{'batch_size': 1
'shape': '3,224,224'
'data_num': 1000}
perf_info(dict): performance result
{'preprocess_time_s': 1.0
'inference_time_s': 2.0
'postprocess_time_s': 1.0
'total_time_s': 4.0}
resource_info(dict):
cpu and gpu resources
{'cpu_rss': 100
'gpu_rss': 100
'gpu_util': 60}
"""
# PaddleInferBenchmark Log Version
self.log_version = "1.0.3"
# Paddle Version
self.paddle_version = paddle.__version__
self.paddle_commit = paddle.__git_commit__
paddle_infer_info = paddle_infer.get_version()
self.paddle_branch = paddle_infer_info.strip().split(': ')[-1]
# model info
self.model_info = model_info
# data info
self.data_info = data_info
# perf info
self.perf_info = perf_info
try:
# required value
self.model_name = model_info['model_name']
self.precision = model_info['precision']
self.batch_size = data_info['batch_size']
self.shape = data_info['shape']
self.data_num = data_info['data_num']
self.inference_time_s = round(perf_info['inference_time_s'], 4)
except KeyError:
self.print_help()
raise ValueError(
"Wrong or missing argument, please check the input arguments and their types")
self.preprocess_time_s = perf_info.get('preprocess_time_s', 0)
self.postprocess_time_s = perf_info.get('postprocess_time_s', 0)
self.with_tracker = True if 'tracking_time_s' in perf_info else False
self.tracking_time_s = perf_info.get('tracking_time_s', 0)
self.total_time_s = perf_info.get('total_time_s', 0)
self.inference_time_s_90 = perf_info.get("inference_time_s_90", "")
self.inference_time_s_99 = perf_info.get("inference_time_s_99", "")
self.succ_rate = perf_info.get("succ_rate", "")
self.qps = perf_info.get("qps", "")
# conf info
self.config_status = self.parse_config(config)
# mem info
if isinstance(resource_info, dict):
self.cpu_rss_mb = int(resource_info.get('cpu_rss_mb', 0))
self.cpu_vms_mb = int(resource_info.get('cpu_vms_mb', 0))
self.cpu_shared_mb = int(resource_info.get('cpu_shared_mb', 0))
self.cpu_dirty_mb = int(resource_info.get('cpu_dirty_mb', 0))
self.cpu_util = round(resource_info.get('cpu_util', 0), 2)
self.gpu_rss_mb = int(resource_info.get('gpu_rss_mb', 0))
self.gpu_util = round(resource_info.get('gpu_util', 0), 2)
self.gpu_mem_util = round(resource_info.get('gpu_mem_util', 0), 2)
else:
self.cpu_rss_mb = 0
self.cpu_vms_mb = 0
self.cpu_shared_mb = 0
self.cpu_dirty_mb = 0
self.cpu_util = 0
self.gpu_rss_mb = 0
self.gpu_util = 0
self.gpu_mem_util = 0
# init benchmark logger
self.benchmark_logger()
def benchmark_logger(self):
"""
benchmark logger
"""
# remove other logging handler
for handler in logging.root.handlers[:]:
logging.root.removeHandler(handler)
# Init logger
FORMAT = '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
log_output = f"{LOG_PATH_ROOT}/{self.model_name}.log"
Path(f"{LOG_PATH_ROOT}").mkdir(parents=True, exist_ok=True)
logging.basicConfig(
level=logging.INFO,
format=FORMAT,
handlers=[
logging.FileHandler(
filename=log_output, mode='w'),
logging.StreamHandler(),
])
self.logger = logging.getLogger(__name__)
self.logger.info(
f"Paddle Inference benchmark log will be saved to {log_output}")
def parse_config(self, config) -> dict:
"""
parse paddle predictor config
args:
config(paddle.inference.Config): paddle inference config
return:
config_status(dict): dict style config info
"""
# Initialize up front so the dict branch below does not hit a NameError.
config_status = {}
if isinstance(config, paddle_infer.Config):
config_status['runtime_device'] = "gpu" if config.use_gpu(
) else "cpu"
config_status['ir_optim'] = config.ir_optim()
config_status['enable_tensorrt'] = config.tensorrt_engine_enabled()
config_status['precision'] = self.precision
config_status['enable_mkldnn'] = config.mkldnn_enabled()
config_status[
'cpu_math_library_num_threads'] = config.cpu_math_library_num_threads(
)
elif isinstance(config, dict):
config_status['runtime_device'] = config.get('runtime_device', "")
config_status['ir_optim'] = config.get('ir_optim', "")
config_status['enable_tensorrt'] = config.get('enable_tensorrt', "")
config_status['precision'] = config.get('precision', "")
config_status['enable_mkldnn'] = config.get('enable_mkldnn', "")
config_status['cpu_math_library_num_threads'] = config.get(
'cpu_math_library_num_threads', "")
else:
self.print_help()
raise ValueError(
"Set argument config wrong, please check input argument and its type"
)
return config_status
def report(self, identifier=None):
"""
print log report
args:
identifier(string): identify log
"""
if identifier:
identifier = f"[{identifier}]"
else:
identifier = ""
self.logger.info("\n")
self.logger.info(
"---------------------- Paddle info ----------------------")
self.logger.info(f"{identifier} paddle_version: {self.paddle_version}")
self.logger.info(f"{identifier} paddle_commit: {self.paddle_commit}")
self.logger.info(f"{identifier} paddle_branch: {self.paddle_branch}")
self.logger.info(f"{identifier} log_api_version: {self.log_version}")
self.logger.info(
"----------------------- Conf info -----------------------")
self.logger.info(
f"{identifier} runtime_device: {self.config_status['runtime_device']}"
)
self.logger.info(
f"{identifier} ir_optim: {self.config_status['ir_optim']}")
self.logger.info(f"{identifier} enable_memory_optim: {True}")
self.logger.info(
f"{identifier} enable_tensorrt: {self.config_status['enable_tensorrt']}"
)
self.logger.info(
f"{identifier} enable_mkldnn: {self.config_status['enable_mkldnn']}")
self.logger.info(
f"{identifier} cpu_math_library_num_threads: {self.config_status['cpu_math_library_num_threads']}"
)
self.logger.info(
"----------------------- Model info ----------------------")
self.logger.info(f"{identifier} model_name: {self.model_name}")
self.logger.info(f"{identifier} precision: {self.precision}")
self.logger.info(
"----------------------- Data info -----------------------")
self.logger.info(f"{identifier} batch_size: {self.batch_size}")
self.logger.info(f"{identifier} input_shape: {self.shape}")
self.logger.info(f"{identifier} data_num: {self.data_num}")
self.logger.info(
"----------------------- Perf info -----------------------")
self.logger.info(
f"{identifier} cpu_rss(MB): {self.cpu_rss_mb}, cpu_vms: {self.cpu_vms_mb}, cpu_shared_mb: {self.cpu_shared_mb}, cpu_dirty_mb: {self.cpu_dirty_mb}, cpu_util: {self.cpu_util}%"
)
self.logger.info(
f"{identifier} gpu_rss(MB): {self.gpu_rss_mb}, gpu_util: {self.gpu_util}%, gpu_mem_util: {self.gpu_mem_util}%"
)
self.logger.info(
f"{identifier} total time spent(s): {self.total_time_s}")
if self.with_tracker:
self.logger.info(
f"{identifier} preprocess_time(ms): {round(self.preprocess_time_s*1000, 1)}, "
f"inference_time(ms): {round(self.inference_time_s*1000, 1)}, "
f"postprocess_time(ms): {round(self.postprocess_time_s*1000, 1)}, "
f"tracking_time(ms): {round(self.tracking_time_s*1000, 1)}")
else:
self.logger.info(
f"{identifier} preprocess_time(ms): {round(self.preprocess_time_s*1000, 1)}, "
f"inference_time(ms): {round(self.inference_time_s*1000, 1)}, "
f"postprocess_time(ms): {round(self.postprocess_time_s*1000, 1)}"
)
if self.inference_time_s_90:
self.logger.info(
f"{identifier} 90%_cost: {self.inference_time_s_90}, 99%_cost: {self.inference_time_s_99}, succ_rate: {self.succ_rate}"
)
if self.qps:
self.logger.info(f"{identifier} QPS: {self.qps}")
def print_help(self):
"""
print function help
"""
print("""Usage:
==== Print inference benchmark logs. ====
config = paddle.inference.Config()
model_info = {'model_name': 'resnet50'
'precision': 'fp32'}
data_info = {'batch_size': 1
'shape': '3,224,224'
'data_num': 1000}
perf_info = {'preprocess_time_s': 1.0
'inference_time_s': 2.0
'postprocess_time_s': 1.0
'total_time_s': 4.0}
resource_info = {'cpu_rss_mb': 100
'gpu_rss_mb': 100
'gpu_util': 60}
log = PaddleInferBenchmark(config, model_info, data_info, perf_info, resource_info)
log('Test')
""")
def __call__(self, identifier=None):
"""
__call__
args:
identifier(string): identify log
"""
self.report(identifier)
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import yaml
import glob
import json
from pathlib import Path
from functools import reduce
import cv2
import numpy as np
import math
import paddle
from paddle.inference import Config
from paddle.inference import create_predictor
import sys
# add deploy path of PaddleDetection to sys.path
parent_path = os.path.abspath(os.path.join(__file__, '..'))
sys.path.insert(0, parent_path)
from benchmark_utils import PaddleInferBenchmark
from preprocess import preprocess, Resize, NormalizeImage, Permute, Pad, decode_image
from visualize import visualize_box_mask
from utils import argsparser, Timer, get_current_memory_mb, multiclass_nms, coco_clsid2catid
# Global dictionary
SUPPORT_MODELS = {
'YOLO', 'PPYOLOE', 'YOLOX', 'YOLOF', 'YOLOv5', 'RTMDet', 'YOLOv6', 'YOLOv7', 'YOLOv8', 'DETR'
}
def bench_log(detector, img_list, model_info, batch_size=1, name=None):
mems = {
'cpu_rss_mb': detector.cpu_mem / len(img_list),
'gpu_rss_mb': detector.gpu_mem / len(img_list),
'gpu_util': detector.gpu_util * 100 / len(img_list)
}
perf_info = detector.det_times.report(average=True)
data_info = {
'batch_size': batch_size,
'shape': "dynamic_shape",
'data_num': perf_info['img_num']
}
log = PaddleInferBenchmark(detector.config, model_info, data_info,
perf_info, mems)
log(name)
class Detector(object):
"""
Args:
pred_config (object): config of model, defined by `Config(model_dir)`
model_dir (str): root path of model.pdiparams, model.pdmodel and infer_cfg.yml
device (str): Choose the device you want to run, it can be: CPU/GPU/XPU, default is CPU
run_mode (str): mode of running (paddle/trt_fp32/trt_fp16)
batch_size (int): batch size used in inference
trt_min_shape (int): min shape for dynamic shape in trt
trt_max_shape (int): max shape for dynamic shape in trt
trt_opt_shape (int): opt shape for dynamic shape in trt
trt_calib_mode (bool): If the model is produced by TRT offline quantitative
calibration, trt_calib_mode needs to be set to True
cpu_threads (int): cpu threads
enable_mkldnn (bool): whether to open MKLDNN
enable_mkldnn_bfloat16 (bool): whether to turn on mkldnn bfloat16
output_dir (str): The path of output
threshold (float): The threshold of score for visualization
delete_shuffle_pass (bool): whether to remove shuffle_channel_detect_pass in TensorRT.
Used by action model.
"""
def __init__(self,
model_dir,
device='CPU',
run_mode='paddle',
batch_size=1,
trt_min_shape=1,
trt_max_shape=1280,
trt_opt_shape=640,
trt_calib_mode=False,
cpu_threads=1,
enable_mkldnn=False,
enable_mkldnn_bfloat16=False,
output_dir='output',
threshold=0.5,
delete_shuffle_pass=False):
self.pred_config = self.set_config(model_dir)
self.predictor, self.config = load_predictor(
model_dir,
self.pred_config.arch,
run_mode=run_mode,
batch_size=batch_size,
min_subgraph_size=self.pred_config.min_subgraph_size,
device=device,
use_dynamic_shape=self.pred_config.use_dynamic_shape,
trt_min_shape=trt_min_shape,
trt_max_shape=trt_max_shape,
trt_opt_shape=trt_opt_shape,
trt_calib_mode=trt_calib_mode,
cpu_threads=cpu_threads,
enable_mkldnn=enable_mkldnn,
enable_mkldnn_bfloat16=enable_mkldnn_bfloat16,
delete_shuffle_pass=delete_shuffle_pass)
self.det_times = Timer()
self.cpu_mem, self.gpu_mem, self.gpu_util = 0, 0, 0
self.batch_size = batch_size
self.output_dir = output_dir
self.threshold = threshold
def set_config(self, model_dir):
return PredictConfig(model_dir)
def preprocess(self, image_list):
preprocess_ops = []
for op_info in self.pred_config.preprocess_infos:
new_op_info = op_info.copy()
op_type = new_op_info.pop('type')
preprocess_ops.append(eval(op_type)(**new_op_info))
input_im_lst = []
input_im_info_lst = []
for im_path in image_list:
im, im_info = preprocess(im_path, preprocess_ops)
input_im_lst.append(im)
input_im_info_lst.append(im_info)
inputs = create_inputs(input_im_lst, input_im_info_lst)
input_names = self.predictor.get_input_names()
for i in range(len(input_names)):
input_tensor = self.predictor.get_input_handle(input_names[i])
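            # some exported models name the image input 'x' rather than 'image'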
if input_names[i] == 'x':
input_tensor.copy_from_cpu(inputs['image'])
else:
input_tensor.copy_from_cpu(inputs[input_names[i]])
return inputs
def postprocess(self, inputs, result):
# postprocess output of predictor
np_boxes_num = result['boxes_num']
assert isinstance(np_boxes_num, np.ndarray), \
'`np_boxes_num` should be a `numpy.ndarray`'
result = {k: v for k, v in result.items() if v is not None}
return result
def filter_box(self, result, threshold):
np_boxes_num = result['boxes_num']
boxes = result['boxes']
start_idx = 0
filter_boxes = []
filter_num = []
for i in range(len(np_boxes_num)):
boxes_num = np_boxes_num[i]
boxes_i = boxes[start_idx:start_idx + boxes_num, :]
idx = boxes_i[:, 1] > threshold
filter_boxes_i = boxes_i[idx, :]
filter_boxes.append(filter_boxes_i)
filter_num.append(filter_boxes_i.shape[0])
start_idx += boxes_num
boxes = np.concatenate(filter_boxes)
filter_num = np.array(filter_num)
filter_res = {'boxes': boxes, 'boxes_num': filter_num}
return filter_res
def predict(self, repeats=1, run_benchmark=False):
'''
Args:
            repeats (int): number of times to repeat prediction
        Returns:
            result (dict): include 'boxes': np.ndarray: shape: [N, 6], N: number of boxes,
                           matrix element: [class, score, x_min, y_min, x_max, y_max]
                           MaskRCNN's result includes 'masks': np.ndarray:
                           shape: [N, im_h, im_w]
'''
# model prediction
np_boxes_num, np_boxes, np_masks = np.array([0]), None, None
if run_benchmark:
for i in range(repeats):
self.predictor.run()
paddle.device.cuda.synchronize()
result = dict(
boxes=np_boxes, masks=np_masks, boxes_num=np_boxes_num)
return result
for i in range(repeats):
self.predictor.run()
output_names = self.predictor.get_output_names()
boxes_tensor = self.predictor.get_output_handle(output_names[0])
np_boxes = boxes_tensor.copy_to_cpu()
if len(output_names) == 1:
                # some exported models do not expose the 'bbox_num' tensor
np_boxes_num = np.array([len(np_boxes)])
else:
boxes_num = self.predictor.get_output_handle(output_names[1])
np_boxes_num = boxes_num.copy_to_cpu()
if self.pred_config.mask:
masks_tensor = self.predictor.get_output_handle(output_names[2])
np_masks = masks_tensor.copy_to_cpu()
result = dict(boxes=np_boxes, masks=np_masks, boxes_num=np_boxes_num)
return result
def merge_batch_result(self, batch_result):
if len(batch_result) == 1:
return batch_result[0]
res_key = batch_result[0].keys()
results = {k: [] for k in res_key}
for res in batch_result:
for k, v in res.items():
results[k].append(v)
for k, v in results.items():
if k not in ['masks', 'segm']:
results[k] = np.concatenate(v)
return results
def get_timer(self):
return self.det_times
def predict_image_slice(self,
img_list,
slice_size=[640, 640],
overlap_ratio=[0.25, 0.25],
combine_method='nms',
match_threshold=0.6,
match_metric='ios',
run_benchmark=False,
repeats=1,
visual=True,
save_results=False):
        # slice inference only supports batch_size=1
results = []
try:
import sahi
from sahi.slicing import slice_image
except Exception as e:
print(
                'sahi not found, please install sahi. '
                'For example: `pip install sahi`; see https://github.com/obss/sahi.'
)
raise e
num_classes = len(self.pred_config.labels)
for i in range(len(img_list)):
ori_image = img_list[i]
slice_image_result = sahi.slicing.slice_image(
image=ori_image,
slice_height=slice_size[0],
slice_width=slice_size[1],
overlap_height_ratio=overlap_ratio[0],
overlap_width_ratio=overlap_ratio[1])
sub_img_num = len(slice_image_result)
merged_bboxs = []
            print('slice to {} sub_samples.'.format(sub_img_num))
batch_image_list = [
slice_image_result.images[_ind] for _ind in range(sub_img_num)
]
if run_benchmark:
# preprocess
inputs = self.preprocess(batch_image_list) # warmup
self.det_times.preprocess_time_s.start()
inputs = self.preprocess(batch_image_list)
self.det_times.preprocess_time_s.end()
# model prediction
result = self.predict(repeats=50, run_benchmark=True) # warmup
self.det_times.inference_time_s.start()
result = self.predict(repeats=repeats, run_benchmark=True)
self.det_times.inference_time_s.end(repeats=repeats)
# postprocess
result_warmup = self.postprocess(inputs, result) # warmup
self.det_times.postprocess_time_s.start()
result = self.postprocess(inputs, result)
self.det_times.postprocess_time_s.end()
self.det_times.img_num += 1
cm, gm, gu = get_current_memory_mb()
self.cpu_mem += cm
self.gpu_mem += gm
self.gpu_util += gu
else:
# preprocess
self.det_times.preprocess_time_s.start()
inputs = self.preprocess(batch_image_list)
self.det_times.preprocess_time_s.end()
# model prediction
self.det_times.inference_time_s.start()
result = self.predict()
self.det_times.inference_time_s.end()
# postprocess
self.det_times.postprocess_time_s.start()
result = self.postprocess(inputs, result)
self.det_times.postprocess_time_s.end()
self.det_times.img_num += 1
st, ed = 0, result['boxes_num'][0] # start_index, end_index
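            # shift each slice's boxes back into original-image coordinates
            # using the slice's top-left starting pixel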
for _ind in range(sub_img_num):
boxes_num = result['boxes_num'][_ind]
ed = st + boxes_num
shift_amount = slice_image_result.starting_pixels[_ind]
result['boxes'][st:ed][:, 2:4] = result['boxes'][
st:ed][:, 2:4] + shift_amount
result['boxes'][st:ed][:, 4:6] = result['boxes'][
st:ed][:, 4:6] + shift_amount
merged_bboxs.append(result['boxes'][st:ed])
st = ed
merged_results = {'boxes': []}
if combine_method == 'nms':
final_boxes = multiclass_nms(
np.concatenate(merged_bboxs), num_classes, match_threshold,
match_metric)
merged_results['boxes'] = np.concatenate(final_boxes)
elif combine_method == 'concat':
merged_results['boxes'] = np.concatenate(merged_bboxs)
else:
raise ValueError(
"Now only support 'nms' or 'concat' to fuse detection results."
)
merged_results['boxes_num'] = np.array(
[len(merged_results['boxes'])], dtype=np.int32)
if visual:
visualize(
[ori_image], # should be list
merged_results,
self.pred_config.labels,
output_dir=self.output_dir,
threshold=self.threshold)
results.append(merged_results)
print('Test iter {}'.format(i))
results = self.merge_batch_result(results)
if save_results:
Path(self.output_dir).mkdir(exist_ok=True)
self.save_coco_results(
img_list, results, use_coco_category=FLAGS.use_coco_category)
return results
def predict_image(self,
image_list,
run_benchmark=False,
repeats=1,
visual=True,
save_results=False):
batch_loop_cnt = math.ceil(float(len(image_list)) / self.batch_size)
results = []
for i in range(batch_loop_cnt):
start_index = i * self.batch_size
end_index = min((i + 1) * self.batch_size, len(image_list))
batch_image_list = image_list[start_index:end_index]
if run_benchmark:
# preprocess
inputs = self.preprocess(batch_image_list) # warmup
self.det_times.preprocess_time_s.start()
inputs = self.preprocess(batch_image_list)
self.det_times.preprocess_time_s.end()
# model prediction
result = self.predict(repeats=50, run_benchmark=True) # warmup
self.det_times.inference_time_s.start()
result = self.predict(repeats=repeats, run_benchmark=True)
self.det_times.inference_time_s.end(repeats=repeats)
# postprocess
result_warmup = self.postprocess(inputs, result) # warmup
self.det_times.postprocess_time_s.start()
result = self.postprocess(inputs, result)
self.det_times.postprocess_time_s.end()
self.det_times.img_num += len(batch_image_list)
cm, gm, gu = get_current_memory_mb()
self.cpu_mem += cm
self.gpu_mem += gm
self.gpu_util += gu
else:
# preprocess
self.det_times.preprocess_time_s.start()
inputs = self.preprocess(batch_image_list)
self.det_times.preprocess_time_s.end()
# model prediction
self.det_times.inference_time_s.start()
result = self.predict()
self.det_times.inference_time_s.end()
# postprocess
self.det_times.postprocess_time_s.start()
result = self.postprocess(inputs, result)
self.det_times.postprocess_time_s.end()
self.det_times.img_num += len(batch_image_list)
if visual:
visualize(
batch_image_list,
result,
self.pred_config.labels,
output_dir=self.output_dir,
threshold=self.threshold)
results.append(result)
print('Test iter {}'.format(i))
results = self.merge_batch_result(results)
if save_results:
Path(self.output_dir).mkdir(exist_ok=True)
self.save_coco_results(
image_list, results, use_coco_category=FLAGS.use_coco_category)
return results
def predict_video(self, video_file, camera_id):
video_out_name = 'output.mp4'
if camera_id != -1:
capture = cv2.VideoCapture(camera_id)
else:
capture = cv2.VideoCapture(video_file)
video_out_name = os.path.split(video_file)[-1]
        # Get video info: resolution, fps, frame count
width = int(capture.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(capture.get(cv2.CAP_PROP_FRAME_HEIGHT))
fps = int(capture.get(cv2.CAP_PROP_FPS))
frame_count = int(capture.get(cv2.CAP_PROP_FRAME_COUNT))
print("fps: %d, frame_count: %d" % (fps, frame_count))
if not os.path.exists(self.output_dir):
os.makedirs(self.output_dir)
out_path = os.path.join(self.output_dir, video_out_name)
        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
writer = cv2.VideoWriter(out_path, fourcc, fps, (width, height))
index = 1
        while True:
ret, frame = capture.read()
if not ret:
break
print('detect frame: %d' % (index))
index += 1
results = self.predict_image([frame[:, :, ::-1]], visual=False)
im = visualize_box_mask(
frame,
results,
self.pred_config.labels,
threshold=self.threshold)
im = np.array(im)
writer.write(im)
if camera_id != -1:
cv2.imshow('Mask Detection', im)
if cv2.waitKey(1) & 0xFF == ord('q'):
break
writer.release()
def save_coco_results(self, image_list, results, use_coco_category=False):
bbox_results = []
mask_results = []
idx = 0
print("Start saving coco json files...")
for i, box_num in enumerate(results['boxes_num']):
file_name = os.path.split(image_list[i])[-1]
if use_coco_category:
img_id = int(os.path.splitext(file_name)[0])
else:
img_id = i
if 'boxes' in results:
boxes = results['boxes'][idx:idx + box_num].tolist()
bbox_results.extend([{
'image_id': img_id,
'category_id': coco_clsid2catid[int(box[0])] \
if use_coco_category else int(box[0]),
'file_name': file_name,
'bbox': [box[2], box[3], box[4] - box[2],
box[5] - box[3]], # xyxy -> xywh
'score': box[1]} for box in boxes])
if 'masks' in results:
import pycocotools.mask as mask_util
boxes = results['boxes'][idx:idx + box_num].tolist()
masks = results['masks'][i][:box_num].astype(np.uint8)
seg_res = []
for box, mask in zip(boxes, masks):
rle = mask_util.encode(
np.array(
mask[:, :, None], dtype=np.uint8, order="F"))[0]
if 'counts' in rle:
rle['counts'] = rle['counts'].decode("utf8")
seg_res.append({
'image_id': img_id,
'category_id': coco_clsid2catid[int(box[0])] \
if use_coco_category else int(box[0]),
'file_name': file_name,
'segmentation': rle,
'score': box[1]})
mask_results.extend(seg_res)
idx += box_num
if bbox_results:
bbox_file = os.path.join(self.output_dir, "bbox.json")
with open(bbox_file, 'w') as f:
json.dump(bbox_results, f)
print(f"The bbox result is saved to {bbox_file}")
if mask_results:
mask_file = os.path.join(self.output_dir, "mask.json")
with open(mask_file, 'w') as f:
json.dump(mask_results, f)
print(f"The mask result is saved to {mask_file}")
def create_inputs(imgs, im_info):
"""generate input for different model type
Args:
imgs (list(numpy)): list of images (np.ndarray)
im_info (list(dict)): list of image info
Returns:
inputs (dict): input of model
"""
inputs = {}
im_shape = []
scale_factor = []
if len(imgs) == 1:
inputs['image'] = np.array((imgs[0], )).astype('float32')
inputs['im_shape'] = np.array(
(im_info[0]['im_shape'], )).astype('float32')
inputs['scale_factor'] = np.array(
(im_info[0]['scale_factor'], )).astype('float32')
return inputs
for e in im_info:
im_shape.append(np.array((e['im_shape'], )).astype('float32'))
scale_factor.append(np.array((e['scale_factor'], )).astype('float32'))
inputs['im_shape'] = np.concatenate(im_shape, axis=0)
inputs['scale_factor'] = np.concatenate(scale_factor, axis=0)
imgs_shape = [[e.shape[1], e.shape[2]] for e in imgs]
max_shape_h = max([e[0] for e in imgs_shape])
max_shape_w = max([e[1] for e in imgs_shape])
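    # pad every image in the batch to the max height/width so they can be stacked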
padding_imgs = []
for img in imgs:
im_c, im_h, im_w = img.shape[:]
padding_im = np.zeros(
(im_c, max_shape_h, max_shape_w), dtype=np.float32)
padding_im[:, :im_h, :im_w] = img
padding_imgs.append(padding_im)
inputs['image'] = np.stack(padding_imgs, axis=0)
return inputs
class PredictConfig():
"""set config of preprocess, postprocess and visualize
Args:
model_dir (str): root path of model.yml
"""
def __init__(self, model_dir):
# parsing Yaml config for Preprocess
deploy_file = os.path.join(model_dir, 'infer_cfg.yml')
with open(deploy_file) as f:
yml_conf = yaml.safe_load(f)
self.check_model(yml_conf)
self.arch = yml_conf['arch']
self.preprocess_infos = yml_conf['Preprocess']
self.min_subgraph_size = yml_conf['min_subgraph_size']
self.labels = yml_conf['label_list']
self.mask = False
self.use_dynamic_shape = yml_conf['use_dynamic_shape']
if 'mask' in yml_conf:
self.mask = yml_conf['mask']
self.tracker = None
if 'tracker' in yml_conf:
self.tracker = yml_conf['tracker']
if 'NMS' in yml_conf:
self.nms = yml_conf['NMS']
if 'fpn_stride' in yml_conf:
self.fpn_stride = yml_conf['fpn_stride']
if self.arch == 'RCNN' and yml_conf.get('export_onnx', False):
print(
'The RCNN export model is used for ONNX and it only supports batch_size = 1'
)
self.print_config()
def check_model(self, yml_conf):
"""
Raises:
ValueError: loaded model not in supported model type
"""
for support_model in SUPPORT_MODELS:
if support_model in yml_conf['arch']:
return True
raise ValueError("Unsupported arch: {}, expect {}".format(yml_conf[
'arch'], SUPPORT_MODELS))
def print_config(self):
print('----------- Model Configuration -----------')
print('%s: %s' % ('Model Arch', self.arch))
print('%s: ' % ('Transform Order'))
for op_info in self.preprocess_infos:
print('--%s: %s' % ('transform op', op_info['type']))
print('--------------------------------------------')
def load_predictor(model_dir,
arch,
run_mode='paddle',
batch_size=1,
device='CPU',
min_subgraph_size=3,
use_dynamic_shape=False,
trt_min_shape=1,
trt_max_shape=1280,
trt_opt_shape=640,
trt_calib_mode=False,
cpu_threads=1,
enable_mkldnn=False,
enable_mkldnn_bfloat16=False,
delete_shuffle_pass=False):
"""set AnalysisConfig, generate AnalysisPredictor
Args:
model_dir (str): root path of __model__ and __params__
device (str): Choose the device you want to run, it can be: CPU/GPU/XPU, default is CPU
run_mode (str): mode of running(paddle/trt_fp32/trt_fp16/trt_int8)
use_dynamic_shape (bool): use dynamic shape or not
trt_min_shape (int): min shape for dynamic shape in trt
trt_max_shape (int): max shape for dynamic shape in trt
trt_opt_shape (int): opt shape for dynamic shape in trt
        trt_calib_mode (bool): if the model is produced by TRT offline quantization
            calibration, trt_calib_mode needs to be set to True
delete_shuffle_pass (bool): whether to remove shuffle_channel_detect_pass in TensorRT.
Used by action model.
Returns:
predictor (PaddlePredictor): AnalysisPredictor
Raises:
ValueError: predict by TensorRT need device == 'GPU'.
"""
if device != 'GPU' and run_mode != 'paddle':
raise ValueError(
"Predict by TensorRT mode: {}, expect device=='GPU', but device == {}"
.format(run_mode, device))
infer_model = os.path.join(model_dir, 'model.pdmodel')
infer_params = os.path.join(model_dir, 'model.pdiparams')
if not os.path.exists(infer_model):
infer_model = os.path.join(model_dir, 'inference.pdmodel')
infer_params = os.path.join(model_dir, 'inference.pdiparams')
if not os.path.exists(infer_model):
raise ValueError(
"Cannot find any inference model in dir: {},".format(model_dir))
config = Config(infer_model, infer_params)
if device == 'GPU':
        # initial GPU memory (MB), device ID
config.enable_use_gpu(200, 0)
# optimize graph and fuse op
config.switch_ir_optim(True)
elif device == 'XPU':
if config.lite_engine_enabled():
config.enable_lite_engine()
config.enable_xpu(10 * 1024 * 1024)
elif device == 'NPU':
if config.lite_engine_enabled():
config.enable_lite_engine()
config.enable_custom_device('npu')
else:
config.disable_gpu()
config.set_cpu_math_library_num_threads(cpu_threads)
if enable_mkldnn:
try:
# cache 10 different shapes for mkldnn to avoid memory leak
config.set_mkldnn_cache_capacity(10)
config.enable_mkldnn()
if enable_mkldnn_bfloat16:
config.enable_mkldnn_bfloat16()
except Exception as e:
print(
"The current environment does not support `mkldnn`, so disable mkldnn."
)
precision_map = {
'trt_int8': Config.Precision.Int8,
'trt_fp32': Config.Precision.Float32,
'trt_fp16': Config.Precision.Half
}
if run_mode in precision_map.keys():
config.enable_tensorrt_engine(
workspace_size=(1 << 25) * batch_size,
max_batch_size=batch_size,
min_subgraph_size=min_subgraph_size,
precision_mode=precision_map[run_mode],
use_static=False,
use_calib_mode=trt_calib_mode)
if FLAGS.collect_trt_shape_info:
config.collect_shape_range_info(FLAGS.tuned_trt_shape_file)
elif os.path.exists(FLAGS.tuned_trt_shape_file):
print(f'Use dynamic shape file: '
f'{FLAGS.tuned_trt_shape_file} for TRT...')
config.enable_tuned_tensorrt_dynamic_shape(
FLAGS.tuned_trt_shape_file, True)
if use_dynamic_shape:
min_input_shape = {
'image': [batch_size, 3, trt_min_shape, trt_min_shape],
'scale_factor': [batch_size, 2]
}
max_input_shape = {
'image': [batch_size, 3, trt_max_shape, trt_max_shape],
'scale_factor': [batch_size, 2]
}
opt_input_shape = {
'image': [batch_size, 3, trt_opt_shape, trt_opt_shape],
'scale_factor': [batch_size, 2]
}
config.set_trt_dynamic_shape_info(min_input_shape, max_input_shape,
opt_input_shape)
print('trt set dynamic shape done!')
# disable print log when predict
config.disable_glog_info()
# enable shared memory
config.enable_memory_optim()
# disable feed, fetch OP, needed by zero_copy_run
config.switch_use_feed_fetch_ops(False)
if delete_shuffle_pass:
config.delete_pass("shuffle_channel_detect_pass")
predictor = create_predictor(config)
return predictor, config
def get_test_images(infer_dir, infer_img):
"""
Get image path list in TEST mode
"""
assert infer_img is not None or infer_dir is not None, \
"--image_file or --image_dir should be set"
assert infer_img is None or os.path.isfile(infer_img), \
"{} is not a file".format(infer_img)
assert infer_dir is None or os.path.isdir(infer_dir), \
"{} is not a directory".format(infer_dir)
# infer_img has a higher priority
if infer_img and os.path.isfile(infer_img):
return [infer_img]
images = set()
infer_dir = os.path.abspath(infer_dir)
assert os.path.isdir(infer_dir), \
"infer_dir {} is not a directory".format(infer_dir)
exts = ['jpg', 'jpeg', 'png', 'bmp']
exts += [ext.upper() for ext in exts]
for ext in exts:
images.update(glob.glob('{}/*.{}'.format(infer_dir, ext)))
images = list(images)
assert len(images) > 0, "no image found in {}".format(infer_dir)
print("Found {} inference images in total.".format(len(images)))
return images
def visualize(image_list, result, labels, output_dir='output/', threshold=0.5):
# visualize the predict result
start_idx = 0
for idx, image_file in enumerate(image_list):
im_bboxes_num = result['boxes_num'][idx]
im_results = {}
if 'boxes' in result:
im_results['boxes'] = result['boxes'][start_idx:start_idx +
im_bboxes_num, :]
if 'masks' in result:
im_results['masks'] = result['masks'][start_idx:start_idx +
im_bboxes_num, :]
if 'segm' in result:
im_results['segm'] = result['segm'][start_idx:start_idx +
im_bboxes_num, :]
if 'label' in result:
im_results['label'] = result['label'][start_idx:start_idx +
im_bboxes_num]
if 'score' in result:
im_results['score'] = result['score'][start_idx:start_idx +
im_bboxes_num]
start_idx += im_bboxes_num
im = visualize_box_mask(
image_file, im_results, labels, threshold=threshold)
img_name = os.path.split(image_file)[-1]
if not os.path.exists(output_dir):
os.makedirs(output_dir)
out_path = os.path.join(output_dir, img_name)
im.save(out_path, quality=95)
print("save result to: " + out_path)
def print_arguments(args):
print('----------- Running Arguments -----------')
for arg, value in sorted(vars(args).items()):
print('%s: %s' % (arg, value))
print('------------------------------------------')
def main():
deploy_file = os.path.join(FLAGS.model_dir, 'infer_cfg.yml')
with open(deploy_file) as f:
yml_conf = yaml.safe_load(f)
arch = yml_conf['arch']
detector_func = 'Detector'
detector = eval(detector_func)(
FLAGS.model_dir,
device=FLAGS.device,
run_mode=FLAGS.run_mode,
batch_size=FLAGS.batch_size,
trt_min_shape=FLAGS.trt_min_shape,
trt_max_shape=FLAGS.trt_max_shape,
trt_opt_shape=FLAGS.trt_opt_shape,
trt_calib_mode=FLAGS.trt_calib_mode,
cpu_threads=FLAGS.cpu_threads,
enable_mkldnn=FLAGS.enable_mkldnn,
enable_mkldnn_bfloat16=FLAGS.enable_mkldnn_bfloat16,
threshold=FLAGS.threshold,
output_dir=FLAGS.output_dir)
# predict from video file or camera video stream
if FLAGS.video_file is not None or FLAGS.camera_id != -1:
detector.predict_video(FLAGS.video_file, FLAGS.camera_id)
else:
# predict from image
if FLAGS.image_dir is None and FLAGS.image_file is not None:
assert FLAGS.batch_size == 1, "batch_size should be 1, when image_file is not None"
img_list = get_test_images(FLAGS.image_dir, FLAGS.image_file)
if FLAGS.slice_infer:
detector.predict_image_slice(
img_list,
FLAGS.slice_size,
FLAGS.overlap_ratio,
FLAGS.combine_method,
FLAGS.match_threshold,
FLAGS.match_metric,
visual=FLAGS.save_images,
save_results=FLAGS.save_results)
else:
detector.predict_image(
img_list,
FLAGS.run_benchmark,
repeats=100,
visual=FLAGS.save_images,
save_results=FLAGS.save_results)
if not FLAGS.run_benchmark:
detector.det_times.info(average=True)
else:
mode = FLAGS.run_mode
model_dir = FLAGS.model_dir
model_info = {
'model_name': model_dir.strip('/').split('/')[-1],
'precision': mode.split('_')[-1]
}
bench_log(detector, img_list, model_info, name='DET')
if __name__ == '__main__':
paddle.enable_static()
parser = argsparser()
FLAGS = parser.parse_args()
print_arguments(FLAGS)
FLAGS.device = FLAGS.device.upper()
assert FLAGS.device in ['CPU', 'GPU', 'XPU', 'NPU'
], "device should be CPU, GPU, XPU or NPU"
assert not FLAGS.use_gpu, "use_gpu has been deprecated, please use --device"
assert not (
FLAGS.enable_mkldnn == False and FLAGS.enable_mkldnn_bfloat16 == True
), 'To enable mkldnn bfloat, please turn on both enable_mkldnn and enable_mkldnn_bfloat16'
main()
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import cv2
import numpy as np
from PIL import Image
def decode_image(im_file, im_info):
"""read rgb image
Args:
im_file (str|np.ndarray): input can be image path or np.ndarray
im_info (dict): info of image
Returns:
im (np.ndarray): processed image (np.ndarray)
im_info (dict): info of processed image
"""
if isinstance(im_file, str):
with open(im_file, 'rb') as f:
im_read = f.read()
data = np.frombuffer(im_read, dtype='uint8')
im = cv2.imdecode(data, 1) # BGR mode, but need RGB mode
im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
else:
im = im_file
im_info['im_shape'] = np.array(im.shape[:2], dtype=np.float32)
im_info['scale_factor'] = np.array([1., 1.], dtype=np.float32)
return im, im_info
class Resize(object):
"""resize image by target_size and max_size
Args:
target_size (int): the target size of image
        keep_ratio (bool): whether to keep the aspect ratio, default True
interp (int): method of resize
"""
def __init__(self, target_size, keep_ratio=True, interp=cv2.INTER_LINEAR):
if isinstance(target_size, int):
target_size = [target_size, target_size]
self.target_size = target_size
self.keep_ratio = keep_ratio
self.interp = interp
def __call__(self, im, im_info):
"""
Args:
im (np.ndarray): image (np.ndarray)
im_info (dict): info of image
Returns:
im (np.ndarray): processed image (np.ndarray)
im_info (dict): info of processed image
"""
assert len(self.target_size) == 2
assert self.target_size[0] > 0 and self.target_size[1] > 0
im_channel = im.shape[2]
im_scale_y, im_scale_x = self.generate_scale(im)
im = cv2.resize(
im,
None,
None,
fx=im_scale_x,
fy=im_scale_y,
interpolation=self.interp)
im_info['im_shape'] = np.array(im.shape[:2]).astype('float32')
im_info['scale_factor'] = np.array(
[im_scale_y, im_scale_x]).astype('float32')
return im, im_info
def generate_scale(self, im):
"""
Args:
im (np.ndarray): image (np.ndarray)
Returns:
im_scale_x: the resize ratio of X
im_scale_y: the resize ratio of Y
"""
origin_shape = im.shape[:2]
im_c = im.shape[2]
if self.keep_ratio:
im_size_min = np.min(origin_shape)
im_size_max = np.max(origin_shape)
target_size_min = np.min(self.target_size)
target_size_max = np.max(self.target_size)
im_scale = float(target_size_min) / float(im_size_min)
if np.round(im_scale * im_size_max) > target_size_max:
im_scale = float(target_size_max) / float(im_size_max)
im_scale_x = im_scale
im_scale_y = im_scale
else:
resize_h, resize_w = self.target_size
im_scale_y = resize_h / float(origin_shape[0])
im_scale_x = resize_w / float(origin_shape[1])
return im_scale_y, im_scale_x
class NormalizeImage(object):
"""normalize image
Args:
mean (list): im - mean
std (list): im / std
        is_scale (bool): whether to scale the image by 1/255
norm_type (str): type in ['mean_std', 'none']
"""
def __init__(self, mean, std, is_scale=True, norm_type='mean_std'):
self.mean = mean
self.std = std
self.is_scale = is_scale
self.norm_type = norm_type
def __call__(self, im, im_info):
"""
Args:
im (np.ndarray): image (np.ndarray)
im_info (dict): info of image
Returns:
im (np.ndarray): processed image (np.ndarray)
im_info (dict): info of processed image
"""
im = im.astype(np.float32, copy=False)
if self.is_scale:
scale = 1.0 / 255.0
im *= scale
if self.norm_type == 'mean_std':
mean = np.array(self.mean)[np.newaxis, np.newaxis, :]
std = np.array(self.std)[np.newaxis, np.newaxis, :]
im -= mean
im /= std
return im, im_info
class Permute(object):
"""permute image
Args:
to_bgr (bool): whether convert RGB to BGR
channel_first (bool): whether convert HWC to CHW
"""
def __init__(self, ):
super(Permute, self).__init__()
def __call__(self, im, im_info):
"""
Args:
im (np.ndarray): image (np.ndarray)
im_info (dict): info of image
Returns:
im (np.ndarray): processed image (np.ndarray)
im_info (dict): info of processed image
"""
im = im.transpose((2, 0, 1)).copy()
return im, im_info
class PadStride(object):
""" padding image for model with FPN, instead PadBatch(pad_to_stride) in original config
Args:
stride (bool): model with FPN need image shape % stride == 0
"""
def __init__(self, stride=0):
self.coarsest_stride = stride
def __call__(self, im, im_info):
"""
Args:
im (np.ndarray): image (np.ndarray)
im_info (dict): info of image
Returns:
im (np.ndarray): processed image (np.ndarray)
im_info (dict): info of processed image
"""
coarsest_stride = self.coarsest_stride
if coarsest_stride <= 0:
return im, im_info
im_c, im_h, im_w = im.shape
pad_h = int(np.ceil(float(im_h) / coarsest_stride) * coarsest_stride)
pad_w = int(np.ceil(float(im_w) / coarsest_stride) * coarsest_stride)
padding_im = np.zeros((im_c, pad_h, pad_w), dtype=np.float32)
padding_im[:, :im_h, :im_w] = im
return padding_im, im_info
class LetterBoxResize(object):
def __init__(self, target_size):
"""
        Letterbox resize: resize the image to the target size while keeping the
        aspect ratio, then pad the borders to fill the remaining area.
Args:
target_size (int|list): image target size.
"""
super(LetterBoxResize, self).__init__()
if isinstance(target_size, int):
target_size = [target_size, target_size]
self.target_size = target_size
def letterbox(self, img, height, width, color=(127.5, 127.5, 127.5)):
# letterbox: resize a rectangular image to a padded rectangular
shape = img.shape[:2] # [height, width]
ratio_h = float(height) / shape[0]
ratio_w = float(width) / shape[1]
ratio = min(ratio_h, ratio_w)
new_shape = (round(shape[1] * ratio),
round(shape[0] * ratio)) # [width, height]
padw = (width - new_shape[0]) / 2
padh = (height - new_shape[1]) / 2
top, bottom = round(padh - 0.1), round(padh + 0.1)
left, right = round(padw - 0.1), round(padw + 0.1)
img = cv2.resize(
img, new_shape, interpolation=cv2.INTER_AREA) # resized, no border
img = cv2.copyMakeBorder(
img, top, bottom, left, right, cv2.BORDER_CONSTANT,
value=color) # padded rectangular
return img, ratio, padw, padh
def __call__(self, im, im_info):
"""
Args:
im (np.ndarray): image (np.ndarray)
im_info (dict): info of image
Returns:
im (np.ndarray): processed image (np.ndarray)
im_info (dict): info of processed image
"""
assert len(self.target_size) == 2
assert self.target_size[0] > 0 and self.target_size[1] > 0
height, width = self.target_size
h, w = im.shape[:2]
im, ratio, padw, padh = self.letterbox(im, height=height, width=width)
new_shape = [round(h * ratio), round(w * ratio)]
im_info['im_shape'] = np.array(new_shape, dtype=np.float32)
im_info['scale_factor'] = np.array([ratio, ratio], dtype=np.float32)
return im, im_info
class Pad(object):
def __init__(self, size, fill_value=[114.0, 114.0, 114.0]):
"""
Pad image to a specified size.
Args:
size (list[int]): image target size
fill_value (list[float]): rgb value of pad area, default (114.0, 114.0, 114.0)
"""
super(Pad, self).__init__()
if isinstance(size, int):
size = [size, size]
self.size = size
self.fill_value = fill_value
def __call__(self, im, im_info):
im_h, im_w = im.shape[:2]
h, w = self.size
if h == im_h and w == im_w:
im = im.astype(np.float32)
return im, im_info
canvas = np.ones((h, w, 3), dtype=np.float32)
canvas *= np.array(self.fill_value, dtype=np.float32)
canvas[0:im_h, 0:im_w, :] = im.astype(np.float32)
im = canvas
return im, im_info
def preprocess(im, preprocess_ops):
# process image by preprocess_ops
im_info = {
'scale_factor': np.array(
[1., 1.], dtype=np.float32),
'im_shape': None,
}
im, im_info = decode_image(im, im_info)
for operator in preprocess_ops:
im, im_info = operator(im, im_info)
return im, im_info
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import time
import os
import ast
import argparse
import numpy as np
def argsparser():
parser = argparse.ArgumentParser(description=__doc__)
parser.add_argument(
"--model_dir",
type=str,
default=None,
help=("Directory include:'model.pdiparams', 'model.pdmodel', "
"'infer_cfg.yml', created by tools/export_model.py."),
required=True)
parser.add_argument(
"--image_file", type=str, default=None, help="Path of image file.")
parser.add_argument(
"--image_dir",
type=str,
default=None,
help="Dir of image file, `image_file` has a higher priority.")
parser.add_argument(
"--batch_size", type=int, default=1, help="batch_size for inference.")
parser.add_argument(
"--video_file",
type=str,
default=None,
help="Path of video file, `video_file` or `camera_id` has a highest priority."
)
parser.add_argument(
"--camera_id",
type=int,
default=-1,
help="device id of camera to predict.")
parser.add_argument(
"--threshold", type=float, default=0.5, help="Threshold of score.")
parser.add_argument(
"--output_dir",
type=str,
default="output",
help="Directory of output visualization files.")
parser.add_argument(
"--run_mode",
type=str,
default='paddle',
help="mode of running(paddle/trt_fp32/trt_fp16/trt_int8)")
parser.add_argument(
"--device",
type=str,
default='cpu',
help="Choose the device you want to run, it can be: CPU/GPU/XPU/NPU, default is CPU."
)
parser.add_argument(
"--use_gpu",
type=ast.literal_eval,
default=False,
help="Deprecated, please use `--device`.")
parser.add_argument(
"--run_benchmark",
type=ast.literal_eval,
default=False,
help="Whether to predict a image_file repeatedly for benchmark")
parser.add_argument(
"--enable_mkldnn",
type=ast.literal_eval,
default=False,
help="Whether use mkldnn with CPU.")
parser.add_argument(
"--enable_mkldnn_bfloat16",
type=ast.literal_eval,
default=False,
help="Whether use mkldnn bfloat16 inference with CPU.")
parser.add_argument(
"--cpu_threads", type=int, default=1, help="Num of threads with CPU.")
parser.add_argument(
"--trt_min_shape", type=int, default=1, help="min_shape for TensorRT.")
parser.add_argument(
"--trt_max_shape",
type=int,
default=1280,
help="max_shape for TensorRT.")
parser.add_argument(
"--trt_opt_shape",
type=int,
default=640,
help="opt_shape for TensorRT.")
parser.add_argument(
"--trt_calib_mode",
type=bool,
default=False,
help="If the model is produced by TRT offline quantitative "
"calibration, trt_calib_mode need to set True.")
parser.add_argument(
'--save_images',
type=ast.literal_eval,
default=True,
help='Save visualization image results.')
parser.add_argument(
"--save_results",
action='store_true',
default=False,
help="Whether save detection result to file using coco format")
parser.add_argument(
'--use_coco_category',
action='store_true',
default=False,
help='Whether to use the coco format dictionary `clsid2catid`')
parser.add_argument(
"--slice_infer",
action='store_true',
help="Whether to slice the image and merge the inference results for small object detection."
)
parser.add_argument(
'--slice_size',
nargs='+',
type=int,
default=[640, 640],
help="Height of the sliced image.")
parser.add_argument(
"--overlap_ratio",
nargs='+',
type=float,
default=[0.25, 0.25],
help="Overlap height ratio of the sliced image.")
parser.add_argument(
"--combine_method",
type=str,
default='nms',
help="Combine method of the sliced images' detection results, choose in ['nms', 'nmm', 'concat']."
)
parser.add_argument(
"--match_threshold",
type=float,
default=0.6,
help="Combine method matching threshold.")
parser.add_argument(
"--match_metric",
type=str,
default='ios',
help="Combine method matching metric, choose in ['iou', 'ios'].")
parser.add_argument(
"--collect_trt_shape_info",
action='store_true',
default=False,
help="Whether to collect dynamic shape before using tensorrt.")
parser.add_argument(
"--tuned_trt_shape_file",
type=str,
default="shape_range_info.pbtxt",
help="Path of a dynamic shape file for tensorrt.")
return parser
class Times(object):
def __init__(self):
self.time = 0.
# start time
self.st = 0.
# end time
self.et = 0.
def start(self):
self.st = time.time()
def end(self, repeats=1, accumulative=True):
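        # average the elapsed wall time over `repeats` runs; accumulate across
        # calls unless accumulative=False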
self.et = time.time()
if accumulative:
self.time += (self.et - self.st) / repeats
else:
self.time = (self.et - self.st) / repeats
def reset(self):
self.time = 0.
self.st = 0.
self.et = 0.
def value(self):
return round(self.time, 4)
class Timer(Times):
def __init__(self, with_tracker=False):
super(Timer, self).__init__()
self.with_tracker = with_tracker
self.preprocess_time_s = Times()
self.inference_time_s = Times()
self.postprocess_time_s = Times()
self.tracking_time_s = Times()
self.img_num = 0
def info(self, average=False):
pre_time = self.preprocess_time_s.value()
infer_time = self.inference_time_s.value()
post_time = self.postprocess_time_s.value()
track_time = self.tracking_time_s.value()
total_time = pre_time + infer_time + post_time
if self.with_tracker:
total_time = total_time + track_time
total_time = round(total_time, 4)
print("------------------ Inference Time Info ----------------------")
print("total_time(ms): {}, img_num: {}".format(total_time * 1000,
self.img_num))
preprocess_time = round(pre_time / max(1, self.img_num),
4) if average else pre_time
postprocess_time = round(post_time / max(1, self.img_num),
4) if average else post_time
inference_time = round(infer_time / max(1, self.img_num),
4) if average else infer_time
tracking_time = round(track_time / max(1, self.img_num),
4) if average else track_time
average_latency = total_time / max(1, self.img_num)
qps = 0
if total_time > 0:
qps = 1 / average_latency
print("average latency time(ms): {:.2f}, QPS: {:2f}".format(
average_latency * 1000, qps))
if self.with_tracker:
print(
"preprocess_time(ms): {:.2f}, inference_time(ms): {:.2f}, postprocess_time(ms): {:.2f}, tracking_time(ms): {:.2f}".
format(preprocess_time * 1000, inference_time * 1000,
postprocess_time * 1000, tracking_time * 1000))
else:
print(
"preprocess_time(ms): {:.2f}, inference_time(ms): {:.2f}, postprocess_time(ms): {:.2f}".
format(preprocess_time * 1000, inference_time * 1000,
postprocess_time * 1000))
def report(self, average=False):
dic = {}
pre_time = self.preprocess_time_s.value()
infer_time = self.inference_time_s.value()
post_time = self.postprocess_time_s.value()
track_time = self.tracking_time_s.value()
dic['preprocess_time_s'] = round(pre_time / max(1, self.img_num),
4) if average else pre_time
dic['inference_time_s'] = round(infer_time / max(1, self.img_num),
4) if average else infer_time
dic['postprocess_time_s'] = round(post_time / max(1, self.img_num),
4) if average else post_time
dic['img_num'] = self.img_num
total_time = pre_time + infer_time + post_time
if self.with_tracker:
dic['tracking_time_s'] = round(track_time / max(1, self.img_num),
4) if average else track_time
total_time = total_time + track_time
dic['total_time_s'] = round(total_time, 4)
return dic
def get_current_memory_mb():
"""
It is used to Obtain the memory usage of the CPU and GPU during the running of the program.
And this function Current program is time-consuming.
"""
import pynvml
import psutil
import GPUtil
gpu_id = int(os.environ.get('CUDA_VISIBLE_DEVICES', 0))
pid = os.getpid()
p = psutil.Process(pid)
info = p.memory_full_info()
cpu_mem = info.uss / 1024. / 1024.
gpu_mem = 0
gpu_percent = 0
gpus = GPUtil.getGPUs()
if gpu_id is not None and len(gpus) > 0:
gpu_percent = gpus[gpu_id].load
pynvml.nvmlInit()
handle = pynvml.nvmlDeviceGetHandleByIndex(0)
meminfo = pynvml.nvmlDeviceGetMemoryInfo(handle)
gpu_mem = meminfo.used / 1024. / 1024.
return round(cpu_mem, 4), round(gpu_mem, 4), round(gpu_percent, 4)
def multiclass_nms(bboxs, num_classes, match_threshold=0.6, match_metric='iou'):
final_boxes = []
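    # run NMS independently for each class, then re-attach the class id as column 0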
for c in range(num_classes):
idxs = bboxs[:, 0] == c
if np.count_nonzero(idxs) == 0: continue
r = nms(bboxs[idxs, 1:], match_threshold, match_metric)
final_boxes.append(np.concatenate([np.full((r.shape[0], 1), c), r], 1))
return final_boxes
def nms(dets, match_threshold=0.6, match_metric='iou'):
""" Apply NMS to avoid detecting too many overlapping bounding boxes.
Args:
dets: shape [N, 5], [score, x1, y1, x2, y2]
match_metric: 'iou' or 'ios'
match_threshold: overlap thresh for match metric.
"""
if dets.shape[0] == 0:
return dets[[], :]
scores = dets[:, 0]
x1 = dets[:, 1]
y1 = dets[:, 2]
x2 = dets[:, 3]
y2 = dets[:, 4]
areas = (x2 - x1 + 1) * (y2 - y1 + 1)
order = scores.argsort()[::-1]
ndets = dets.shape[0]
suppressed = np.zeros((ndets), dtype=np.int32)
for _i in range(ndets):
i = order[_i]
if suppressed[i] == 1:
continue
ix1 = x1[i]
iy1 = y1[i]
ix2 = x2[i]
iy2 = y2[i]
iarea = areas[i]
for _j in range(_i + 1, ndets):
j = order[_j]
if suppressed[j] == 1:
continue
xx1 = max(ix1, x1[j])
yy1 = max(iy1, y1[j])
xx2 = min(ix2, x2[j])
yy2 = min(iy2, y2[j])
w = max(0.0, xx2 - xx1 + 1)
h = max(0.0, yy2 - yy1 + 1)
inter = w * h
if match_metric == 'iou':
union = iarea + areas[j] - inter
match_value = inter / union
elif match_metric == 'ios':
smaller = min(iarea, areas[j])
match_value = inter / smaller
else:
                raise ValueError("match_metric should be 'iou' or 'ios'")
if match_value >= match_threshold:
suppressed[j] = 1
keep = np.where(suppressed == 0)[0]
dets = dets[keep, :]
return dets
coco_clsid2catid = {
0: 1,
1: 2,
2: 3,
3: 4,
4: 5,
5: 6,
6: 7,
7: 8,
8: 9,
9: 10,
10: 11,
11: 13,
12: 14,
13: 15,
14: 16,
15: 17,
16: 18,
17: 19,
18: 20,
19: 21,
20: 22,
21: 23,
22: 24,
23: 25,
24: 27,
25: 28,
26: 31,
27: 32,
28: 33,
29: 34,
30: 35,
31: 36,
32: 37,
33: 38,
34: 39,
35: 40,
36: 41,
37: 42,
38: 43,
39: 44,
40: 46,
41: 47,
42: 48,
43: 49,
44: 50,
45: 51,
46: 52,
47: 53,
48: 54,
49: 55,
50: 56,
51: 57,
52: 58,
53: 59,
54: 60,
55: 61,
56: 62,
57: 63,
58: 64,
59: 65,
60: 67,
61: 70,
62: 72,
63: 73,
64: 74,
65: 75,
66: 76,
67: 77,
68: 78,
69: 79,
70: 80,
71: 81,
72: 82,
73: 84,
74: 85,
75: 86,
76: 87,
77: 88,
78: 89,
79: 90
}
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import division
import os
import cv2
import numpy as np
from PIL import Image, ImageDraw, ImageFile
ImageFile.LOAD_TRUNCATED_IMAGES = True
import math
def visualize_box_mask(im, results, labels, threshold=0.5):
"""
Args:
im (str/np.ndarray): path of image/np.ndarray read by cv2
        results (dict): include 'boxes': np.ndarray: shape: [N, 6], N: number of boxes,
                        matrix element: [class, score, x_min, y_min, x_max, y_max]
                        MaskRCNN's results include 'masks': np.ndarray:
                        shape: [N, im_h, im_w]
labels (list): labels:['class1', ..., 'classn']
threshold (float): Threshold of score.
Returns:
im (PIL.Image.Image): visualized image
"""
if isinstance(im, str):
im = Image.open(im).convert('RGB')
elif isinstance(im, np.ndarray):
im = Image.fromarray(im)
if 'masks' in results and 'boxes' in results and len(results['boxes']) > 0:
im = draw_mask(
im, results['boxes'], results['masks'], labels, threshold=threshold)
if 'boxes' in results and len(results['boxes']) > 0:
im = draw_box(im, results['boxes'], labels, threshold=threshold)
if 'segm' in results:
im = draw_segm(
im,
results['segm'],
results['label'],
results['score'],
labels,
threshold=threshold)
return im
def get_color_map_list(num_classes):
"""
Args:
num_classes (int): number of class
Returns:
color_map (list): RGB color list
"""
color_map = num_classes * [0, 0, 0]
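    # VOC-style palette: spread the bits of each label id across the high bits
    # of the R, G, B channels so that nearby ids map to distinct colors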
for i in range(0, num_classes):
j = 0
lab = i
while lab:
color_map[i * 3] |= (((lab >> 0) & 1) << (7 - j))
color_map[i * 3 + 1] |= (((lab >> 1) & 1) << (7 - j))
color_map[i * 3 + 2] |= (((lab >> 2) & 1) << (7 - j))
j += 1
lab >>= 3
color_map = [color_map[i:i + 3] for i in range(0, len(color_map), 3)]
return color_map
def draw_mask(im, np_boxes, np_masks, labels, threshold=0.5):
"""
Args:
im (PIL.Image.Image): PIL image
np_boxes (np.ndarray): shape:[N,6], N: number of box,
                               matrix element: [class, score, x_min, y_min, x_max, y_max]
np_masks (np.ndarray): shape:[N, im_h, im_w]
labels (list): labels:['class1', ..., 'classn']
threshold (float): threshold of mask
Returns:
im (PIL.Image.Image): visualized image
"""
color_list = get_color_map_list(len(labels))
w_ratio = 0.4
alpha = 0.7
im = np.array(im).astype('float32')
clsid2color = {}
expect_boxes = (np_boxes[:, 1] > threshold) & (np_boxes[:, 0] > -1)
np_boxes = np_boxes[expect_boxes, :]
np_masks = np_masks[expect_boxes, :, :]
im_h, im_w = im.shape[:2]
np_masks = np_masks[:, :im_h, :im_w]
for i in range(len(np_masks)):
clsid, score = int(np_boxes[i][0]), np_boxes[i][1]
mask = np_masks[i]
if clsid not in clsid2color:
clsid2color[clsid] = color_list[clsid]
color_mask = clsid2color[clsid]
for c in range(3):
color_mask[c] = color_mask[c] * (1 - w_ratio) + w_ratio * 255
idx = np.nonzero(mask)
color_mask = np.array(color_mask)
im[idx[0], idx[1], :] *= 1.0 - alpha
im[idx[0], idx[1], :] += alpha * color_mask
return Image.fromarray(im.astype('uint8'))
def draw_box(im, np_boxes, labels, threshold=0.5):
"""
Args:
im (PIL.Image.Image): PIL image
np_boxes (np.ndarray): shape:[N,6], N: number of box,
                               matrix element: [class, score, x_min, y_min, x_max, y_max]
labels (list): labels:['class1', ..., 'classn']
threshold (float): threshold of box
Returns:
im (PIL.Image.Image): visualized image
"""
draw_thickness = min(im.size) // 320
draw = ImageDraw.Draw(im)
clsid2color = {}
color_list = get_color_map_list(len(labels))
expect_boxes = (np_boxes[:, 1] > threshold) & (np_boxes[:, 0] > -1)
np_boxes = np_boxes[expect_boxes, :]
for dt in np_boxes:
clsid, bbox, score = int(dt[0]), dt[2:], dt[1]
if clsid not in clsid2color:
clsid2color[clsid] = color_list[clsid]
color = tuple(clsid2color[clsid])
if len(bbox) == 4:
xmin, ymin, xmax, ymax = bbox
print('class_id:{:d}, confidence:{:.4f}, left_top:[{:.2f},{:.2f}],'
'right_bottom:[{:.2f},{:.2f}]'.format(
int(clsid), score, xmin, ymin, xmax, ymax))
# draw bbox
draw.line(
[(xmin, ymin), (xmin, ymax), (xmax, ymax), (xmax, ymin),
(xmin, ymin)],
width=draw_thickness,
fill=color)
elif len(bbox) == 8:
x1, y1, x2, y2, x3, y3, x4, y4 = bbox
draw.line(
[(x1, y1), (x2, y2), (x3, y3), (x4, y4), (x1, y1)],
width=2,
fill=color)
xmin = min(x1, x2, x3, x4)
ymin = min(y1, y2, y3, y4)
# draw label
text = "{} {:.4f}".format(labels[clsid], score)
tw, th = draw.textsize(text)
draw.rectangle(
[(xmin + 1, ymin - th), (xmin + tw + 1, ymin)], fill=color)
draw.text((xmin + 1, ymin - th), text, fill=(255, 255, 255))
return im
def draw_segm(im,
np_segms,
np_label,
np_score,
labels,
threshold=0.5,
alpha=0.7):
"""
Draw segmentation on image
"""
mask_color_id = 0
w_ratio = .4
color_list = get_color_map_list(len(labels))
im = np.array(im).astype('float32')
clsid2color = {}
np_segms = np_segms.astype(np.uint8)
for i in range(np_segms.shape[0]):
mask, score, clsid = np_segms[i], np_score[i], np_label[i]
if score < threshold:
continue
if clsid not in clsid2color:
clsid2color[clsid] = color_list[clsid]
color_mask = clsid2color[clsid]
for c in range(3):
color_mask[c] = color_mask[c] * (1 - w_ratio) + w_ratio * 255
idx = np.nonzero(mask)
color_mask = np.array(color_mask)
idx0 = np.minimum(idx[0], im.shape[0] - 1)
idx1 = np.minimum(idx[1], im.shape[1] - 1)
im[idx0, idx1, :] *= 1.0 - alpha
im[idx0, idx1, :] += alpha * color_mask
sum_x = np.sum(mask, axis=0)
x = np.where(sum_x > 0.5)[0]
sum_y = np.sum(mask, axis=1)
y = np.where(sum_y > 0.5)[0]
x0, x1, y0, y1 = x[0], x[-1], y[0], y[-1]
cv2.rectangle(im, (x0, y0), (x1, y1),
tuple(color_mask.astype('int32').tolist()), 1)
bbox_text = '%s %.2f' % (labels[clsid], score)
t_size = cv2.getTextSize(bbox_text, 0, 0.3, thickness=1)[0]
cv2.rectangle(im, (x0, y0), (x0 + t_size[0], y0 - t_size[1] - 3),
tuple(color_mask.astype('int32').tolist()), -1)
cv2.putText(
im,
bbox_text, (x0, y0 - 2),
cv2.FONT_HERSHEY_SIMPLEX,
0.3, (0, 0, 0),
1,
lineType=cv2.LINE_AA)
return Image.fromarray(im.astype('uint8'))
# Server-Side Inference Deployment
Models trained with `PaddleDetection` can be deployed on the server side with [Serving](https://github.com/PaddlePaddle/Serving).
This tutorial deploys a model trained on the COCO dataset with the `configs/yolov3/yolov3_darknet53_270e_coco.yml` configuration.
The pretrained weights are [yolov3_darknet53_270e_coco.pdparams](https://paddledet.bj.bcebos.com/models/yolov3_darknet53_270e_coco.pdparams).
## 1. Validate the Model
```
python tools/infer.py -c configs/yolov3/yolov3_darknet53_270e_coco.yml --infer_img=demo/000000014439.jpg -o use_gpu=True weights=https://paddledet.bj.bcebos.com/models/yolov3_darknet53_270e_coco.pdparams
```
## 2. Install Paddle Serving
Install it by following the instructions in [PaddleServing](https://github.com/PaddlePaddle/Serving/tree/v0.7.0) (version >= 0.7.0).
## 3. Export the Model
Training in PaddleDetection keeps both the forward network parameters and the optimizer-related parameters, while deployment only needs the forward parameters. For details see [Exporting Models](https://github.com/PaddlePaddle/PaddleDetection/blob/develop/deploy/EXPORT_MODEL.md).
```
python tools/export_model.py -c configs/yolov3/yolov3_darknet53_270e_coco.yml -o weights=https://paddledet.bj.bcebos.com/models/yolov3_darknet53_270e_coco.pdparams --export_serving_model=True
```
The command above creates a `yolov3_darknet53_270e_coco` folder under `output_inference/`:
```
output_inference
│ ├── yolov3_darknet53_270e_coco
│ │ ├── infer_cfg.yml
│ │ ├── model.pdiparams
│ │ ├── model.pdiparams.info
│ │ ├── model.pdmodel
│ │ ├── serving_client
│ │ │ ├── serving_client_conf.prototxt
│ │ │ ├── serving_client_conf.stream.prototxt
│ │ ├── serving_server
│ │ │ ├── __model__
│ │ │ ├── __params__
│ │ │ ├── serving_server_conf.prototxt
│ │ │ ├── serving_server_conf.stream.prototxt
│ │ │ ├── ...
```
`serving_client_conf.prototxt` under the `serving_client` folder describes the model's inputs and outputs in detail.
The content of `serving_client_conf.prototxt` is:
```
feed_var {
name: "im_shape"
alias_name: "im_shape"
is_lod_tensor: false
feed_type: 1
shape: 2
}
feed_var {
name: "image"
alias_name: "image"
is_lod_tensor: false
feed_type: 1
shape: 3
shape: 608
shape: 608
}
feed_var {
name: "scale_factor"
alias_name: "scale_factor"
is_lod_tensor: false
feed_type: 1
shape: 2
}
fetch_var {
name: "multiclass_nms3_0.tmp_0"
alias_name: "multiclass_nms3_0.tmp_0"
is_lod_tensor: true
fetch_type: 1
shape: -1
}
fetch_var {
name: "multiclass_nms3_0.tmp_2"
alias_name: "multiclass_nms3_0.tmp_2"
is_lod_tensor: false
fetch_type: 2
}
```
## 4. Start the PaddleServing Service
```
cd output_inference/yolov3_darknet53_270e_coco/
# GPU
python -m paddle_serving_server.serve --model serving_server --port 9393 --gpu_ids 0
# CPU
python -m paddle_serving_server.serve --model serving_server --port 9393
```
## 5. Test the Deployed Service
Prepare a `label_list.txt` file; an example `label_list.txt` contains:
```
person
bicycle
car
motorcycle
airplane
bus
train
truck
boat
traffic light
fire hydrant
stop sign
parking meter
bench
bird
cat
dog
horse
sheep
cow
elephant
bear
zebra
giraffe
backpack
umbrella
handbag
tie
suitcase
frisbee
skis
snowboard
sports ball
kite
baseball bat
baseball glove
skateboard
surfboard
tennis racket
bottle
wine glass
cup
fork
knife
spoon
bowl
banana
apple
sandwich
orange
broccoli
carrot
hot dog
pizza
donut
cake
chair
couch
potted plant
bed
dining table
toilet
tv
laptop
mouse
remote
keyboard
cell phone
microwave
oven
toaster
sink
refrigerator
book
clock
vase
scissors
teddy bear
hair drier
toothbrush
```
Set the `prototxt` file path to `serving_client/serving_client_conf.prototxt`.
Set `fetch` to `fetch=["multiclass_nms3_0.tmp_0"]`.
Run the test:
```
# enter the directory
cd output_inference/yolov3_darknet53_270e_coco/
# the test code test_client.py automatically creates an output folder and writes bbox.json and 000000014439.jpg into it
python ../../deploy/serving/test_client.py ../../deploy/serving/label_list.txt ../../demo/000000014439.jpg
```
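For reference, the sketch below shows the bare minimum such a client does: load the prototxt above, connect, and call `predict` with the three feed vars. This is an illustrative sketch only, assuming `paddle-serving-client` is installed and the service from step 4 is listening on `127.0.0.1:9393`; the preprocessing is deliberately simplified (plain resize plus 1/255 scaling), so for exact results use the shipped `test_client.py`.
```python
# Minimal sketch only: assumes the service from step 4 is running locally.
import cv2
import numpy as np
from paddle_serving_client import Client

client = Client()
client.load_client_config("serving_client/serving_client_conf.prototxt")
client.connect(["127.0.0.1:9393"])

im = cv2.imread("../../demo/000000014439.jpg")
h, w = im.shape[:2]
im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
im = cv2.resize(im, (608, 608)).astype("float32") / 255.0
im = im.transpose((2, 0, 1))  # HWC -> CHW, matching the 3x608x608 feed_var

fetch_map = client.predict(
    feed={
        "image": im,
        "im_shape": np.array([608.0, 608.0], dtype="float32"),
        "scale_factor": np.array([608.0 / h, 608.0 / w], dtype="float32"),
    },
    fetch=["multiclass_nms3_0.tmp_0"],
    batch=False)
print(fetch_map["multiclass_nms3_0.tmp_0"])
```
Each row of the fetched array follows the `[class_id, score, x_min, y_min, x_max, y_max]` layout used throughout this repository.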
# C++ Serving Inference Deployment
## 1. Introduction
Paddle Serving is PaddlePaddle's open-source framework for service-oriented deployment. It provides two stacks:
C++ Serving, which pursues maximum performance, and Python Pipeline, which favors convenient secondary development.
Both aim to give deep learning developers and enterprises high-performance, flexible, easy-to-use, industrial-grade online inference services.
For more about Paddle Serving, see the [official Paddle Serving repo](https://github.com/PaddlePaddle/Serving).
This document walks through deploying a model (yolov3_darknet53_270e_coco as the example) as a service with the C++ Serving stack.
## 2. C++ Serving Deployment
### 2.1 Sample Program Layout
The sample deployment programs live under `deploy/serving/cpp`:
```shell
deploy/
├── serving/
│   ├── python/                    # Python serving sample programs
│   │   ├──config.yml              # server-side inference configuration
│   │   ├──pipeline_http_client.py # client code
│   │   ├──postprocess_ops.py      # user-defined postprocessing code
│   │   ├──preprocess_ops.py       # user-defined preprocessing code
│   │   ├──README.md               # documentation
│   │   ├──web_service.py          # server code
│   ├── cpp/                       # C++ serving sample programs
│   │   ├──preprocess/             # custom C++ OPs
│   │   ├──build_server.sh         # C++ Serving build script
│   │   ├──serving_client.py       # client code
│ │ └── ...
│ └── ...
└── ...
```
### 2.2 Environment Setup
Install the latest versions of the Paddle Serving packages:
paddle-serving-client, paddle-serving-server (CPU or GPU build, pick one), paddle-serving-app, and the matching paddlepaddle (CPU or GPU build).
```commandline
pip install paddle-serving-client
# pip install paddle-serving-server # CPU
pip install paddle-serving-server-gpu # GPU build; defaults to CUDA 10.2 + TensorRT 6, specify the matching version tag for other environments
pip install paddle-serving-app
# pip install paddlepaddle # CPU
pip install paddlepaddle-gpu
```
You may need a pip mirror inside China (for example the Baidu mirror: append `-i https://mirror.baidu.com/pypi/simple` to the pip commands) to speed up downloads.
For Paddle Serving Server wheels built for other runtime environments, see the [download page](https://github.com/PaddlePaddle/Serving/blob/v0.7.0/doc/Latest_Packages_CN.md).
For other PaddlePaddle builds, see the [official site](https://www.paddlepaddle.org.cn/install/quick?docurl=/documentation/docs/zh/install/pip/linux-pip.html).
### 2.3 Export the Model for Serving
For the export steps, see the [PaddleDetection model export tutorial](../../EXPORT_MODEL.md).
Exporting a serving model additionally requires the `--export_serving_model True` flag, for example:
```commandline
python tools/export_model.py -c configs/yolov3/yolov3_darknet53_270e_coco.yml \
--export_serving_model True \
-o weights=https://paddledet.bj.bcebos.com/models/yolov3_darknet53_270e_coco.pdparams
```
### 2.4 Build C++ Serving & Start the Inference Service
You can build everything with the one-step build script `deploy/serving/cpp/build_server.sh`:
```commandline
bash deploy/serving/cpp/build_server.sh
```
Once the build, installation, and model export above are complete, start the inference service with:
```commandline
python -m paddle_serving_server.serve --model output_inference/yolov3_darknet53_270e_coco/serving_server --op yolov3_darknet53_270e_coco --port 9997 &
```
To develop custom OPs, see the [documentation](https://github.com/PaddlePaddle/Serving/blob/v0.8.3/doc/C%2B%2B_Serving/2%2B_model.md).
### 2.5 Run the Client
Once the inference service is up, query it from the client with:
```commandline
python deploy/serving/python/serving_client.py --serving_client output_inference/yolov3_darknet53_270e_coco/serving_client --image_file demo/000000014439.jpg --http_port 9997
```
# Docker image used:
# registry.baidubce.com/paddlepaddle/paddle:latest-dev-cuda10.1-cudnn7-gcc82
# Building the Serving server:
# the client and app packages can use the release builds directly;
# the server must be rebuilt because custom OPs have been added
apt-get update
apt install -y libcurl4-openssl-dev libbz2-dev
wget https://paddle-serving.bj.bcebos.com/others/centos_ssl.tar && tar xf centos_ssl.tar && rm -rf centos_ssl.tar && mv libcrypto.so.1.0.2k /usr/lib/libcrypto.so.1.0.2k && mv libssl.so.1.0.2k /usr/lib/libssl.so.1.0.2k && ln -sf /usr/lib/libcrypto.so.1.0.2k /usr/lib/libcrypto.so.10 && ln -sf /usr/lib/libssl.so.1.0.2k /usr/lib/libssl.so.10 && ln -sf /usr/lib/libcrypto.so.10 /usr/lib/libcrypto.so && ln -sf /usr/lib/libssl.so.10 /usr/lib/libssl.so
# install Go dependencies
rm -rf /usr/local/go
wget -qO- https://paddle-ci.cdn.bcebos.com/go1.17.2.linux-amd64.tar.gz | tar -xz -C /usr/local
export GOROOT=/usr/local/go
export GOPATH=/root/gopath
export PATH=$PATH:$GOPATH/bin:$GOROOT/bin
go env -w GO111MODULE=on
go env -w GOPROXY=https://goproxy.cn,direct
go install github.com/grpc-ecosystem/grpc-gateway/protoc-gen-grpc-gateway@v1.15.2
go install github.com/grpc-ecosystem/grpc-gateway/protoc-gen-swagger@v1.15.2
go install github.com/golang/protobuf/protoc-gen-go@v1.4.3
go install google.golang.org/grpc@v1.33.0
go env -w GO111MODULE=auto
# download the OpenCV library
wget https://paddle-qa.bj.bcebos.com/PaddleServing/opencv3.tar.gz && tar -xvf opencv3.tar.gz && rm -rf opencv3.tar.gz
export OPENCV_DIR=$PWD/opencv3
# clone Serving
git clone https://github.com/PaddlePaddle/Serving.git -b develop --depth=1
cd Serving
export Serving_repo_path=$PWD
git submodule update --init --recursive
python -m pip install -r python/requirements.txt
# set env
export PYTHON_INCLUDE_DIR=$(python -c "from distutils.sysconfig import get_python_inc; print(get_python_inc())")
export PYTHON_LIBRARIES=$(python -c "import distutils.sysconfig as sysconfig; print(sysconfig.get_config_var('LIBDIR'))")
export PYTHON_EXECUTABLE=`which python`
export CUDA_PATH='/usr/local/cuda'
export CUDNN_LIBRARY='/usr/local/cuda/lib64/'
export CUDA_CUDART_LIBRARY='/usr/local/cuda/lib64/'
export TENSORRT_LIBRARY_PATH='/usr/local/TensorRT6-cuda10.1-cudnn7/targets/x86_64-linux-gnu/'
# copy the custom OP sources
\cp ../deploy/serving/cpp/preprocess/*.h ${Serving_repo_path}/core/general-server/op
\cp ../deploy/serving/cpp/preprocess/*.cpp ${Serving_repo_path}/core/general-server/op
# build the server and export SERVING_BIN
mkdir server-build-gpu-opencv && cd server-build-gpu-opencv
cmake -DPYTHON_INCLUDE_DIR=$PYTHON_INCLUDE_DIR \
-DPYTHON_LIBRARIES=$PYTHON_LIBRARIES \
-DPYTHON_EXECUTABLE=$PYTHON_EXECUTABLE \
-DCUDA_TOOLKIT_ROOT_DIR=${CUDA_PATH} \
-DCUDNN_LIBRARY=${CUDNN_LIBRARY} \
-DCUDA_CUDART_LIBRARY=${CUDA_CUDART_LIBRARY} \
-DTENSORRT_ROOT=${TENSORRT_LIBRARY_PATH} \
-DOPENCV_DIR=${OPENCV_DIR} \
-DWITH_OPENCV=ON \
-DSERVER=ON \
-DWITH_GPU=ON ..
make -j32
python -m pip install python/dist/paddle*
export SERVING_BIN=$PWD/core/general-server/serving
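# paddle_serving_server picks up the binary pointed to by SERVING_BIN,
# so the freshly built server (with the custom OPs) is the one that runs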
cd ../../
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "core/general-server/op/mask_rcnn_r50_fpn_1x_coco.h"
#include "core/predictor/framework/infer.h"
#include "core/predictor/framework/memory.h"
#include "core/predictor/framework/resource.h"
#include "core/util/include/timer.h"
#include <algorithm>
#include <iostream>
#include <memory>
#include <sstream>
namespace baidu {
namespace paddle_serving {
namespace serving {
using baidu::paddle_serving::Timer;
using baidu::paddle_serving::predictor::InferManager;
using baidu::paddle_serving::predictor::MempoolWrapper;
using baidu::paddle_serving::predictor::PaddleGeneralModelConfig;
using baidu::paddle_serving::predictor::general_model::Request;
using baidu::paddle_serving::predictor::general_model::Response;
using baidu::paddle_serving::predictor::general_model::Tensor;
int mask_rcnn_r50_fpn_1x_coco::inference() {
VLOG(2) << "Going to run inference";
const std::vector<std::string> pre_node_names = pre_names();
if (pre_node_names.size() != 1) {
LOG(ERROR) << "This op(" << op_name()
<< ") can only have one predecessor op, but received "
<< pre_node_names.size();
return -1;
}
const std::string pre_name = pre_node_names[0];
const GeneralBlob *input_blob = get_depend_argument<GeneralBlob>(pre_name);
if (!input_blob) {
LOG(ERROR) << "input_blob is nullptr,error";
return -1;
}
uint64_t log_id = input_blob->GetLogId();
VLOG(2) << "(logid=" << log_id << ") Get precedent op name: " << pre_name;
GeneralBlob *output_blob = mutable_data<GeneralBlob>();
if (!output_blob) {
LOG(ERROR) << "output_blob is nullptr,error";
return -1;
}
output_blob->SetLogId(log_id);
if (!input_blob) {
LOG(ERROR) << "(logid=" << log_id
<< ") Failed mutable depended argument, op:" << pre_name;
return -1;
}
const TensorVector *in = &input_blob->tensor_vector;
TensorVector *out = &output_blob->tensor_vector;
int batch_size = input_blob->_batch_size;
output_blob->_batch_size = batch_size;
VLOG(2) << "(logid=" << log_id << ") infer batch size: " << batch_size;
Timer timeline;
int64_t start = timeline.TimeStampUS();
timeline.Start();
  // the input is a single string-typed tensor holding the base64-encoded image
char *total_input_ptr = static_cast<char *>(in->at(0).data.data());
std::string base64str = total_input_ptr;
cv::Mat img = Base2Mat(base64str);
cv::cvtColor(img, img, cv::COLOR_BGR2RGB);
// preprocess
Resize(&img, scale_factor_h, scale_factor_w, im_shape_h, im_shape_w);
Normalize(&img, mean_, scale_, is_scale_);
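  // Pad H/W up to multiples of 32 (assumption: the ResNet50-FPN backbone
  // needs stride-32-aligned inputs).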
PadStride(&img, 32);
int input_shape_h = img.rows;
int input_shape_w = img.cols;
std::vector<float> input(1 * 3 * input_shape_h * input_shape_w, 0.0f);
Permute(img, input.data());
// create real_in
TensorVector *real_in = new TensorVector();
if (!real_in) {
LOG(ERROR) << "real_in is nullptr,error";
return -1;
}
int in_num = 0;
size_t databuf_size = 0;
void *databuf_data = NULL;
char *databuf_char = NULL;
// im_shape
std::vector<float> im_shape{static_cast<float>(im_shape_h),
static_cast<float>(im_shape_w)};
databuf_size = 2 * sizeof(float);
databuf_data = MempoolWrapper::instance().malloc(databuf_size);
if (!databuf_data) {
LOG(ERROR) << "Malloc failed, size: " << databuf_size;
return -1;
}
memcpy(databuf_data, im_shape.data(), databuf_size);
databuf_char = reinterpret_cast<char *>(databuf_data);
paddle::PaddleBuf paddleBuf_0(databuf_char, databuf_size);
paddle::PaddleTensor tensor_in_0;
tensor_in_0.name = "im_shape";
tensor_in_0.dtype = paddle::PaddleDType::FLOAT32;
tensor_in_0.shape = {1, 2};
tensor_in_0.lod = in->at(0).lod;
tensor_in_0.data = paddleBuf_0;
real_in->push_back(tensor_in_0);
// image
in_num = 1 * 3 * input_shape_h * input_shape_w;
databuf_size = in_num * sizeof(float);
databuf_data = MempoolWrapper::instance().malloc(databuf_size);
if (!databuf_data) {
LOG(ERROR) << "Malloc failed, size: " << databuf_size;
return -1;
}
memcpy(databuf_data, input.data(), databuf_size);
databuf_char = reinterpret_cast<char *>(databuf_data);
paddle::PaddleBuf paddleBuf_1(databuf_char, databuf_size);
paddle::PaddleTensor tensor_in_1;
tensor_in_1.name = "image";
tensor_in_1.dtype = paddle::PaddleDType::FLOAT32;
tensor_in_1.shape = {1, 3, input_shape_h, input_shape_w};
tensor_in_1.lod = in->at(0).lod;
tensor_in_1.data = paddleBuf_1;
real_in->push_back(tensor_in_1);
// scale_factor
std::vector<float> scale_factor{scale_factor_h, scale_factor_w};
databuf_size = 2 * sizeof(float);
databuf_data = MempoolWrapper::instance().malloc(databuf_size);
if (!databuf_data) {
LOG(ERROR) << "Malloc failed, size: " << databuf_size;
return -1;
}
memcpy(databuf_data, scale_factor.data(), databuf_size);
databuf_char = reinterpret_cast<char *>(databuf_data);
paddle::PaddleBuf paddleBuf_2(databuf_char, databuf_size);
paddle::PaddleTensor tensor_in_2;
tensor_in_2.name = "scale_factor";
tensor_in_2.dtype = paddle::PaddleDType::FLOAT32;
tensor_in_2.shape = {1, 2};
tensor_in_2.lod = in->at(0).lod;
tensor_in_2.data = paddleBuf_2;
real_in->push_back(tensor_in_2);
if (InferManager::instance().infer(engine_name().c_str(), real_in, out,
batch_size)) {
LOG(ERROR) << "(logid=" << log_id
<< ") Failed do infer in fluid model: " << engine_name().c_str();
return -1;
}
int64_t end = timeline.TimeStampUS();
CopyBlobInfo(input_blob, output_blob);
AddBlobInfo(output_blob, start);
AddBlobInfo(output_blob, end);
return 0;
}
void mask_rcnn_r50_fpn_1x_coco::Resize(cv::Mat *img, float &scale_factor_h,
float &scale_factor_w, int &im_shape_h,
int &im_shape_w) {
  // keep_ratio: use the smaller of the two candidate scales so neither side overshoots its target
int im_size_max = std::max(img->rows, img->cols);
int im_size_min = std::min(img->rows, img->cols);
int target_size_max = std::max(im_shape_h, im_shape_w);
int target_size_min = std::min(im_shape_h, im_shape_w);
float scale_min =
static_cast<float>(target_size_min) / static_cast<float>(im_size_min);
float scale_max =
static_cast<float>(target_size_max) / static_cast<float>(im_size_max);
float scale_ratio = std::min(scale_min, scale_max);
// scale_factor
scale_factor_h = scale_ratio;
scale_factor_w = scale_ratio;
// Resize
  cv::resize(*img, *img, cv::Size(), scale_ratio, scale_ratio, cv::INTER_CUBIC);
im_shape_h = img->rows;
im_shape_w = img->cols;
}
void mask_rcnn_r50_fpn_1x_coco::Normalize(cv::Mat *img,
const std::vector<float> &mean,
const std::vector<float> &scale,
const bool is_scale) {
// Normalize
double e = 1.0;
if (is_scale) {
e /= 255.0;
}
(*img).convertTo(*img, CV_32FC3, e);
for (int h = 0; h < img->rows; h++) {
for (int w = 0; w < img->cols; w++) {
img->at<cv::Vec3f>(h, w)[0] =
(img->at<cv::Vec3f>(h, w)[0] - mean[0]) / scale[0];
img->at<cv::Vec3f>(h, w)[1] =
(img->at<cv::Vec3f>(h, w)[1] - mean[1]) / scale[1];
img->at<cv::Vec3f>(h, w)[2] =
(img->at<cv::Vec3f>(h, w)[2] - mean[2]) / scale[2];
}
}
}
void mask_rcnn_r50_fpn_1x_coco::PadStride(cv::Mat *img, int stride_) {
  // PadStride: zero-pad the bottom/right so H and W round up to multiples of stride_
if (stride_ <= 0)
return;
int rh = img->rows;
int rw = img->cols;
int nh = (rh / stride_) * stride_ + (rh % stride_ != 0) * stride_;
int nw = (rw / stride_) * stride_ + (rw % stride_ != 0) * stride_;
cv::copyMakeBorder(*img, *img, 0, nh - rh, 0, nw - rw, cv::BORDER_CONSTANT,
cv::Scalar(0));
}
void mask_rcnn_r50_fpn_1x_coco::Permute(const cv::Mat &img, float *data) {
// Permute
int rh = img.rows;
int rw = img.cols;
int rc = img.channels();
for (int i = 0; i < rc; ++i) {
cv::extractChannel(img, cv::Mat(rh, rw, CV_32FC1, data + i * rh * rw), i);
}
}
cv::Mat mask_rcnn_r50_fpn_1x_coco::Base2Mat(std::string &base64_data) {
cv::Mat img;
std::string s_mat;
s_mat = base64Decode(base64_data.data(), base64_data.size());
std::vector<char> base64_img(s_mat.begin(), s_mat.end());
  img = cv::imdecode(base64_img, cv::IMREAD_COLOR); // decode to a 3-channel BGR cv::Mat
return img;
}
std::string mask_rcnn_r50_fpn_1x_coco::base64Decode(const char *Data,
int DataByte) {
const char DecodeTable[] = {
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0,
62, // '+'
0, 0, 0,
63, // '/'
52, 53, 54, 55, 56, 57, 58, 59, 60, 61, // '0'-'9'
0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, // 'A'-'Z'
0, 0, 0, 0, 0, 0, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36,
37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, // 'a'-'z'
};
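  // NOTE: the table covers ASCII values up to 'z' only; the input is assumed
  // to be well-formed base64 (4-char groups, optional '=' padding) and is not
  // validated.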
std::string strDecode;
int nValue;
int i = 0;
while (i < DataByte) {
if (*Data != '\r' && *Data != '\n') {
nValue = DecodeTable[*Data++] << 18;
nValue += DecodeTable[*Data++] << 12;
strDecode += (nValue & 0x00FF0000) >> 16;
if (*Data != '=') {
nValue += DecodeTable[*Data++] << 6;
strDecode += (nValue & 0x0000FF00) >> 8;
if (*Data != '=') {
nValue += DecodeTable[*Data++];
strDecode += nValue & 0x000000FF;
}
}
i += 4;
    } else {  // carriage return / line feed: skip
Data++;
i++;
}
}
return strDecode;
}
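// DEFINE_OP registers the op below under its class name; the service selects
// it via the --op flag shown in the serve command earlier.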
DEFINE_OP(mask_rcnn_r50_fpn_1x_coco);
} // namespace serving
} // namespace paddle_serving
} // namespace baidu
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "core/general-server/general_model_service.pb.h"
#include "core/general-server/op/general_infer_helper.h"
#include "paddle_inference_api.h" // NOLINT
#include <string>
#include <vector>
#include "opencv2/core.hpp"
#include "opencv2/imgcodecs.hpp"
#include "opencv2/imgproc.hpp"
#include <chrono>
#include <iomanip>
#include <iostream>
#include <ostream>
#include <vector>
#include <cstring>
#include <fstream>
#include <numeric>
namespace baidu {
namespace paddle_serving {
namespace serving {
class mask_rcnn_r50_fpn_1x_coco
: public baidu::paddle_serving::predictor::OpWithChannel<GeneralBlob> {
public:
typedef std::vector<paddle::PaddleTensor> TensorVector;
DECLARE_OP(mask_rcnn_r50_fpn_1x_coco);
int inference();
private:
// preprocess
std::vector<float> mean_ = {0.485f, 0.456f, 0.406f};
std::vector<float> scale_ = {0.229f, 0.224f, 0.225f};
bool is_scale_ = true;
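  // keep-ratio resize targets (assumption: the standard Mask R-CNN setting of
  // short side 800 / long side 1333)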
int im_shape_h = 1333;
int im_shape_w = 800;
float scale_factor_h = 1.0f;
float scale_factor_w = 1.0f;
void Resize(cv::Mat *img, float &scale_factor_h, float &scale_factor_w,
int &im_shape_h, int &im_shape_w);
void Normalize(cv::Mat *img, const std::vector<float> &mean,
const std::vector<float> &scale, const bool is_scale);
void PadStride(cv::Mat *img, int stride_ = -1);
void Permute(const cv::Mat &img, float *data);
  // image decoding helpers
cv::Mat Base2Mat(std::string &base64_data);
std::string base64Decode(const char *Data, int DataByte);
};
} // namespace serving
} // namespace paddle_serving
} // namespace baidu
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "core/general-server/op/picodet_lcnet_1_5x_416_coco.h"
#include "core/predictor/framework/infer.h"
#include "core/predictor/framework/memory.h"
#include "core/predictor/framework/resource.h"
#include "core/util/include/timer.h"
#include <algorithm>
#include <iostream>
#include <memory>
#include <sstream>
namespace baidu {
namespace paddle_serving {
namespace serving {
using baidu::paddle_serving::Timer;
using baidu::paddle_serving::predictor::InferManager;
using baidu::paddle_serving::predictor::MempoolWrapper;
using baidu::paddle_serving::predictor::PaddleGeneralModelConfig;
using baidu::paddle_serving::predictor::general_model::Request;
using baidu::paddle_serving::predictor::general_model::Response;
using baidu::paddle_serving::predictor::general_model::Tensor;
int picodet_lcnet_1_5x_416_coco::inference() {
VLOG(2) << "Going to run inference";
const std::vector<std::string> pre_node_names = pre_names();
if (pre_node_names.size() != 1) {
LOG(ERROR) << "This op(" << op_name()
<< ") can only have one predecessor op, but received "
<< pre_node_names.size();
return -1;
}
const std::string pre_name = pre_node_names[0];
const GeneralBlob *input_blob = get_depend_argument<GeneralBlob>(pre_name);
if (!input_blob) {
LOG(ERROR) << "input_blob is nullptr,error";
return -1;
}
uint64_t log_id = input_blob->GetLogId();
VLOG(2) << "(logid=" << log_id << ") Get precedent op name: " << pre_name;
GeneralBlob *output_blob = mutable_data<GeneralBlob>();
if (!output_blob) {
LOG(ERROR) << "output_blob is nullptr,error";
return -1;
}
output_blob->SetLogId(log_id);
if (!input_blob) {
LOG(ERROR) << "(logid=" << log_id
<< ") Failed mutable depended argument, op:" << pre_name;
return -1;
}
const TensorVector *in = &input_blob->tensor_vector;
TensorVector *out = &output_blob->tensor_vector;
int batch_size = input_blob->_batch_size;
output_blob->_batch_size = batch_size;
VLOG(2) << "(logid=" << log_id << ") infer batch size: " << batch_size;
Timer timeline;
int64_t start = timeline.TimeStampUS();
timeline.Start();
  // the input is a single string-typed tensor holding the base64-encoded image
char *total_input_ptr = static_cast<char *>(in->at(0).data.data());
std::string base64str = total_input_ptr;
cv::Mat img = Base2Mat(base64str);
cv::cvtColor(img, img, cv::COLOR_BGR2RGB);
// preprocess
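  // PicoDet uses a fixed 416x416 input: the image is warped to that size
  // (aspect ratio not preserved) and scale_factor records the per-axis ratios.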
std::vector<float> input(1 * 3 * im_shape_h * im_shape_w, 0.0f);
preprocess_det(img, input.data(), scale_factor_h, scale_factor_w, im_shape_h,
im_shape_w, mean_, scale_, is_scale_);
// create real_in
TensorVector *real_in = new TensorVector();
if (!real_in) {
LOG(ERROR) << "real_in is nullptr,error";
return -1;
}
int in_num = 0;
size_t databuf_size = 0;
void *databuf_data = NULL;
char *databuf_char = NULL;
// image
in_num = 1 * 3 * im_shape_h * im_shape_w;
databuf_size = in_num * sizeof(float);
databuf_data = MempoolWrapper::instance().malloc(databuf_size);
if (!databuf_data) {
LOG(ERROR) << "Malloc failed, size: " << databuf_size;
return -1;
}
memcpy(databuf_data, input.data(), databuf_size);
databuf_char = reinterpret_cast<char *>(databuf_data);
paddle::PaddleBuf paddleBuf(databuf_char, databuf_size);
paddle::PaddleTensor tensor_in;
tensor_in.name = "image";
tensor_in.dtype = paddle::PaddleDType::FLOAT32;
tensor_in.shape = {1, 3, im_shape_h, im_shape_w};
tensor_in.lod = in->at(0).lod;
tensor_in.data = paddleBuf;
real_in->push_back(tensor_in);
// scale_factor
std::vector<float> scale_factor{scale_factor_h, scale_factor_w};
databuf_size = 2 * sizeof(float);
databuf_data = MempoolWrapper::instance().malloc(databuf_size);
if (!databuf_data) {
LOG(ERROR) << "Malloc failed, size: " << databuf_size;
return -1;
}
memcpy(databuf_data, scale_factor.data(), databuf_size);
databuf_char = reinterpret_cast<char *>(databuf_data);
paddle::PaddleBuf paddleBuf_2(databuf_char, databuf_size);
paddle::PaddleTensor tensor_in_2;
tensor_in_2.name = "scale_factor";
tensor_in_2.dtype = paddle::PaddleDType::FLOAT32;
tensor_in_2.shape = {1, 2};
tensor_in_2.lod = in->at(0).lod;
tensor_in_2.data = paddleBuf_2;
real_in->push_back(tensor_in_2);
if (InferManager::instance().infer(engine_name().c_str(), real_in, out,
batch_size)) {
LOG(ERROR) << "(logid=" << log_id
<< ") Failed do infer in fluid model: " << engine_name().c_str();
return -1;
}
int64_t end = timeline.TimeStampUS();
CopyBlobInfo(input_blob, output_blob);
AddBlobInfo(output_blob, start);
AddBlobInfo(output_blob, end);
return 0;
}
void picodet_lcnet_1_5x_416_coco::preprocess_det(
const cv::Mat &img, float *data, float &scale_factor_h,
float &scale_factor_w, int im_shape_h, int im_shape_w,
const std::vector<float> &mean, const std::vector<float> &scale,
const bool is_scale) {
// scale_factor
scale_factor_h =
static_cast<float>(im_shape_h) / static_cast<float>(img.rows);
scale_factor_w =
static_cast<float>(im_shape_w) / static_cast<float>(img.cols);
// Resize
cv::Mat resize_img;
  cv::resize(img, resize_img, cv::Size(im_shape_w, im_shape_h), 0, 0,
             cv::INTER_CUBIC);
// Normalize
double e = 1.0;
if (is_scale) {
e /= 255.0;
}
cv::Mat img_fp;
(resize_img).convertTo(img_fp, CV_32FC3, e);
for (int h = 0; h < im_shape_h; h++) {
for (int w = 0; w < im_shape_w; w++) {
img_fp.at<cv::Vec3f>(h, w)[0] =
(img_fp.at<cv::Vec3f>(h, w)[0] - mean[0]) / scale[0];
img_fp.at<cv::Vec3f>(h, w)[1] =
(img_fp.at<cv::Vec3f>(h, w)[1] - mean[1]) / scale[1];
img_fp.at<cv::Vec3f>(h, w)[2] =
(img_fp.at<cv::Vec3f>(h, w)[2] - mean[2]) / scale[2];
}
}
// Permute
int rh = img_fp.rows;
int rw = img_fp.cols;
int rc = img_fp.channels();
for (int i = 0; i < rc; ++i) {
cv::extractChannel(img_fp, cv::Mat(rh, rw, CV_32FC1, data + i * rh * rw),
i);
}
}
cv::Mat picodet_lcnet_1_5x_416_coco::Base2Mat(std::string &base64_data) {
cv::Mat img;
std::string s_mat;
s_mat = base64Decode(base64_data.data(), base64_data.size());
std::vector<char> base64_img(s_mat.begin(), s_mat.end());
  img = cv::imdecode(base64_img, cv::IMREAD_COLOR); // decode to a 3-channel BGR cv::Mat
return img;
}
std::string picodet_lcnet_1_5x_416_coco::base64Decode(const char *Data,
int DataByte) {
const char DecodeTable[] = {
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0,
62, // '+'
0, 0, 0,
63, // '/'
52, 53, 54, 55, 56, 57, 58, 59, 60, 61, // '0'-'9'
0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, // 'A'-'Z'
0, 0, 0, 0, 0, 0, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36,
37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, // 'a'-'z'
};
std::string strDecode;
int nValue;
int i = 0;
while (i < DataByte) {
if (*Data != '\r' && *Data != '\n') {
nValue = DecodeTable[*Data++] << 18;
nValue += DecodeTable[*Data++] << 12;
strDecode += (nValue & 0x00FF0000) >> 16;
if (*Data != '=') {
nValue += DecodeTable[*Data++] << 6;
strDecode += (nValue & 0x0000FF00) >> 8;
if (*Data != '=') {
nValue += DecodeTable[*Data++];
strDecode += nValue & 0x000000FF;
}
}
i += 4;
    } else {  // carriage return / line feed: skip
Data++;
i++;
}
}
return strDecode;
}
DEFINE_OP(picodet_lcnet_1_5x_416_coco);
} // namespace serving
} // namespace paddle_serving
} // namespace baidu
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "core/general-server/general_model_service.pb.h"
#include "core/general-server/op/general_infer_helper.h"
#include "paddle_inference_api.h" // NOLINT
#include <string>
#include <vector>
#include "opencv2/core.hpp"
#include "opencv2/imgcodecs.hpp"
#include "opencv2/imgproc.hpp"
#include <chrono>
#include <iomanip>
#include <iostream>
#include <ostream>
#include <vector>
#include <cstring>
#include <fstream>
#include <numeric>
namespace baidu {
namespace paddle_serving {
namespace serving {
class picodet_lcnet_1_5x_416_coco
: public baidu::paddle_serving::predictor::OpWithChannel<GeneralBlob> {
public:
typedef std::vector<paddle::PaddleTensor> TensorVector;
DECLARE_OP(picodet_lcnet_1_5x_416_coco);
int inference();
private:
// preprocess
std::vector<float> mean_ = {0.485f, 0.456f, 0.406f};
std::vector<float> scale_ = {0.229f, 0.224f, 0.225f};
bool is_scale_ = true;
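  // fixed network input size (PicoDet-LCNet 1.5x, 416x416)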
int im_shape_h = 416;
int im_shape_w = 416;
float scale_factor_h = 1.0f;
float scale_factor_w = 1.0f;
void preprocess_det(const cv::Mat &img, float *data, float &scale_factor_h,
float &scale_factor_w, int im_shape_h, int im_shape_w,
const std::vector<float> &mean,
const std::vector<float> &scale, const bool is_scale);
  // image decoding helpers
cv::Mat Base2Mat(std::string &base64_data);
std::string base64Decode(const char *Data, int DataByte);
};
} // namespace serving
} // namespace paddle_serving
} // namespace baidu
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "core/general-server/op/ppyolo_mbv3_large_coco.h"
#include "core/predictor/framework/infer.h"
#include "core/predictor/framework/memory.h"
#include "core/predictor/framework/resource.h"
#include "core/util/include/timer.h"
#include <algorithm>
#include <iostream>
#include <memory>
#include <sstream>
namespace baidu {
namespace paddle_serving {
namespace serving {
using baidu::paddle_serving::Timer;
using baidu::paddle_serving::predictor::InferManager;
using baidu::paddle_serving::predictor::MempoolWrapper;
using baidu::paddle_serving::predictor::PaddleGeneralModelConfig;
using baidu::paddle_serving::predictor::general_model::Request;
using baidu::paddle_serving::predictor::general_model::Response;
using baidu::paddle_serving::predictor::general_model::Tensor;
int ppyolo_mbv3_large_coco::inference() {
VLOG(2) << "Going to run inference";
const std::vector<std::string> pre_node_names = pre_names();
if (pre_node_names.size() != 1) {
LOG(ERROR) << "This op(" << op_name()
<< ") can only have one predecessor op, but received "
<< pre_node_names.size();
return -1;
}
const std::string pre_name = pre_node_names[0];
const GeneralBlob *input_blob = get_depend_argument<GeneralBlob>(pre_name);
if (!input_blob) {
LOG(ERROR) << "input_blob is nullptr,error";
return -1;
}
uint64_t log_id = input_blob->GetLogId();
VLOG(2) << "(logid=" << log_id << ") Get precedent op name: " << pre_name;
GeneralBlob *output_blob = mutable_data<GeneralBlob>();
if (!output_blob) {
LOG(ERROR) << "output_blob is nullptr,error";
return -1;
}
output_blob->SetLogId(log_id);
if (!input_blob) {
LOG(ERROR) << "(logid=" << log_id
<< ") Failed mutable depended argument, op:" << pre_name;
return -1;
}
const TensorVector *in = &input_blob->tensor_vector;
TensorVector *out = &output_blob->tensor_vector;
int batch_size = input_blob->_batch_size;
output_blob->_batch_size = batch_size;
VLOG(2) << "(logid=" << log_id << ") infer batch size: " << batch_size;
Timer timeline;
int64_t start = timeline.TimeStampUS();
timeline.Start();
  // the input is a single string-typed tensor holding the base64-encoded image
char *total_input_ptr = static_cast<char *>(in->at(0).data.data());
std::string base64str = total_input_ptr;
cv::Mat img = Base2Mat(base64str);
cv::cvtColor(img, img, cv::COLOR_BGR2RGB);
// preprocess
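  // As with PicoDet, the image is warped to a fixed im_shape_h x im_shape_w;
  // PP-YOLO additionally consumes the im_shape tensor built below.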
std::vector<float> input(1 * 3 * im_shape_h * im_shape_w, 0.0f);
preprocess_det(img, input.data(), scale_factor_h, scale_factor_w, im_shape_h,
im_shape_w, mean_, scale_, is_scale_);
// create real_in
TensorVector *real_in = new TensorVector();
if (!real_in) {
LOG(ERROR) << "real_in is nullptr,error";
return -1;
}
int in_num = 0;
size_t databuf_size = 0;
void *databuf_data = NULL;
char *databuf_char = NULL;
// im_shape
std::vector<float> im_shape{static_cast<float>(im_shape_h),
static_cast<float>(im_shape_w)};
databuf_size = 2 * sizeof(float);
databuf_data = MempoolWrapper::instance().malloc(databuf_size);
if (!databuf_data) {
LOG(ERROR) << "Malloc failed, size: " << databuf_size;
return -1;
}
memcpy(databuf_data, im_shape.data(), databuf_size);
databuf_char = reinterpret_cast<char *>(databuf_data);
paddle::PaddleBuf paddleBuf_0(databuf_char, databuf_size);
paddle::PaddleTensor tensor_in_0;
tensor_in_0.name = "im_shape";
tensor_in_0.dtype = paddle::PaddleDType::FLOAT32;
tensor_in_0.shape = {1, 2};
tensor_in_0.lod = in->at(0).lod;
tensor_in_0.data = paddleBuf_0;
real_in->push_back(tensor_in_0);
// image
in_num = 1 * 3 * im_shape_h * im_shape_w;
databuf_size = in_num * sizeof(float);
databuf_data = MempoolWrapper::instance().malloc(databuf_size);
if (!databuf_data) {
LOG(ERROR) << "Malloc failed, size: " << databuf_size;
return -1;
}
memcpy(databuf_data, input.data(), databuf_size);
databuf_char = reinterpret_cast<char *>(databuf_data);
paddle::PaddleBuf paddleBuf_1(databuf_char, databuf_size);
paddle::PaddleTensor tensor_in_1;
tensor_in_1.name = "image";
tensor_in_1.dtype = paddle::PaddleDType::FLOAT32;
tensor_in_1.shape = {1, 3, im_shape_h, im_shape_w};
tensor_in_1.lod = in->at(0).lod;
tensor_in_1.data = paddleBuf_1;
real_in->push_back(tensor_in_1);
// scale_factor
std::vector<float> scale_factor{scale_factor_h, scale_factor_w};
databuf_size = 2 * sizeof(float);
databuf_data = MempoolWrapper::instance().malloc(databuf_size);
if (!databuf_data) {
LOG(ERROR) << "Malloc failed, size: " << databuf_size;
return -1;
}
memcpy(databuf_data, scale_factor.data(), databuf_size);
databuf_char = reinterpret_cast<char *>(databuf_data);
paddle::PaddleBuf paddleBuf_2(databuf_char, databuf_size);
paddle::PaddleTensor tensor_in_2;
tensor_in_2.name = "scale_factor";
tensor_in_2.dtype = paddle::PaddleDType::FLOAT32;
tensor_in_2.shape = {1, 2};
tensor_in_2.lod = in->at(0).lod;
tensor_in_2.data = paddleBuf_2;
real_in->push_back(tensor_in_2);
if (InferManager::instance().infer(engine_name().c_str(), real_in, out,
batch_size)) {
LOG(ERROR) << "(logid=" << log_id
<< ") Failed do infer in fluid model: " << engine_name().c_str();
return -1;
}
int64_t end = timeline.TimeStampUS();
CopyBlobInfo(input_blob, output_blob);
AddBlobInfo(output_blob, start);
AddBlobInfo(output_blob, end);
return 0;
}
void ppyolo_mbv3_large_coco::preprocess_det(const cv::Mat &img, float *data,
float &scale_factor_h,
float &scale_factor_w,
int im_shape_h, int im_shape_w,
const std::vector<float> &mean,
const std::vector<float> &scale,
const bool is_scale) {
// scale_factor
scale_factor_h =
static_cast<float>(im_shape_h) / static_cast<float>(img.rows);
scale_factor_w =
static_cast<float>(im_shape_w) / static_cast<float>(img.cols);
// Resize
cv::Mat resize_img;
  cv::resize(img, resize_img, cv::Size(im_shape_w, im_shape_h), 0, 0,
             cv::INTER_CUBIC);
// Normalize
double e = 1.0;
if (is_scale) {
e /= 255.0;
}
cv::Mat img_fp;
(resize_img).convertTo(img_fp, CV_32FC3, e);
for (int h = 0; h < im_shape_h; h++) {
for (int w = 0; w < im_shape_w; w++) {
img_fp.at<cv::Vec3f>(h, w)[0] =
(img_fp.at<cv::Vec3f>(h, w)[0] - mean[0]) / scale[0];
img_fp.at<cv::Vec3f>(h, w)[1] =
(img_fp.at<cv::Vec3f>(h, w)[1] - mean[1]) / scale[1];
img_fp.at<cv::Vec3f>(h, w)[2] =
(img_fp.at<cv::Vec3f>(h, w)[2] - mean[2]) / scale[2];
}
}
// Permute
int rh = img_fp.rows;
int rw = img_fp.cols;
int rc = img_fp.channels();
for (int i = 0; i < rc; ++i) {
cv::extractChannel(img_fp, cv::Mat(rh, rw, CV_32FC1, data + i * rh * rw),
i);
}
}
cv::Mat ppyolo_mbv3_large_coco::Base2Mat(std::string &base64_data) {
cv::Mat img;
std::string s_mat;
s_mat = base64Decode(base64_data.data(), base64_data.size());
std::vector<char> base64_img(s_mat.begin(), s_mat.end());
  img = cv::imdecode(base64_img, cv::IMREAD_COLOR); // decode to a 3-channel BGR cv::Mat
return img;
}
std::string ppyolo_mbv3_large_coco::base64Decode(const char *Data,
int DataByte) {
const char DecodeTable[] = {
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0,
62, // '+'
0, 0, 0,
63, // '/'
52, 53, 54, 55, 56, 57, 58, 59, 60, 61, // '0'-'9'
0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, // 'A'-'Z'
0, 0, 0, 0, 0, 0, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36,
37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, // 'a'-'z'
};
std::string strDecode;
int nValue;
int i = 0;
while (i < DataByte) {
if (*Data != '\r' && *Data != '\n') {
nValue = DecodeTable[*Data++] << 18;
nValue += DecodeTable[*Data++] << 12;
strDecode += (nValue & 0x00FF0000) >> 16;
if (*Data != '=') {
nValue += DecodeTable[*Data++] << 6;
strDecode += (nValue & 0x0000FF00) >> 8;
if (*Data != '=') {
nValue += DecodeTable[*Data++];
strDecode += nValue & 0x000000FF;
}
}
i += 4;
    } else {  // carriage return / line feed: skip
Data++;
i++;
}
}
return strDecode;
}
DEFINE_OP(ppyolo_mbv3_large_coco);
} // namespace serving
} // namespace paddle_serving
} // namespace baidu