Commit fccfdfa5 authored by dlyrm

update code

parent dcc7bf4f
cmake_minimum_required(VERSION 3.4.1)
set(CMAKE_CXX_STANDARD 14)
project(picodet_demo)
find_package(OpenCV REQUIRED)
find_package(InferenceEngine REQUIRED)
find_package(ngraph REQUIRED)
include_directories(
${OpenCV_INCLUDE_DIRS}
${CMAKE_CURRENT_SOURCE_DIR}
${CMAKE_CURRENT_BINARY_DIR}
)
add_executable(picodet_demo main.cpp picodet_openvino.cpp)
target_link_libraries(
picodet_demo
${InferenceEngine_LIBRARIES}
${NGRAPH_LIBRARIES}
${OpenCV_LIBS}
)
# PicoDet OpenVINO Demo
This folder provides PicoDet inference code using
[Intel's OpenVINO Toolkit](https://software.intel.com/content/www/us/en/develop/tools/openvino-toolkit.html). Most of the implementation in this folder is the same as in *demo_ncnn*.
**Recommended:** install from the offline `.tar.gz` package instead of the GitHub method: [link](https://registrationcenter-download.intel.com/akdlm/irc_nas/18096/l_openvino_toolkit_p_2021.4.689.tgz).
## Install OpenVINO Toolkit
Go to the [OpenVINO HomePage](https://software.intel.com/content/www/us/en/develop/tools/openvino-toolkit.html), download a suitable version, and install it.
Follow the official Get Started guides: https://docs.openvinotoolkit.org/latest/get_started_guides.html
## Set the Environment Variables
### Windows:
Run this command in cmd (every time before using OpenVINO):
```cmd
<INSTALL_DIR>\openvino_2021\bin\setupvars.bat
```
Or set the system environment variables once and for all:
Name |Value
:--------------------:|:--------:
INTEL_OPENVINO_DIR | <INSTALL_DIR>\openvino_2021
INTEL_CVSDK_DIR | %INTEL_OPENVINO_DIR%
InferenceEngine_DIR | %INTEL_OPENVINO_DIR%\deployment_tools\inference_engine\share
HDDL_INSTALL_DIR | %INTEL_OPENVINO_DIR%\deployment_tools\inference_engine\external\hddl
ngraph_DIR | %INTEL_OPENVINO_DIR%\deployment_tools\ngraph\cmake
And add this to `Path`:
```
%INTEL_OPENVINO_DIR%\deployment_tools\inference_engine\bin\intel64\Debug;%INTEL_OPENVINO_DIR%\deployment_tools\inference_engine\bin\intel64\Release;%HDDL_INSTALL_DIR%\bin;%INTEL_OPENVINO_DIR%\deployment_tools\inference_engine\external\tbb\bin;%INTEL_OPENVINO_DIR%\deployment_tools\ngraph\lib
```
### Linux
Run this command in the shell (every time before using OpenVINO):
```shell
source /opt/intel/openvino_2021/bin/setupvars.sh
```
Or edit `~/.bashrc`:
```shell
vi ~/.bashrc
```
Add this line to the end of the file:
```shell
source /opt/intel/openvino_2021/bin/setupvars.sh
```
## Convert model
Convert to OpenVINO
``` shell
cd <INSTALL_DIR>/openvino_2021/deployment_tools/model_optimizer
```
Install the requirements for the conversion tool:
```shell
cd ./install_prerequisites
sudo ./install_prerequisites_onnx.sh
```
Then convert the model. Note: `mean_values` and `scale_values` must match the preprocessing settings in the YAML config file used for training.
```shell
python3 mo_onnx.py --input_model <ONNX_MODEL> --mean_values [103.53,116.28,123.675] --scale_values [57.375,57.12,58.395]
```
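For reference, here is a minimal sketch of where those numbers come from, assuming the common ImageNet normalization used by PicoDet configs (RGB mean `[0.485, 0.456, 0.406]`, std `[0.229, 0.224, 0.225]`, pixels scaled to `[0, 1]`): the Model Optimizer values are simply those statistics multiplied by 255 and listed in BGR channel order.
```python
# Sketch: derive --mean_values/--scale_values from assumed YAML NormalizeImage
# settings (RGB order, is_scale: true). Verify against your own config.
rgb_mean = [0.485, 0.456, 0.406]
rgb_std = [0.229, 0.224, 0.225]
# mo consumes 0-255 BGR pixels: scale by 255 and reverse the channel order.
mean_values = [round(m * 255, 3) for m in reversed(rgb_mean)]
scale_values = [round(s * 255, 3) for s in reversed(rgb_std)]
print(mean_values)   # [103.53, 116.28, 123.675]
print(scale_values)  # [57.375, 57.12, 58.395]
```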
## Build
### Windows
```cmd
<OPENVINO_INSTALL_DIR>\openvino_2021\bin\setupvars.bat
mkdir build
cd build
cmake ..
msbuild picodet_demo.vcxproj /p:configuration=release /p:platform=x64
```
### Linux
```shell
source /opt/intel/openvino_2021/bin/setupvars.sh
mkdir build
cd build
cmake ..
make
```
## Run demo
Download the PicoDet OpenVINO model: [PicoDet OpenVINO model download link](https://paddledet.bj.bcebos.com/deploy/third_engine/picodet_m_416_openvino.zip).
Move the PicoDet OpenVINO model files to the demo's `weight` folder.
### Edit file
```
step1:
main.cpp
#define image_size 416
...
auto detector = PicoDet("../weight/picodet_m_416.xml");
...
step2:
picodet_openvino.h
#define image_size 416
```
### Webcam
```shell
picodet_demo 0 0
```
### Inference images
```shell
picodet_demo 1 IMAGE_FOLDER/*.jpg
```
### Inference video
```shell
picodet_demo 2 VIDEO_PATH
```
### Benchmark
```shell
picodet_demo 3 0
```
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// reference from https://github.com/RangiLyu/nanodet
#include "picodet_openvino.h"
#include <iostream>
#include <opencv2/core/core.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#define image_size 416
struct object_rect {
int x;
int y;
int width;
int height;
};
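// resize_uniform: letterbox resize. Scale the source to fit dst_size while
// keeping its aspect ratio, center it on a black canvas, and report the
// effective (non-padded) region in effect_area so boxes can be mapped back.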
int resize_uniform(cv::Mat &src, cv::Mat &dst, cv::Size dst_size,
object_rect &effect_area) {
int w = src.cols;
int h = src.rows;
int dst_w = dst_size.width;
int dst_h = dst_size.height;
dst = cv::Mat(cv::Size(dst_w, dst_h), CV_8UC3, cv::Scalar(0));
float ratio_src = w * 1.0 / h;
float ratio_dst = dst_w * 1.0 / dst_h;
int tmp_w = 0;
int tmp_h = 0;
if (ratio_src > ratio_dst) {
tmp_w = dst_w;
tmp_h = floor((dst_w * 1.0 / w) * h);
} else if (ratio_src < ratio_dst) {
tmp_h = dst_h;
tmp_w = floor((dst_h * 1.0 / h) * w);
} else {
cv::resize(src, dst, dst_size);
effect_area.x = 0;
effect_area.y = 0;
effect_area.width = dst_w;
effect_area.height = dst_h;
return 0;
}
cv::Mat tmp;
cv::resize(src, tmp, cv::Size(tmp_w, tmp_h));
if (tmp_w != dst_w) {
int index_w = floor((dst_w - tmp_w) / 2.0);
for (int i = 0; i < dst_h; i++) {
memcpy(dst.data + i * dst_w * 3 + index_w * 3, tmp.data + i * tmp_w * 3,
tmp_w * 3);
}
effect_area.x = index_w;
effect_area.y = 0;
effect_area.width = tmp_w;
effect_area.height = tmp_h;
} else if (tmp_h != dst_h) {
int index_h = floor((dst_h - tmp_h) / 2.0);
memcpy(dst.data + index_h * dst_w * 3, tmp.data, tmp_w * tmp_h * 3);
effect_area.x = 0;
effect_area.y = index_h;
effect_area.width = tmp_w;
effect_area.height = tmp_h;
} else {
printf("resize_uniform: unexpected size mismatch\n");
}
return 0;
}
const int color_list[80][3] = {
{216, 82, 24}, {236, 176, 31}, {125, 46, 141}, {118, 171, 47},
{76, 189, 237}, {238, 19, 46}, {76, 76, 76}, {153, 153, 153},
{255, 0, 0}, {255, 127, 0}, {190, 190, 0}, {0, 255, 0},
{0, 0, 255}, {170, 0, 255}, {84, 84, 0}, {84, 170, 0},
{84, 255, 0}, {170, 84, 0}, {170, 170, 0}, {170, 255, 0},
{255, 84, 0}, {255, 170, 0}, {255, 255, 0}, {0, 84, 127},
{0, 170, 127}, {0, 255, 127}, {84, 0, 127}, {84, 84, 127},
{84, 170, 127}, {84, 255, 127}, {170, 0, 127}, {170, 84, 127},
{170, 170, 127}, {170, 255, 127}, {255, 0, 127}, {255, 84, 127},
{255, 170, 127}, {255, 255, 127}, {0, 84, 255}, {0, 170, 255},
{0, 255, 255}, {84, 0, 255}, {84, 84, 255}, {84, 170, 255},
{84, 255, 255}, {170, 0, 255}, {170, 84, 255}, {170, 170, 255},
{170, 255, 255}, {255, 0, 255}, {255, 84, 255}, {255, 170, 255},
{42, 0, 0}, {84, 0, 0}, {127, 0, 0}, {170, 0, 0},
{212, 0, 0}, {255, 0, 0}, {0, 42, 0}, {0, 84, 0},
{0, 127, 0}, {0, 170, 0}, {0, 212, 0}, {0, 255, 0},
{0, 0, 42}, {0, 0, 84}, {0, 0, 127}, {0, 0, 170},
{0, 0, 212}, {0, 0, 255}, {0, 0, 0}, {36, 36, 36},
{72, 72, 72}, {109, 109, 109}, {145, 145, 145}, {182, 182, 182},
{218, 218, 218}, {0, 113, 188}, {80, 182, 188}, {127, 127, 0},
};
void draw_bboxes(const cv::Mat &bgr, const std::vector<BoxInfo> &bboxes,
object_rect effect_roi) {
static const char *class_names[] = {
"person", "bicycle", "car",
"motorcycle", "airplane", "bus",
"train", "truck", "boat",
"traffic light", "fire hydrant", "stop sign",
"parking meter", "bench", "bird",
"cat", "dog", "horse",
"sheep", "cow", "elephant",
"bear", "zebra", "giraffe",
"backpack", "umbrella", "handbag",
"tie", "suitcase", "frisbee",
"skis", "snowboard", "sports ball",
"kite", "baseball bat", "baseball glove",
"skateboard", "surfboard", "tennis racket",
"bottle", "wine glass", "cup",
"fork", "knife", "spoon",
"bowl", "banana", "apple",
"sandwich", "orange", "broccoli",
"carrot", "hot dog", "pizza",
"donut", "cake", "chair",
"couch", "potted plant", "bed",
"dining table", "toilet", "tv",
"laptop", "mouse", "remote",
"keyboard", "cell phone", "microwave",
"oven", "toaster", "sink",
"refrigerator", "book", "clock",
"vase", "scissors", "teddy bear",
"hair drier", "toothbrush"};
cv::Mat image = bgr.clone();
int src_w = image.cols;
int src_h = image.rows;
int dst_w = effect_roi.width;
int dst_h = effect_roi.height;
float width_ratio = (float)src_w / (float)dst_w;
float height_ratio = (float)src_h / (float)dst_h;
for (size_t i = 0; i < bboxes.size(); i++) {
const BoxInfo &bbox = bboxes[i];
cv::Scalar color =
cv::Scalar(color_list[bbox.label][0], color_list[bbox.label][1],
color_list[bbox.label][2]);
cv::rectangle(image,
cv::Rect(cv::Point((bbox.x1 - effect_roi.x) * width_ratio,
(bbox.y1 - effect_roi.y) * height_ratio),
cv::Point((bbox.x2 - effect_roi.x) * width_ratio,
(bbox.y2 - effect_roi.y) * height_ratio)),
color);
char text[256];
snprintf(text, sizeof(text), "%s %.1f%%", class_names[bbox.label],
bbox.score * 100);
int baseLine = 0;
cv::Size label_size =
cv::getTextSize(text, cv::FONT_HERSHEY_SIMPLEX, 0.4, 1, &baseLine);
int x = (bbox.x1 - effect_roi.x) * width_ratio;
int y =
(bbox.y1 - effect_roi.y) * height_ratio - label_size.height - baseLine;
if (y < 0)
y = 0;
if (x + label_size.width > image.cols)
x = image.cols - label_size.width;
cv::rectangle(image, cv::Rect(cv::Point(x, y),
cv::Size(label_size.width,
label_size.height + baseLine)),
color, -1);
cv::putText(image, text, cv::Point(x, y + label_size.height),
cv::FONT_HERSHEY_SIMPLEX, 0.4, cv::Scalar(255, 255, 255));
}
cv::imwrite("../predict.jpg", image);
}
int image_demo(PicoDet &detector, const char *imagepath) {
std::vector<std::string> filenames;
cv::glob(imagepath, filenames, false);
for (auto img_name : filenames) {
cv::Mat image = cv::imread(img_name);
if (image.empty()) {
return -1;
}
object_rect effect_roi;
cv::Mat resized_img;
resize_uniform(image, resized_img, cv::Size(image_size, image_size),
effect_roi);
auto results = detector.detect(resized_img, 0.4, 0.5);
draw_bboxes(image, results, effect_roi);
}
return 0;
}
int webcam_demo(PicoDet &detector, int cam_id) {
cv::Mat image;
cv::VideoCapture cap(cam_id);
while (true) {
cap >> image;
if (image.empty()) break;
object_rect effect_roi;
cv::Mat resized_img;
resize_uniform(image, resized_img, cv::Size(image_size, image_size),
effect_roi);
auto results = detector.detect(resized_img, 0.4, 0.5);
draw_bboxes(image, results, effect_roi);
cv::waitKey(1);
}
return 0;
}
int video_demo(PicoDet &detector, const char *path) {
cv::Mat image;
cv::VideoCapture cap(path);
while (true) {
cap >> image;
if (image.empty()) break;
object_rect effect_roi;
cv::Mat resized_img;
resize_uniform(image, resized_img, cv::Size(image_size, image_size),
effect_roi);
auto results = detector.detect(resized_img, 0.4, 0.5);
draw_bboxes(image, results, effect_roi);
cv::waitKey(1);
}
return 0;
}
int benchmark(PicoDet &detector) {
int loop_num = 100;
int warm_up = 8;
double time_min = DBL_MAX;
double time_max = -DBL_MAX;
double time_avg = 0;
cv::Mat image(image_size, image_size, CV_8UC3, cv::Scalar(1, 1, 1));
for (int i = 0; i < warm_up + loop_num; i++) {
auto start = std::chrono::steady_clock::now();
std::vector<BoxInfo> results;
results = detector.detect(image, 0.4, 0.5);
auto end = std::chrono::steady_clock::now();
double time =
std::chrono::duration<double, std::milli>(end - start).count();
if (i >= warm_up) {
time_min = (std::min)(time_min, time);
time_max = (std::max)(time_max, time);
time_avg += time;
}
}
time_avg /= loop_num;
fprintf(stderr, "%20s min = %7.2f max = %7.2f avg = %7.2f\n", "picodet",
time_min, time_max, time_avg);
return 0;
}
int main(int argc, char **argv) {
if (argc != 3) {
fprintf(stderr, "usage: %s [mode] [path]. \n For webcam mode=0, path is "
"cam id; \n For image demo, mode=1, path=xxx/xxx/*.jpg; \n "
"For video, mode=2; \n For benchmark, mode=3 path=0.\n",
argv[0]);
return -1;
}
std::cout << "start init model" << std::endl;
auto detector = PicoDet("../weight/picodet_m_416.xml");
std::cout << "success" << std::endl;
int mode = atoi(argv[1]);
switch (mode) {
case 0: {
int cam_id = atoi(argv[2]);
webcam_demo(detector, cam_id);
break;
}
case 1: {
const char *images = argv[2];
image_demo(detector, images);
break;
}
case 2: {
const char *path = argv[2];
video_demo(detector, path);
break;
}
case 3: {
benchmark(detector);
break;
}
default: {
fprintf(stderr, "usage: %s [mode] [path]. \n For webcam mode=0, path is "
"cam id; \n For image demo, mode=1, path=xxx/xxx/*.jpg; \n "
"For video, mode=2; \n For benchmark, mode=3 path=0.\n",
argv[0]);
break;
}
}
}
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// reference from https://github.com/RangiLyu/nanodet/tree/main/demo_openvino
#include "picodet_openvino.h"
inline float fast_exp(float x) {
union {
uint32_t i;
float f;
} v{};
v.i = (1 << 23) * (1.4426950409 * x + 126.93490512f);
return v.f;
}
inline float sigmoid(float x) { return 1.0f / (1.0f + fast_exp(-x)); }
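// Numerically stable softmax: shift by the max before exponentiating.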
template <typename _Tp>
int activation_function_softmax(const _Tp *src, _Tp *dst, int length) {
const _Tp alpha = *std::max_element(src, src + length);
_Tp denominator{0};
for (int i = 0; i < length; ++i) {
dst[i] = fast_exp(src[i] - alpha);
denominator += dst[i];
}
for (int i = 0; i < length; ++i) {
dst[i] /= denominator;
}
return 0;
}
PicoDet::PicoDet(const char *model_path) {
InferenceEngine::Core ie;
InferenceEngine::CNNNetwork model = ie.ReadNetwork(model_path);
// prepare input settings
InferenceEngine::InputsDataMap inputs_map(model.getInputsInfo());
input_name_ = inputs_map.begin()->first;
InferenceEngine::InputInfo::Ptr input_info = inputs_map.begin()->second;
// prepare output settings
InferenceEngine::OutputsDataMap outputs_map(model.getOutputsInfo());
for (auto &output_info : outputs_map) {
output_info.second->setPrecision(InferenceEngine::Precision::FP32);
}
// get network
network_ = ie.LoadNetwork(model, "CPU");
infer_request_ = network_.CreateInferRequest();
}
PicoDet::~PicoDet() {}
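// Copy HWC uint8 pixels into the CHW float input blob. No mean/std
// normalization happens here: it is folded into the model by the Model
// Optimizer (--mean_values/--scale_values, see the README).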
void PicoDet::preprocess(cv::Mat &image, InferenceEngine::Blob::Ptr &blob) {
int img_w = image.cols;
int img_h = image.rows;
int channels = 3;
InferenceEngine::MemoryBlob::Ptr mblob =
InferenceEngine::as<InferenceEngine::MemoryBlob>(blob);
if (!mblob) {
THROW_IE_EXCEPTION
<< "We expect the input blob to be inherited from MemoryBlob in "
<< "PicoDet::preprocess, but we were not able to cast it to MemoryBlob";
}
auto mblobHolder = mblob->wmap();
float *blob_data = mblobHolder.as<float *>();
for (size_t c = 0; c < channels; c++) {
for (size_t h = 0; h < img_h; h++) {
for (size_t w = 0; w < img_w; w++) {
blob_data[c * img_w * img_h + h * img_w + w] =
(float)image.at<cv::Vec3b>(h, w)[c];
}
}
}
}
std::vector<BoxInfo> PicoDet::detect(cv::Mat image, float score_threshold,
float nms_threshold) {
InferenceEngine::Blob::Ptr input_blob = infer_request_.GetBlob(input_name_);
preprocess(image, input_blob);
// do inference
infer_request_.Infer();
// get output
std::vector<std::vector<BoxInfo>> results;
results.resize(this->num_class_);
for (const auto &head_info : this->heads_info_) {
const InferenceEngine::Blob::Ptr dis_pred_blob =
infer_request_.GetBlob(head_info.dis_layer);
const InferenceEngine::Blob::Ptr cls_pred_blob =
infer_request_.GetBlob(head_info.cls_layer);
auto mdis_pred =
InferenceEngine::as<InferenceEngine::MemoryBlob>(dis_pred_blob);
auto mdis_pred_holder = mdis_pred->rmap();
const float *dis_pred = mdis_pred_holder.as<const float *>();
auto mcls_pred =
InferenceEngine::as<InferenceEngine::MemoryBlob>(cls_pred_blob);
auto mcls_pred_holder = mcls_pred->rmap();
const float *cls_pred = mcls_pred_holder.as<const float *>();
this->decode_infer(cls_pred, dis_pred, head_info.stride, score_threshold,
results);
}
std::vector<BoxInfo> dets;
for (int i = 0; i < (int)results.size(); i++) {
this->nms(results[i], nms_threshold);
for (auto &box : results[i]) {
dets.push_back(box);
}
}
return dets;
}
void PicoDet::decode_infer(const float *&cls_pred, const float *&dis_pred,
int stride, float threshold,
std::vector<std::vector<BoxInfo>> &results) {
int feature_h = ceil((float)input_size_ / stride);
int feature_w = ceil((float)input_size_ / stride);
for (int idx = 0; idx < feature_h * feature_w; idx++) {
int row = idx / feature_w;
int col = idx % feature_w;
float score = 0;
int cur_label = 0;
for (int label = 0; label < num_class_; label++) {
if (cls_pred[idx * num_class_ + label] > score) {
score = cls_pred[idx * num_class_ + label];
cur_label = label;
}
}
if (score > threshold) {
const float *bbox_pred = dis_pred + idx * (reg_max_ + 1) * 4;
results[cur_label].push_back(
this->disPred2Bbox(bbox_pred, cur_label, score, col, row, stride));
}
}
}
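// disPred2Bbox: decode one GFL/DFL box prediction. Softmax over the
// reg_max_+1 bins per side gives a discrete distribution over distances;
// its expectation, scaled by the stride, is the predicted distance from
// the anchor-point center to each box side.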
BoxInfo PicoDet::disPred2Bbox(const float *&dfl_det, int label, float score,
int x, int y, int stride) {
float ct_x = (x + 0.5) * stride;
float ct_y = (y + 0.5) * stride;
std::vector<float> dis_pred;
dis_pred.resize(4);
for (int i = 0; i < 4; i++) {
float dis = 0;
float *dis_after_sm = new float[reg_max_ + 1];
activation_function_softmax(dfl_det + i * (reg_max_ + 1), dis_after_sm,
reg_max_ + 1);
for (int j = 0; j < reg_max_ + 1; j++) {
dis += j * dis_after_sm[j];
}
dis *= stride;
dis_pred[i] = dis;
delete[] dis_after_sm;
}
float xmin = (std::max)(ct_x - dis_pred[0], .0f);
float ymin = (std::max)(ct_y - dis_pred[1], .0f);
float xmax = (std::min)(ct_x + dis_pred[2], (float)this->input_size_);
float ymax = (std::min)(ct_y + dis_pred[3], (float)this->input_size_);
return BoxInfo{xmin, ymin, xmax, ymax, score, label};
}
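// Greedy NMS: sort by score, then drop any box whose IoU with a kept box
// reaches NMS_THRESH (areas use the +1 pixel convention).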
void PicoDet::nms(std::vector<BoxInfo> &input_boxes, float NMS_THRESH) {
std::sort(input_boxes.begin(), input_boxes.end(),
[](BoxInfo a, BoxInfo b) { return a.score > b.score; });
std::vector<float> vArea(input_boxes.size());
for (int i = 0; i < int(input_boxes.size()); ++i) {
vArea[i] = (input_boxes.at(i).x2 - input_boxes.at(i).x1 + 1) *
(input_boxes.at(i).y2 - input_boxes.at(i).y1 + 1);
}
for (int i = 0; i < int(input_boxes.size()); ++i) {
for (int j = i + 1; j < int(input_boxes.size());) {
float xx1 = (std::max)(input_boxes[i].x1, input_boxes[j].x1);
float yy1 = (std::max)(input_boxes[i].y1, input_boxes[j].y1);
float xx2 = (std::min)(input_boxes[i].x2, input_boxes[j].x2);
float yy2 = (std::min)(input_boxes[i].y2, input_boxes[j].y2);
float w = (std::max)(float(0), xx2 - xx1 + 1);
float h = (std::max)(float(0), yy2 - yy1 + 1);
float inter = w * h;
float ovr = inter / (vArea[i] + vArea[j] - inter);
if (ovr >= NMS_THRESH) {
input_boxes.erase(input_boxes.begin() + j);
vArea.erase(vArea.begin() + j);
} else {
j++;
}
}
}
}
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// reference from https://github.com/RangiLyu/nanodet/tree/main/demo_openvino
#ifndef _PICODET_OPENVINO_H_
#define _PICODET_OPENVINO_H_
#include <inference_engine.hpp>
#include <opencv2/core.hpp>
#include <string>
#define image_size 416
typedef struct HeadInfo {
std::string cls_layer;
std::string dis_layer;
int stride;
} HeadInfo;
typedef struct BoxInfo {
float x1;
float y1;
float x2;
float y2;
float score;
int label;
} BoxInfo;
class PicoDet {
public:
PicoDet(const char *param);
~PicoDet();
InferenceEngine::ExecutableNetwork network_;
InferenceEngine::InferRequest infer_request_;
// static bool hasGPU;
std::vector<HeadInfo> heads_info_{
// cls_pred|dis_pred|stride
{"transpose_0.tmp_0", "transpose_1.tmp_0", 8},
{"transpose_2.tmp_0", "transpose_3.tmp_0", 16},
{"transpose_4.tmp_0", "transpose_5.tmp_0", 32},
{"transpose_6.tmp_0", "transpose_7.tmp_0", 64},
};
std::vector<BoxInfo> detect(cv::Mat image, float score_threshold,
float nms_threshold);
private:
void preprocess(cv::Mat &image, InferenceEngine::Blob::Ptr &blob);
void decode_infer(const float *&cls_pred, const float *&dis_pred, int stride,
float threshold,
std::vector<std::vector<BoxInfo>> &results);
BoxInfo disPred2Bbox(const float *&dfl_det, int label, float score, int x,
int y, int stride);
static void nms(std::vector<BoxInfo> &result, float nms_threshold);
std::string input_name_;
int input_size_ = image_size;
int num_class_ = 80;
int reg_max_ = 7;
};
#endif
# PicoDet OpenVINO Benchmark Demo
This folder provides a benchmark demo for measuring PicoDet speed with [Intel's OpenVINO Toolkit](https://software.intel.com/content/www/us/en/develop/tools/openvino-toolkit.html), plus an inference demo for models exported with post-processing.
## Install OpenVINO Toolkit
Go to the [OpenVINO HomePage](https://software.intel.com/content/www/us/en/develop/tools/openvino-toolkit.html), download the matching version, and install it.
This demo uses OpenVINO 2022.1.0, which can be installed directly with:
```shell
pip install openvino==2022.1.0
```
For detailed installation steps, see the [OpenVINO documentation](https://docs.openvinotoolkit.org/latest/get_started_guides.html).
## Benchmark
- Prepare the test model: following the "export and convert model" steps of [PicoDet](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/picodet), export the model without post-processing (`-o export.benchmark=True`) and generate the simplified ONNX model to be tested (it can also be downloaded directly from the links below). Then create an `out_onnxsim` folder in this directory and place the exported ONNX model in it.
- Prepare the test image: by default this demo uses PaddleDetection/demo/[000000014439.jpg](https://github.com/PaddlePaddle/PaddleDetection/blob/develop/demo/000000014439.jpg)
- Run directly in this directory:
```shell
# Linux
python openvino_benchmark.py --img_path ../../../../demo/000000014439.jpg --onnx_path out_onnxsim/picodet_s_320_coco_lcnet.onnx --in_shape 320
# Windows
python openvino_benchmark.py --img_path ..\..\..\..\demo\000000014439.jpg --onnx_path out_onnxsim\picodet_s_320_coco_lcnet.onnx --in_shape 320
```
- Note: `--in_shape` is the input size of the corresponding model; the default is 320.
## Real-image test (network with post-processing, but without NMS)
- Prepare the test model: following the "export and convert model" steps of [PicoDet](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/picodet), export the model **with post-processing** but **without NMS** (`-o export.benchmark=False export.nms=False`) and generate the simplified ONNX model to be tested (it can also be downloaded directly from the links below). Then create an `out_onnxsim_infer` folder in this directory and place the exported ONNX model in it.
- Prepare the test image: by default uses ../../demo_onnxruntime/imgs/[bus.jpg](https://github.com/PaddlePaddle/PaddleDetection/blob/develop/deploy/third_engine/demo_onnxruntime/imgs/bus.jpg)
```shell
# Linux
python openvino_infer.py --img_path ../../demo_onnxruntime/imgs/bus.jpg --onnx_path out_onnxsim_infer/picodet_s_320_postproccesed_woNMS.onnx --in_shape 320
# Windows
python openvino_infer.py --img_path ..\..\demo_onnxruntime\imgs\bus.jpg --onnx_path out_onnxsim_infer\picodet_s_320_postproccesed_woNMS.onnx --in_shape 320
```
### Real-image test (network without post-processing)
```shell
# Linux
python openvino_benchmark.py --benchmark 0 --img_path ../../../../demo/000000014439.jpg --onnx_path out_onnxsim/picodet_s_320_coco_lcnet.onnx --in_shape 320
# Windows
python openvino_benchmark.py --benchmark 0 --img_path ..\..\..\..\demo\000000014439.jpg --onnx_path out_onnxsim\picodet_s_320_coco_lcnet.onnx --in_shape 320
```
- Result:
<div align="center">
<img src="../../../../docs/images/res.jpg" height="500px" >
</div>
## Benchmark results
- Measured latency:
| Model | Input size | ONNX | Latency<sup><small>[CPU](#latency)</small></sup> |
| :-------- | :--------: | :---------------------: | :----------------: |
| PicoDet-XS | 320*320 | [(w/ post-processing; w/o NMS)](https://paddledet.bj.bcebos.com/deploy/third_engine/picodet_xs_320_lcnet_postproccesed_woNMS.onnx) &#124; [(w/o post-processing)](https://paddledet.bj.bcebos.com/deploy/third_engine/picodet_xs_320_coco_lcnet.onnx) | 3.9ms |
| PicoDet-XS | 416*416 | [(w/ post-processing; w/o NMS)](https://paddledet.bj.bcebos.com/deploy/third_engine/picodet_xs_416_lcnet_postproccesed_woNMS.onnx) &#124; [(w/o post-processing)](https://paddledet.bj.bcebos.com/deploy/third_engine/picodet_xs_416_coco_lcnet.onnx) | 6.1ms |
| PicoDet-S | 320*320 | [(w/ post-processing; w/o NMS)](https://paddledet.bj.bcebos.com/deploy/third_engine/picodet_s_320_lcnet_postproccesed_woNMS.onnx) &#124; [(w/o post-processing)](https://paddledet.bj.bcebos.com/deploy/third_engine/picodet_s_320_coco_lcnet.onnx) | 4.8ms |
| PicoDet-S | 416*416 | [(w/ post-processing; w/o NMS)](https://paddledet.bj.bcebos.com/deploy/third_engine/picodet_s_416_lcnet_postproccesed_woNMS.onnx) &#124; [(w/o post-processing)](https://paddledet.bj.bcebos.com/deploy/third_engine/picodet_s_416_coco_lcnet.onnx) | 6.6ms |
| PicoDet-M | 320*320 | [(w/ post-processing; w/o NMS)](https://paddledet.bj.bcebos.com/deploy/third_engine/picodet_m_320_lcnet_postproccesed_woNMS.onnx) &#124; [(w/o post-processing)](https://paddledet.bj.bcebos.com/deploy/third_engine/picodet_m_320_coco_lcnet.onnx) | 8.2ms |
| PicoDet-M | 416*416 | [(w/ post-processing; w/o NMS)](https://paddledet.bj.bcebos.com/deploy/third_engine/picodet_m_416_lcnet_postproccesed_woNMS.onnx) &#124; [(w/o post-processing)](https://paddledet.bj.bcebos.com/deploy/third_engine/picodet_m_416_coco_lcnet.onnx) | 12.7ms |
| PicoDet-L | 320*320 | [(w/ post-processing; w/o NMS)](https://paddledet.bj.bcebos.com/deploy/third_engine/picodet_l_320_lcnet_postproccesed_woNMS.onnx) &#124; [(w/o post-processing)](https://paddledet.bj.bcebos.com/deploy/third_engine/picodet_l_320_coco_lcnet.onnx) | 11.5ms |
| PicoDet-L | 416*416 | [(w/ post-processing; w/o NMS)](https://paddledet.bj.bcebos.com/deploy/third_engine/picodet_l_416_lcnet_postproccesed_woNMS.onnx) &#124; [(w/o post-processing)](https://paddledet.bj.bcebos.com/deploy/third_engine/picodet_l_416_coco_lcnet.onnx) | 20.7ms |
| PicoDet-L | 640*640 | [(w/ post-processing; w/o NMS)](https://paddledet.bj.bcebos.com/deploy/third_engine/picodet_l_640_lcnet_postproccesed_woNMS.onnx) &#124; [(w/o post-processing)](https://paddledet.bj.bcebos.com/deploy/third_engine/picodet_l_640_coco_lcnet.onnx) | 62.5ms |
- <a name="latency">Test environment:</a> Intel Core i7-10750H CPU.
person
bicycle
car
motorbike
aeroplane
bus
train
truck
boat
traffic light
fire hydrant
stop sign
parking meter
bench
bird
cat
dog
horse
sheep
cow
elephant
bear
zebra
giraffe
backpack
umbrella
handbag
tie
suitcase
frisbee
skis
snowboard
sports ball
kite
baseball bat
baseball glove
skateboard
surfboard
tennis racket
bottle
wine glass
cup
fork
knife
spoon
bowl
banana
apple
sandwich
orange
broccoli
carrot
hot dog
pizza
donut
cake
chair
sofa
pottedplant
bed
diningtable
toilet
tvmonitor
laptop
mouse
remote
keyboard
cell phone
microwave
oven
toaster
sink
refrigerator
book
clock
vase
scissors
teddy bear
hair drier
toothbrush
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import cv2
import numpy as np
import time
import argparse
from scipy.special import softmax
from openvino.runtime import Core
def image_preprocess(img_path, re_shape):
img = cv2.imread(img_path)
img = cv2.resize(
img, (re_shape, re_shape), interpolation=cv2.INTER_LANCZOS4)
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
img = np.transpose(img, [2, 0, 1]) / 255
img = np.expand_dims(img, 0)
img_mean = np.array([0.485, 0.456, 0.406]).reshape((3, 1, 1))
img_std = np.array([0.229, 0.224, 0.225]).reshape((3, 1, 1))
img -= img_mean
img /= img_std
return img.astype(np.float32)
def draw_box(img, results, class_label, scale_x, scale_y):
label_list = list(
map(lambda x: x.strip(), open(class_label, 'r').readlines()))
for i in range(len(results)):
print(label_list[int(results[i][0])], ':', results[i][1])
bbox = results[i, 2:]
label_id = int(results[i, 0])
score = results[i, 1]
if (score > 0.20):
xmin, ymin, xmax, ymax = [
int(bbox[0] * scale_x), int(bbox[1] * scale_y),
int(bbox[2] * scale_x), int(bbox[3] * scale_y)
]
cv2.rectangle(img, (xmin, ymin), (xmax, ymax), (0, 255, 0), 3)
font = cv2.FONT_HERSHEY_SIMPLEX
label_text = label_list[label_id]
cv2.rectangle(img, (xmin, ymin), (xmax, ymin - 60), (0, 255, 0), -1)
cv2.putText(img, "#" + label_text, (xmin, ymin - 10), font, 1,
(255, 255, 255), 2, cv2.LINE_AA)
cv2.putText(img,
str(round(score, 3)), (xmin, ymin - 40), font, 0.8,
(255, 255, 255), 2, cv2.LINE_AA)
return img
def hard_nms(box_scores, iou_threshold, top_k=-1, candidate_size=200):
"""
Args:
box_scores (N, 5): boxes in corner-form and probabilities.
iou_threshold: intersection over union threshold.
top_k: keep top_k results. If k <= 0, keep all the results.
candidate_size: only consider the candidates with the highest scores.
Returns:
picked: a list of indexes of the kept boxes
"""
scores = box_scores[:, -1]
boxes = box_scores[:, :-1]
picked = []
indexes = np.argsort(scores)
indexes = indexes[-candidate_size:]
while len(indexes) > 0:
current = indexes[-1]
picked.append(current)
if 0 < top_k == len(picked) or len(indexes) == 1:
break
current_box = boxes[current, :]
indexes = indexes[:-1]
rest_boxes = boxes[indexes, :]
iou = iou_of(
rest_boxes,
np.expand_dims(
current_box, axis=0), )
indexes = indexes[iou <= iou_threshold]
return box_scores[picked, :]
def iou_of(boxes0, boxes1, eps=1e-5):
"""Return intersection-over-union (Jaccard index) of boxes.
Args:
boxes0 (N, 4): ground truth boxes.
boxes1 (N or 1, 4): predicted boxes.
eps: a small number to avoid 0 as denominator.
Returns:
iou (N): IoU values.
"""
overlap_left_top = np.maximum(boxes0[..., :2], boxes1[..., :2])
overlap_right_bottom = np.minimum(boxes0[..., 2:], boxes1[..., 2:])
overlap_area = area_of(overlap_left_top, overlap_right_bottom)
area0 = area_of(boxes0[..., :2], boxes0[..., 2:])
area1 = area_of(boxes1[..., :2], boxes1[..., 2:])
return overlap_area / (area0 + area1 - overlap_area + eps)
def area_of(left_top, right_bottom):
"""Compute the areas of rectangles given two corners.
Args:
left_top (N, 2): left top corner.
right_bottom (N, 2): right bottom corner.
Returns:
area (N): return the area.
"""
hw = np.clip(right_bottom - left_top, 0.0, None)
return hw[..., 0] * hw[..., 1]
class PicoDetPostProcess(object):
"""
Args:
input_shape (int): network input image size
ori_shape (int): ori image shape of before padding
scale_factor (float): scale factor of ori image
enable_mkldnn (bool): whether to open MKLDNN
"""
def __init__(self,
input_shape,
ori_shape,
scale_factor,
strides=[8, 16, 32, 64],
score_threshold=0.4,
nms_threshold=0.5,
nms_top_k=1000,
keep_top_k=100):
self.ori_shape = ori_shape
self.input_shape = input_shape
self.scale_factor = scale_factor
self.strides = strides
self.score_threshold = score_threshold
self.nms_threshold = nms_threshold
self.nms_top_k = nms_top_k
self.keep_top_k = keep_top_k
def warp_boxes(self, boxes, ori_shape):
"""Apply transform to boxes
"""
width, height = ori_shape[1], ori_shape[0]
n = len(boxes)
if n:
# warp points
xy = np.ones((n * 4, 3))
xy[:, :2] = boxes[:, [0, 1, 2, 3, 0, 3, 2, 1]].reshape(
n * 4, 2) # x1y1, x2y2, x1y2, x2y1
# xy = xy @ M.T # transform
xy = (xy[:, :2] / xy[:, 2:3]).reshape(n, 8) # rescale
# create new boxes
x = xy[:, [0, 2, 4, 6]]
y = xy[:, [1, 3, 5, 7]]
xy = np.concatenate(
(x.min(1), y.min(1), x.max(1), y.max(1))).reshape(4, n).T
# clip boxes
xy[:, [0, 2]] = xy[:, [0, 2]].clip(0, width)
xy[:, [1, 3]] = xy[:, [1, 3]].clip(0, height)
return xy.astype(np.float32)
else:
return boxes
def __call__(self, scores, raw_boxes):
batch_size = raw_boxes[0].shape[0]
reg_max = int(raw_boxes[0].shape[-1] / 4 - 1)
out_boxes_num = []
out_boxes_list = []
for batch_id in range(batch_size):
# generate centers
decode_boxes = []
select_scores = []
for stride, box_distribute, score in zip(self.strides, raw_boxes,
scores):
box_distribute = box_distribute[batch_id]
score = score[batch_id]
# centers
fm_h = self.input_shape[0] / stride
fm_w = self.input_shape[1] / stride
h_range = np.arange(fm_h)
w_range = np.arange(fm_w)
ww, hh = np.meshgrid(w_range, h_range)
ct_row = (hh.flatten() + 0.5) * stride
ct_col = (ww.flatten() + 0.5) * stride
center = np.stack((ct_col, ct_row, ct_col, ct_row), axis=1)
# box distribution to distance
reg_range = np.arange(reg_max + 1)
box_distance = box_distribute.reshape((-1, reg_max + 1))
box_distance = softmax(box_distance, axis=1)
box_distance = box_distance * np.expand_dims(reg_range, axis=0)
box_distance = np.sum(box_distance, axis=1).reshape((-1, 4))
box_distance = box_distance * stride
# top K candidate
topk_idx = np.argsort(score.max(axis=1))[::-1]
topk_idx = topk_idx[:self.nms_top_k]
center = center[topk_idx]
score = score[topk_idx]
box_distance = box_distance[topk_idx]
# decode box
decode_box = center + [-1, -1, 1, 1] * box_distance
select_scores.append(score)
decode_boxes.append(decode_box)
# nms
bboxes = np.concatenate(decode_boxes, axis=0)
confidences = np.concatenate(select_scores, axis=0)
picked_box_probs = []
picked_labels = []
for class_index in range(0, confidences.shape[1]):
probs = confidences[:, class_index]
mask = probs > self.score_threshold
probs = probs[mask]
if probs.shape[0] == 0:
continue
subset_boxes = bboxes[mask, :]
box_probs = np.concatenate(
[subset_boxes, probs.reshape(-1, 1)], axis=1)
box_probs = hard_nms(
box_probs,
iou_threshold=self.nms_threshold,
top_k=self.keep_top_k, )
picked_box_probs.append(box_probs)
picked_labels.extend([class_index] * box_probs.shape[0])
if len(picked_box_probs) == 0:
out_boxes_list.append(np.empty((0, 6)))
out_boxes_num.append(0)
else:
picked_box_probs = np.concatenate(picked_box_probs)
# resize output boxes
picked_box_probs[:, :4] = self.warp_boxes(
picked_box_probs[:, :4], self.ori_shape[batch_id])
im_scale = np.concatenate([
self.scale_factor[batch_id][::-1],
self.scale_factor[batch_id][::-1]
])
picked_box_probs[:, :4] /= im_scale
# class, score, box
out_boxes_list.append(
np.concatenate(
[
np.expand_dims(
np.array(picked_labels),
axis=-1), np.expand_dims(
picked_box_probs[:, 4], axis=-1),
picked_box_probs[:, :4]
],
axis=1))
out_boxes_num.append(len(picked_labels))
out_boxes_list = np.concatenate(out_boxes_list, axis=0)
out_boxes_num = np.asarray(out_boxes_num).astype(np.int32)
return out_boxes_list, out_boxes_num
def detect(img_file, compiled_model, re_shape, class_label):
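# NOTE: inference below uses the module-level `test_image` prepared in
# __main__; `img_file` is re-read only for visualization.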
output = compiled_model.infer_new_request({0: test_image})
result_ie = list(output.values())
test_im_shape = np.array([[re_shape, re_shape]]).astype('float32')
test_scale_factor = np.array([[1, 1]]).astype('float32')
np_score_list = []
np_boxes_list = []
num_outs = int(len(result_ie) / 2)
for out_idx in range(num_outs):
np_score_list.append(result_ie[out_idx])
np_boxes_list.append(result_ie[out_idx + num_outs])
postprocess = PicoDetPostProcess(test_image.shape[2:], test_im_shape,
test_scale_factor)
np_boxes, np_boxes_num = postprocess(np_score_list, np_boxes_list)
image = cv2.imread(img_file, 1)
scale_x = image.shape[1] / test_image.shape[3]
scale_y = image.shape[0] / test_image.shape[2]
res_image = draw_box(image, np_boxes, class_label, scale_x, scale_y)
cv2.imwrite('res.jpg', res_image)
cv2.imshow("res", res_image)
cv2.waitKey()
def benchmark(test_image, compiled_model):
# benchmark
loop_num = 100
warm_up = 8
timeall = 0
time_min = float("inf")
time_max = float('-inf')
for i in range(loop_num + warm_up):
time0 = time.time()
#perform the inference step
output = compiled_model.infer_new_request({0: test_image})
time1 = time.time()
timed = time1 - time0
if i >= warm_up:
timeall = timeall + timed
time_min = min(time_min, timed)
time_max = max(time_max, timed)
time_avg = timeall / loop_num
print('inference_time(ms): min={}, max={}, avg={}'.format(
round(time_min * 1000, 2),
round(time_max * 1000, 2), round(time_avg * 1000, 2)))
if __name__ == '__main__':
parser = argparse.ArgumentParser()
parser.add_argument(
'--benchmark', type=int, default=1, help="0:detect; 1:benchmark")
parser.add_argument(
'--img_path',
type=str,
default='../../../../demo/000000014439.jpg',
help="image path")
parser.add_argument(
'--onnx_path',
type=str,
default='out_onnxsim/picodet_s_320_processed.onnx',
help="onnx filepath")
parser.add_argument('--in_shape', type=int, default=320, help="input_size")
parser.add_argument(
'--class_label',
type=str,
default='coco_label.txt',
help="class label file")
args = parser.parse_args()
ie = Core()
net = ie.read_model(args.onnx_path)
test_image = image_preprocess(args.img_path, args.in_shape)
compiled_model = ie.compile_model(net, 'CPU')
if args.benchmark == 0:
detect(args.img_path, compiled_model, args.in_shape, args.class_label)
if args.benchmark == 1:
benchmark(test_image, compiled_model)
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import cv2
import numpy as np
import argparse
from scipy.special import softmax
from openvino.runtime import Core
def image_preprocess(img_path, re_shape):
img = cv2.imread(img_path)
img = cv2.resize(
img, (re_shape, re_shape), interpolation=cv2.INTER_LANCZOS4)
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
img = np.transpose(img, [2, 0, 1]) / 255
img = np.expand_dims(img, 0)
img_mean = np.array([0.485, 0.456, 0.406]).reshape((3, 1, 1))
img_std = np.array([0.229, 0.224, 0.225]).reshape((3, 1, 1))
img -= img_mean
img /= img_std
return img.astype(np.float32)
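# Generate a PASCAL VOC-style color map: the bits of each class id are
# spread across the R/G/B channels to give each class a distinct color.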
def get_color_map_list(num_classes):
color_map = num_classes * [0, 0, 0]
for i in range(0, num_classes):
j = 0
lab = i
while lab:
color_map[i * 3] |= (((lab >> 0) & 1) << (7 - j))
color_map[i * 3 + 1] |= (((lab >> 1) & 1) << (7 - j))
color_map[i * 3 + 2] |= (((lab >> 2) & 1) << (7 - j))
j += 1
lab >>= 3
color_map = [color_map[i:i + 3] for i in range(0, len(color_map), 3)]
return color_map
def draw_box(srcimg, results, class_label):
label_list = list(
map(lambda x: x.strip(), open(class_label, 'r').readlines()))
color_list = get_color_map_list(len(label_list))
clsid2color = {}
for i in range(len(results)):
classid, conf = int(results[i, 0]), results[i, 1]
xmin, ymin, xmax, ymax = int(results[i, 2]), int(results[i, 3]), int(
results[i, 4]), int(results[i, 5])
if classid not in clsid2color:
clsid2color[classid] = color_list[classid]
color = tuple(clsid2color[classid])
cv2.rectangle(srcimg, (xmin, ymin), (xmax, ymax), color, thickness=2)
print(label_list[classid] + ': ' + str(round(conf, 3)))
cv2.putText(
srcimg,
label_list[classid] + ':' + str(round(conf, 3)), (xmin, ymin - 10),
cv2.FONT_HERSHEY_SIMPLEX,
0.8, (0, 255, 0),
thickness=2)
return srcimg
def hard_nms(box_scores, iou_threshold, top_k=-1, candidate_size=200):
"""
Args:
box_scores (N, 5): boxes in corner-form and probabilities.
iou_threshold: intersection over union threshold.
top_k: keep top_k results. If k <= 0, keep all the results.
candidate_size: only consider the candidates with the highest scores.
Returns:
picked: a list of indexes of the kept boxes
"""
scores = box_scores[:, -1]
boxes = box_scores[:, :-1]
picked = []
indexes = np.argsort(scores)
indexes = indexes[-candidate_size:]
while len(indexes) > 0:
current = indexes[-1]
picked.append(current)
if 0 < top_k == len(picked) or len(indexes) == 1:
break
current_box = boxes[current, :]
indexes = indexes[:-1]
rest_boxes = boxes[indexes, :]
iou = iou_of(
rest_boxes,
np.expand_dims(
current_box, axis=0), )
indexes = indexes[iou <= iou_threshold]
return box_scores[picked, :]
def iou_of(boxes0, boxes1, eps=1e-5):
"""Return intersection-over-union (Jaccard index) of boxes.
Args:
boxes0 (N, 4): ground truth boxes.
boxes1 (N or 1, 4): predicted boxes.
eps: a small number to avoid 0 as denominator.
Returns:
iou (N): IoU values.
"""
overlap_left_top = np.maximum(boxes0[..., :2], boxes1[..., :2])
overlap_right_bottom = np.minimum(boxes0[..., 2:], boxes1[..., 2:])
overlap_area = area_of(overlap_left_top, overlap_right_bottom)
area0 = area_of(boxes0[..., :2], boxes0[..., 2:])
area1 = area_of(boxes1[..., :2], boxes1[..., 2:])
return overlap_area / (area0 + area1 - overlap_area + eps)
def area_of(left_top, right_bottom):
"""Compute the areas of rectangles given two corners.
Args:
left_top (N, 2): left top corner.
right_bottom (N, 2): right bottom corner.
Returns:
area (N): return the area.
"""
hw = np.clip(right_bottom - left_top, 0.0, None)
return hw[..., 0] * hw[..., 1]
class PicoDetNMS(object):
"""
Args:
input_shape (int): network input image size
scale_factor (float): scale factor of ori image
"""
def __init__(self,
input_shape,
scale_x,
scale_y,
strides=[8, 16, 32, 64],
score_threshold=0.4,
nms_threshold=0.5,
nms_top_k=1000,
keep_top_k=100):
self.input_shape = input_shape
self.scale_x = scale_x
self.scale_y = scale_y
self.strides = strides
self.score_threshold = score_threshold
self.nms_threshold = nms_threshold
self.nms_top_k = nms_top_k
self.keep_top_k = keep_top_k
def __call__(self, decode_boxes, select_scores):
batch_size = 1
out_boxes_list = []
for batch_id in range(batch_size):
# nms
bboxes = np.concatenate(decode_boxes, axis=0)
confidences = np.concatenate(select_scores, axis=0)
picked_box_probs = []
picked_labels = []
for class_index in range(0, confidences.shape[1]):
probs = confidences[:, class_index]
mask = probs > self.score_threshold
probs = probs[mask]
if probs.shape[0] == 0:
continue
subset_boxes = bboxes[mask, :]
box_probs = np.concatenate(
[subset_boxes, probs.reshape(-1, 1)], axis=1)
box_probs = hard_nms(
box_probs,
iou_threshold=self.nms_threshold,
top_k=self.keep_top_k, )
picked_box_probs.append(box_probs)
picked_labels.extend([class_index] * box_probs.shape[0])
if len(picked_box_probs) == 0:
out_boxes_list.append(np.empty((0, 6)))
else:
picked_box_probs = np.concatenate(picked_box_probs)
# resize output boxes
picked_box_probs[:, 0] *= self.scale_x
picked_box_probs[:, 2] *= self.scale_x
picked_box_probs[:, 1] *= self.scale_y
picked_box_probs[:, 3] *= self.scale_y
# class, score, box
out_boxes_list.append(
np.concatenate(
[
np.expand_dims(
np.array(picked_labels),
axis=-1), np.expand_dims(
picked_box_probs[:, 4], axis=-1),
picked_box_probs[:, :4]
],
axis=1))
out_boxes_list = np.concatenate(out_boxes_list, axis=0)
return out_boxes_list
def detect(img_file, compiled_model, class_label):
output = compiled_model.infer_new_request({0: test_image})
result_ie = list(output.values())
decode_boxes = []
select_scores = []
num_outs = int(len(result_ie) / 2)
for out_idx in range(num_outs):
decode_boxes.append(result_ie[out_idx])
select_scores.append(result_ie[out_idx + num_outs])
image = cv2.imread(img_file, 1)
scale_x = image.shape[1] / test_image.shape[3]
scale_y = image.shape[0] / test_image.shape[2]
nms = PicoDetNMS(test_image.shape[2:], scale_x, scale_y)
np_boxes = nms(decode_boxes, select_scores)
res_image = draw_box(image, np_boxes, class_label)
cv2.imwrite('res.jpg', res_image)
cv2.imshow("res", res_image)
cv2.waitKey()
if __name__ == '__main__':
parser = argparse.ArgumentParser()
parser.add_argument(
'--img_path',
type=str,
default='../../demo_onnxruntime/imgs/bus.jpg',
help="image path")
parser.add_argument(
'--onnx_path',
type=str,
default='out_onnxsim_infer/picodet_s_320_postproccesed_woNMS.onnx',
help="onnx filepath")
parser.add_argument('--in_shape', type=int, default=320, help="input_size")
parser.add_argument(
'--class_label',
type=str,
default='coco_label.txt',
help="class label file")
args = parser.parse_args()
ie = Core()
net = ie.read_model(args.onnx_path)
test_image = image_preprocess(args.img_path, args.in_shape)
compiled_model = ie.compile_model(net, 'CPU')
detect(args.img_path, compiled_model, args.class_label)
cmake_minimum_required(VERSION 3.4.1)
set(CMAKE_CXX_STANDARD 14)
project(tinypose_demo)
find_package(OpenCV REQUIRED)
find_package(InferenceEngine REQUIRED)
find_package(ngraph REQUIRED)
include_directories(
${OpenCV_INCLUDE_DIRS}
${CMAKE_CURRENT_SOURCE_DIR}
${CMAKE_CURRENT_BINARY_DIR}
)
add_executable(tinypose_demo main.cpp picodet_openvino.cpp keypoint_detector.cpp keypoint_postprocess.cpp)
target_link_libraries(
tinypose_demo
${InferenceEngine_LIBRARIES}
${NGRAPH_LIBRARIES}
${OpenCV_LIBS}
)
# TinyPose OpenVINO Demo
This folder provides TinyPose inference code using
[Intel's OpenVINO Toolkit](https://software.intel.com/content/www/us/en/develop/tools/openvino-toolkit.html). Most of the implementation in this folder is the same as in *demo_ncnn*.
**Recommended**
1. Install from the offline `.tar.gz` package instead of the GitHub method: [link](https://registrationcenter-download.intel.com/akdlm/irc_nas/18096/l_openvino_toolkit_p_2021.4.689.tgz).
2. You can also deploy OpenVINO with Docker:
```
docker pull openvino/ubuntu18_dev:2021.4.1
```
## Install OpenVINO Toolkit
Go to the [OpenVINO HomePage](https://software.intel.com/content/www/us/en/develop/tools/openvino-toolkit.html), download a suitable version, and install it.
Follow the official Get Started guides: https://docs.openvinotoolkit.org/latest/get_started_guides.html
## Set the Environment Variables
### Windows:
Run this command in cmd (every time before using OpenVINO):
```cmd
<INSTALL_DIR>\openvino_2021\bin\setupvars.bat
```
Or set the system environment variables once and for all:
Name |Value
:--------------------:|:--------:
INTEL_OPENVINO_DIR | <INSTALL_DIR>\openvino_2021
INTEL_CVSDK_DIR | %INTEL_OPENVINO_DIR%
InferenceEngine_DIR | %INTEL_OPENVINO_DIR%\deployment_tools\inference_engine\share
HDDL_INSTALL_DIR | %INTEL_OPENVINO_DIR%\deployment_tools\inference_engine\external\hddl
ngraph_DIR | %INTEL_OPENVINO_DIR%\deployment_tools\ngraph\cmake
And add this to `Path`:
```
%INTEL_OPENVINO_DIR%\deployment_tools\inference_engine\bin\intel64\Debug;%INTEL_OPENVINO_DIR%\deployment_tools\inference_engine\bin\intel64\Release;%HDDL_INSTALL_DIR%\bin;%INTEL_OPENVINO_DIR%\deployment_tools\inference_engine\external\tbb\bin;%INTEL_OPENVINO_DIR%\deployment_tools\ngraph\lib
```
### Linux
Run this command in the shell (every time before using OpenVINO):
```shell
source /opt/intel/openvino_2021/bin/setupvars.sh
```
Or edit `~/.bashrc`:
```shell
vi ~/.bashrc
```
Add this line to the end of the file:
```shell
source /opt/intel/openvino_2021/bin/setupvars.sh
```
## Convert model
**1. Convert to ONNX**
Create `picodet_m_416_coco.onnx` and `tinypose256.onnx`.
Example:
```shell
modelName=picodet_m_416_coco
# export model
python tools/export_model.py \
-c configs/picodet/${modelName}.yml \
-o weights=${modelName}.pdparams \
--output_dir=inference_model
# convert to onnx
paddle2onnx --model_dir inference_model/${modelName} \
--model_filename model.pdmodel \
--params_filename model.pdiparams \
--opset_version 11 \
--save_file ${modelName}.onnx
# onnxsim
python -m onnxsim ${modelName}.onnx ${modelName}_sim.onnx
```
**2. Convert to OpenVINO**
``` shell
cd <INSTALL_DIR>/openvino_2021/deployment_tools/model_optimizer
```
Install the requirements for the conversion tool:
```shell
cd ./install_prerequisites
sudo ./install_prerequisites_onnx.sh
```
Then convert the model. Note: `mean_values` and `scale_values` must match the preprocessing settings in the YAML config file used for training.
```shell
mo_onnx.py --input_model <ONNX_MODEL> --mean_values [103.53,116.28,123.675] --scale_values [57.375,57.12,58.395] --input_shape [1,3,256,192]
```
**Note: newer versions of the OpenVINO conversion tools may produce an error in the Resize op. If you run into this problem, try version openvino_2021.4.689.**
## Build
### Windows
```cmd
<OPENVINO_INSTALL_DIR>\openvino_2021\bin\setupvars.bat
mkdir build
cd build
cmake ..
msbuild tinypose_demo.vcxproj /p:configuration=release /p:platform=x64
```
### Linux
```shell
source /opt/intel/openvino_2021/bin/setupvars.sh
mkdir build
cd build
cmake ..
make
```
## Run demo
Download the PicoDet OpenVINO model: [PicoDet OpenVINO model download link](https://paddledet.bj.bcebos.com/deploy/third_engine/picodet_m_416_openvino.zip).
Download the TinyPose OpenVINO model: [TinyPose OpenVINO model download link](https://bj.bcebos.com/v1/paddledet/deploy/third_engine/demo_openvino_kpts.tar.gz); the original PaddlePaddle model is [Tinypose256](https://bj.bcebos.com/v1/paddledet/models/keypoint/tinypose_enhance/tinypose_256x192.pdparams).
Move the PicoDet and TinyPose OpenVINO model files to the demo's `weight` folder.
Note:
1. The model output node names may change with new versions of paddle/paddle2onnx/onnxsim/openvino; check your own model's output nodes if the code cannot find "conv2d_441.tmp_1"/"argmax_0.tmp_0".
2. If you hit the error "Cannot find blob with name: transpose_1.tmp_0", your PicoDet model is an old version. You can modify the code below to fix it.
```
#picodet_openvino.h line 50-54
std::vector<HeadInfo> heads_info_{
// cls_pred|dis_pred|stride
{"transpose_0.tmp_0", "transpose_1.tmp_0", 8},
{"transpose_2.tmp_0", "transpose_3.tmp_0", 16},
{"transpose_4.tmp_0", "transpose_5.tmp_0", 32},
{"transpose_6.tmp_0", "transpose_7.tmp_0", 64},
};
modify to:
std::vector<HeadInfo> heads_info_{
// cls_pred|dis_pred|stride
{"save_infer_model/scale_0.tmp_1", "save_infer_model/scale_4.tmp_1", 8},
{"save_infer_model/scale_1.tmp_1", "save_infer_model/scale_5.tmp_1", 16},
{"save_infer_model/scale_2.tmp_1", "save_infer_model/scale_6.tmp_1", 32},
{"save_infer_model/scale_3.tmp_1", "save_infer_model/scale_7.tmp_1", 64},
};
```
3. You can view your ONNX model with [Netron](https://netron.app/), or list its output node names with the script sketched below.
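A minimal sketch for listing a model's output node names (this assumes the `onnx` Python package is installed; the filename is a placeholder):
```python
# List the output node names of an ONNX model (filename is a placeholder).
import onnx

model = onnx.load("picodet_m_416_coco_sim.onnx")
print([output.name for output in model.graph.output])
```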
### Edit file
```
step1:
main.cpp
#define image_size 416
...
cv::Mat image(256, 192, CV_8UC3, cv::Scalar(1, 1, 1));
std::vector<float> center = {128, 96};
std::vector<float> scale = {256, 192};
...
auto detector = PicoDet("../weight/picodet_m_416.xml");
auto kpts_detector = new KeyPointDetector("../weight/tinypose256.xml", -1, 256, 192);
...
step2:
picodet_openvino.h
#define image_size 416
```
### Run
Run command:
``` shell
./tinypose_demo [mode] [image_file]
```
| param | detail |
| ---- | ---- |
| --mode | input mode: 0 = camera; 1 = image; 2 = video; 3 = benchmark |
| --image_file | input image path |
#### Webcam
```shell
tinypose_demo 0 0
```
#### Inference images
```shell
tinypose_demo 1 IMAGE_FOLDER/*.jpg
```
#### Inference video
```shell
tinypose_demo 2 VIDEO_PATH
```
#### Benchmark
```shell
tinypose_demo 3 0
```
Platform: Intel(R) Xeon(R) CPU E5-2650 v4 @ 2.20GHz × 24 cores
Model: [Tinypose256_Openvino](https://paddledet.bj.bcebos.com/deploy/third_engine/tinypose_256_openvino.zip)
| param | Min | Max | Avg |
| ------------- | ----- | ----- | ----- |
| infer time(s) | 0.018 | 0.062 | 0.028 |
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <sstream>
// for setprecision
#include <chrono>
#include <iomanip>
#include "keypoint_detector.h"
namespace PaddleDetection {
// Visualization of keypoint detection results
cv::Mat VisualizeKptsResult(const cv::Mat& img,
const std::vector<KeyPointResult>& results,
const std::vector<int>& colormap,
float threshold) {
const int edge[][2] = {{0, 1},
{0, 2},
{1, 3},
{2, 4},
{3, 5},
{4, 6},
{5, 7},
{6, 8},
{7, 9},
{8, 10},
{5, 11},
{6, 12},
{11, 13},
{12, 14},
{13, 15},
{14, 16},
{11, 12}};
cv::Mat vis_img = img.clone();
for (int batchid = 0; batchid < results.size(); batchid++) {
for (int i = 0; i < results[batchid].num_joints; i++) {
if (results[batchid].keypoints[i * 3] > threshold) {
int x_coord = int(results[batchid].keypoints[i * 3 + 1]);
int y_coord = int(results[batchid].keypoints[i * 3 + 2]);
cv::circle(vis_img,
cv::Point2d(x_coord, y_coord),
1,
cv::Scalar(0, 0, 255),
2);
}
}
for (int i = 0; i < int(sizeof(edge) / sizeof(edge[0])); i++) {
if (results[batchid].keypoints[edge[i][0] * 3] > threshold &&
results[batchid].keypoints[edge[i][1] * 3] > threshold) {
int x_start = int(results[batchid].keypoints[edge[i][0] * 3 + 1]);
int y_start = int(results[batchid].keypoints[edge[i][0] * 3 + 2]);
int x_end = int(results[batchid].keypoints[edge[i][1] * 3 + 1]);
int y_end = int(results[batchid].keypoints[edge[i][1] * 3 + 2]);
cv::line(vis_img,
cv::Point2d(x_start, y_start),
cv::Point2d(x_end, y_end),
colormap[i],
1);
}
}
}
return vis_img;
}
void KeyPointDetector::Postprocess(std::vector<float>& output,
std::vector<uint64_t>& output_shape,
std::vector<float>& idxout,
std::vector<uint64_t>& idx_shape,
std::vector<KeyPointResult>* result,
std::vector<std::vector<float>>& center_bs,
std::vector<std::vector<float>>& scale_bs) {
std::vector<float> preds(output_shape[1] * 3, 0);
for (int batchid = 0; batchid < output_shape[0]; batchid++) {
get_final_preds(output,
output_shape,
idxout,
idx_shape,
center_bs[batchid],
scale_bs[batchid],
preds,
batchid,
this->use_dark());
KeyPointResult result_item;
result_item.num_joints = output_shape[1];
result_item.keypoints.clear();
for (int i = 0; i < output_shape[1]; i++) {
result_item.keypoints.emplace_back(preds[i * 3]);
result_item.keypoints.emplace_back(preds[i * 3 + 1]);
result_item.keypoints.emplace_back(preds[i * 3 + 2]);
}
result->push_back(result_item);
}
}
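// Predict: resize each cropped image to (in_w, in_h), copy the batch into
// the input blob, run inference, then read the heatmap ("conv2d_441.tmp_1")
// and argmax index ("argmax_0.tmp_0") outputs for post-processing.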
void KeyPointDetector::Predict(const std::vector<cv::Mat> imgs,
std::vector<std::vector<float>>& center_bs,
std::vector<std::vector<float>>& scale_bs,
std::vector<KeyPointResult>* result) {
int batch_size = imgs.size();
auto insize = 3 * in_h * in_w;
InferenceEngine::Blob::Ptr input_blob = infer_request_.GetBlob(input_name_);
// Preprocess image
InferenceEngine::MemoryBlob::Ptr mblob =
InferenceEngine::as<InferenceEngine::MemoryBlob>(input_blob);
if (!mblob) {
THROW_IE_EXCEPTION
<< "We expect the input blob to be inherited from MemoryBlob in "
<< "KeyPointDetector::Predict, but we were not able to cast it to MemoryBlob";
}
auto mblobHolder = mblob->wmap();
float* blob_data = mblobHolder.as<float*>();
cv::Mat resized_im;
for (int bs_idx = 0; bs_idx < batch_size; bs_idx++) {
cv::Mat im = imgs.at(bs_idx);
cv::resize(im, resized_im, cv::Size(in_w, in_h));
for (size_t c = 0; c < 3; c++) {
for (size_t h = 0; h < in_h; h++) {
for (size_t w = 0; w < in_w; w++) {
blob_data[bs_idx * insize + c * in_w * in_h + h * in_w + w] =
(float)resized_im.at<cv::Vec3b>(h, w)[c];
}
}
}
}
// Run predictor
auto inference_start = std::chrono::steady_clock::now();
// do inference
infer_request_.Infer();
InferenceEngine::Blob::Ptr output_blob =
infer_request_.GetBlob("conv2d_441.tmp_1");
auto output_shape = output_blob->getTensorDesc().getDims();
InferenceEngine::MemoryBlob::Ptr moutput =
InferenceEngine::as<InferenceEngine::MemoryBlob>(output_blob);
if (moutput) {
// locked memory holder should be alive all time while access to its
// buffer happens
auto minputHolder = moutput->rmap();
auto data = minputHolder.as<const InferenceEngine::PrecisionTrait<
InferenceEngine::Precision::FP32>::value_type*>();
// Calculate output length
int output_size = 1;
for (int j = 0; j < output_shape.size(); ++j) {
output_size *= output_shape[j];
}
output_data_.resize(output_size);
std::copy_n(data, output_size, output_data_.data());
}
InferenceEngine::Blob::Ptr output_blob2 =
infer_request_.GetBlob("argmax_0.tmp_0");
auto idx_shape = output_blob2->getTensorDesc().getDims();
InferenceEngine::MemoryBlob::Ptr moutput2 =
InferenceEngine::as<InferenceEngine::MemoryBlob>(output_blob2);
if (moutput2) {
// locked memory holder should be alive all time while access to its
// buffer happens
auto minputHolder = moutput2->rmap();
// Original I64 precision was converted to I32
auto data = minputHolder.as<const InferenceEngine::PrecisionTrait<
InferenceEngine::Precision::FP32>::value_type*>();
// Calculate output length
int output_size = 1;
for (int j = 0; j < idx_shape.size(); ++j) {
output_size *= idx_shape[j];
}
idx_data_.resize(output_size);
std::copy_n(data, output_size, idx_data_.data());
}
auto inference_end = std::chrono::steady_clock::now();
std::chrono::duration<double> elapsed = inference_end - inference_start;
printf("keypoint inference time: %f s\n", elapsed.count());
// Postprocessing result
Postprocess(output_data_,
output_shape,
idx_data_,
idx_shape,
result,
center_bs,
scale_bs);
}
} // namespace PaddleDetection
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <ctime>
#include <memory>
#include <string>
#include <utility>
#include <vector>
#include <opencv2/core/core.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#include <inference_engine.hpp>
#include "keypoint_postprocess.h"
namespace PaddleDetection {
// Object KeyPoint Result
struct KeyPointResult {
// Keypoints: shape(N x 3); N: number of Joints; 3: x,y,conf
std::vector<float> keypoints;
int num_joints = -1;
};
// Visualize KeyPoint Results
cv::Mat VisualizeKptsResult(const cv::Mat& img,
const std::vector<KeyPointResult>& results,
const std::vector<int>& colormap,
float threshold = 0.2);
class KeyPointDetector {
public:
explicit KeyPointDetector(const std::string& model_path,
int input_height = 256,
int input_width = 192,
float score_threshold = 0.3,
const int batch_size = 1,
bool use_dark = true) {
use_dark_ = use_dark;
in_w = input_width;
in_h = input_height;
threshold_ = score_threshold;
InferenceEngine::Core ie;
auto model = ie.ReadNetwork(model_path);
// prepare input settings
InferenceEngine::InputsDataMap inputs_map(model.getInputsInfo());
input_name_ = inputs_map.begin()->first;
InferenceEngine::InputInfo::Ptr input_info = inputs_map.begin()->second;
// prepare output settings
    InferenceEngine::OutputsDataMap outputs_map(model.getOutputsInfo());
    // Both outputs (heatmap and argmax indices) are read back as FP32.
    for (auto& output_info : outputs_map) {
      output_info.second->setPrecision(InferenceEngine::Precision::FP32);
    }
// get network
network_ = ie.LoadNetwork(model, "CPU");
infer_request_ = network_.CreateInferRequest();
}
// Load Paddle inference model
  void LoadModel(std::string model_file, int num_threads);
// Run predictor
  void Predict(const std::vector<cv::Mat>& imgs,
std::vector<std::vector<float>>& center,
std::vector<std::vector<float>>& scale,
std::vector<KeyPointResult>* result = nullptr);
bool use_dark() { return this->use_dark_; }
  inline float get_threshold() { return threshold_; }
  // Defaults match the constructor's input_width/input_height (192x256).
  int in_w = 192;
  int in_h = 256;
private:
// Postprocess result
void Postprocess(std::vector<float>& output,
std::vector<uint64_t>& output_shape,
std::vector<float>& idxout,
std::vector<uint64_t>& idx_shape,
std::vector<KeyPointResult>* result,
std::vector<std::vector<float>>& center,
std::vector<std::vector<float>>& scale);
std::vector<float> output_data_;
std::vector<float> idx_data_;
float threshold_;
bool use_dark_;
InferenceEngine::ExecutableNetwork network_;
InferenceEngine::InferRequest infer_request_;
std::string input_name_;
};
} // namespace PaddleDetection
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "keypoint_postprocess.h"
#define PI 3.1415926535
#define HALF_CIRCLE_DEGREE 180
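// The third point for cv::getAffineTransform: a plus the vector (a - b)
// rotated by 90 degrees, so the three source/destination points are never
// collinear.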
cv::Point2f get_3rd_point(cv::Point2f& a, cv::Point2f& b) {
cv::Point2f direct{a.x - b.x, a.y - b.y};
return cv::Point2f(a.x - direct.y, a.y + direct.x);
}
std::vector<float> get_dir(float src_point_x,
float src_point_y,
float rot_rad) {
float sn = sin(rot_rad);
float cs = cos(rot_rad);
std::vector<float> src_result{0.0, 0.0};
src_result[0] = src_point_x * cs - src_point_y * sn;
src_result[1] = src_point_x * sn + src_point_y * cs;
return src_result;
}
void affine_tranform(
float pt_x, float pt_y, cv::Mat& trans, std::vector<float>& preds, int p) {
double new1[3] = {pt_x, pt_y, 1.0};
cv::Mat new_pt(3, 1, trans.type(), new1);
cv::Mat w = trans * new_pt;
preds[p * 3 + 1] = static_cast<float>(w.at<double>(0, 0));
preds[p * 3 + 2] = static_cast<float>(w.at<double>(1, 0));
}
void get_affine_transform(std::vector<float>& center,
std::vector<float>& scale,
float rot,
std::vector<int>& output_size,
cv::Mat& trans,
int inv) {
float src_w = scale[0];
float dst_w = static_cast<float>(output_size[0]);
float dst_h = static_cast<float>(output_size[1]);
float rot_rad = rot * PI / HALF_CIRCLE_DEGREE;
std::vector<float> src_dir = get_dir(-0.5 * src_w, 0, rot_rad);
std::vector<float> dst_dir{static_cast<float>(-0.5) * dst_w, 0.0};
cv::Point2f srcPoint2f[3], dstPoint2f[3];
srcPoint2f[0] = cv::Point2f(center[0], center[1]);
srcPoint2f[1] = cv::Point2f(center[0] + src_dir[0], center[1] + src_dir[1]);
srcPoint2f[2] = get_3rd_point(srcPoint2f[0], srcPoint2f[1]);
dstPoint2f[0] = cv::Point2f(dst_w * 0.5, dst_h * 0.5);
dstPoint2f[1] =
cv::Point2f(dst_w * 0.5 + dst_dir[0], dst_h * 0.5 + dst_dir[1]);
dstPoint2f[2] = get_3rd_point(dstPoint2f[0], dstPoint2f[1]);
if (inv == 0) {
trans = cv::getAffineTransform(srcPoint2f, dstPoint2f);
} else {
trans = cv::getAffineTransform(dstPoint2f, srcPoint2f);
}
}
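// transform_preds maps heatmap-space coordinates back to the original image.
// In the default (non-affine) branch each joint is rescaled linearly:
//   x_img = scale_w / heatmap_w * x_heat + (center_x - scale_w / 2)
//   y_img = scale_h / heatmap_h * y_heat + (center_y - scale_h / 2)
// i.e. the crop described by (center, scale) is stretched over the heatmap.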
void transform_preds(std::vector<float>& coords,
                     std::vector<float>& center,
                     std::vector<float>& scale,
                     std::vector<int>& output_size,
                     std::vector<uint64_t>& dim,
                     std::vector<float>& target_coords,
                     bool affine) {
if (affine) {
cv::Mat trans(2, 3, CV_64FC1);
get_affine_transform(center, scale, 0, output_size, trans, 1);
for (int p = 0; p < dim[1]; ++p) {
affine_tranform(
coords[p * 2], coords[p * 2 + 1], trans, target_coords, p);
}
} else {
float heat_w = static_cast<float>(output_size[0]);
float heat_h = static_cast<float>(output_size[1]);
float x_scale = scale[0] / heat_w;
float y_scale = scale[1] / heat_h;
float offset_x = center[0] - scale[0] / 2.;
float offset_y = center[1] - scale[1] / 2.;
for (int i = 0; i < dim[1]; i++) {
target_coords[i * 3 + 1] = x_scale * coords[i * 2] + offset_x;
target_coords[i * 3 + 2] = y_scale * coords[i * 2 + 1] + offset_y;
}
}
}
// only for batchsize == 1
void get_max_preds(std::vector<float>& heatmap,
std::vector<int>& dim,
std::vector<float>& preds,
std::vector<float>& maxvals,
int batchid,
int joint_idx) {
int num_joints = dim[1];
int width = dim[3];
std::vector<int> idx;
idx.resize(num_joints * 2);
for (int j = 0; j < dim[1]; j++) {
float* index = &(
heatmap[batchid * num_joints * dim[2] * dim[3] + j * dim[2] * dim[3]]);
float* end = index + dim[2] * dim[3];
float* max_dis = std::max_element(index, end);
auto max_id = std::distance(index, max_dis);
maxvals[j] = *max_dis;
if (*max_dis > 0) {
preds[j * 2] = static_cast<float>(max_id % width);
preds[j * 2 + 1] = static_cast<float>(max_id / width);
}
}
}
void dark_parse(std::vector<float>& heatmap,
std::vector<uint64_t>& dim,
std::vector<float>& coords,
int px,
int py,
int index,
int ch) {
  /* DARK postprocessing, Zhang et al., "Distribution-Aware Coordinate
  Representation for Human Pose Estimation" (CVPR 2020).
  1) offset = -hessian.inv() * derivative
  2) dx = (heatmap[x+1] - heatmap[x-1]) / 2
  3) dxx = (dx[x+1] - dx[x-1]) / 2
  4) derivative = Mat([dx, dy])
  5) hessian = Mat([[dxx, dxy], [dxy, dyy]])
  */
std::vector<float>::const_iterator first1 = heatmap.begin() + index;
std::vector<float>::const_iterator last1 =
heatmap.begin() + index + dim[2] * dim[3];
std::vector<float> heatmap_ch(first1, last1);
cv::Mat heatmap_mat = cv::Mat(heatmap_ch).reshape(0, dim[2]);
heatmap_mat.convertTo(heatmap_mat, CV_32FC1);
cv::GaussianBlur(heatmap_mat, heatmap_mat, cv::Size(3, 3), 0, 0);
heatmap_mat = heatmap_mat.reshape(1, 1);
heatmap_ch = std::vector<float>(heatmap_mat.reshape(1, 1));
float epsilon = 1e-10;
  // sample the heatmap to get values around the target location
float xy = log(fmax(heatmap_ch[py * dim[3] + px], epsilon));
float xr = log(fmax(heatmap_ch[py * dim[3] + px + 1], epsilon));
float xl = log(fmax(heatmap_ch[py * dim[3] + px - 1], epsilon));
float xr2 = log(fmax(heatmap_ch[py * dim[3] + px + 2], epsilon));
float xl2 = log(fmax(heatmap_ch[py * dim[3] + px - 2], epsilon));
float yu = log(fmax(heatmap_ch[(py + 1) * dim[3] + px], epsilon));
float yd = log(fmax(heatmap_ch[(py - 1) * dim[3] + px], epsilon));
float yu2 = log(fmax(heatmap_ch[(py + 2) * dim[3] + px], epsilon));
float yd2 = log(fmax(heatmap_ch[(py - 2) * dim[3] + px], epsilon));
float xryu = log(fmax(heatmap_ch[(py + 1) * dim[3] + px + 1], epsilon));
float xryd = log(fmax(heatmap_ch[(py - 1) * dim[3] + px + 1], epsilon));
float xlyu = log(fmax(heatmap_ch[(py + 1) * dim[3] + px - 1], epsilon));
float xlyd = log(fmax(heatmap_ch[(py - 1) * dim[3] + px - 1], epsilon));
// compute dx/dy and dxx/dyy with sampled values
float dx = 0.5 * (xr - xl);
float dy = 0.5 * (yu - yd);
float dxx = 0.25 * (xr2 - 2 * xy + xl2);
float dxy = 0.25 * (xryu - xryd - xlyu + xlyd);
float dyy = 0.25 * (yu2 - 2 * xy + yd2);
  // Finally get the offset from the derivative and the Hessian, assembled
  // from dx/dy and dxx/dyy.
  if (dxx * dyy - dxy * dxy != 0) {
    float M[2][2] = {{dxx, dxy}, {dxy, dyy}};
    float D[2] = {dx, dy};
    cv::Mat hessian(2, 2, CV_32F, M);
    cv::Mat derivative(2, 1, CV_32F, D);
    cv::Mat offset = -hessian.inv() * derivative;
    coords[ch * 2] += offset.at<float>(0, 0);
    coords[ch * 2 + 1] += offset.at<float>(1, 0);
  }
}
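// get_final_preds decodes one batch item: for each joint the argmax index
// from the model becomes integer heatmap coordinates (x = idx % W,
// y = idx / W), refined either by the DARK Taylor expansion (dark_parse) or
// by a quarter-pixel shift towards the higher neighbour, then mapped back to
// image space with transform_preds. preds holds (conf, x, y) per joint.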
void get_final_preds(std::vector<float>& heatmap,
std::vector<uint64_t>& dim,
std::vector<float>& idxout,
std::vector<uint64_t>& idxdim,
std::vector<float>& center,
std::vector<float> scale,
std::vector<float>& preds,
int batchid,
bool DARK) {
std::vector<float> coords;
coords.resize(dim[1] * 2);
int heatmap_height = dim[2];
int heatmap_width = dim[3];
for (int j = 0; j < dim[1]; ++j) {
int index = (batchid * dim[1] + j) * dim[2] * dim[3];
int idx = int(idxout[batchid * dim[1] + j]);
preds[j * 3] = heatmap[index + idx];
coords[j * 2] = idx % heatmap_width;
coords[j * 2 + 1] = idx / heatmap_width;
int px = int(coords[j * 2] + 0.5);
int py = int(coords[j * 2 + 1] + 0.5);
if (DARK && px > 1 && px < heatmap_width - 2 && py > 1 &&
py < heatmap_height - 2) {
dark_parse(heatmap, dim, coords, px, py, index, j);
} else {
      if (px > 0 && px < heatmap_width - 1) {
        float diff_x = heatmap[index + py * dim[3] + px + 1] -
                       heatmap[index + py * dim[3] + px - 1];
        // Quarter-pixel shift towards the higher neighbour. The parentheses
        // matter: without them "?:" yields +1 or -0.25 instead of +/-0.25.
        coords[j * 2] += (diff_x > 0 ? 1 : -1) * 0.25;
      }
      if (py > 0 && py < heatmap_height - 1) {
        float diff_y = heatmap[index + (py + 1) * dim[3] + px] -
                       heatmap[index + (py - 1) * dim[3] + px];
        coords[j * 2 + 1] += (diff_y > 0 ? 1 : -1) * 0.25;
      }
}
}
std::vector<int> img_size{heatmap_width, heatmap_height};
transform_preds(coords, center, scale, img_size, dim, preds);
}
void CropImg(cv::Mat& img,
cv::Mat& crop_img,
std::vector<int>& area,
std::vector<float>& center,
std::vector<float>& scale,
float expandratio) {
int crop_x1 = std::max(0, area[0]);
int crop_y1 = std::max(0, area[1]);
int crop_x2 = std::min(img.cols - 1, area[2]);
int crop_y2 = std::min(img.rows - 1, area[3]);
int center_x = (crop_x1 + crop_x2) / 2.;
int center_y = (crop_y1 + crop_y2) / 2.;
int half_h = (crop_y2 - crop_y1) / 2.;
int half_w = (crop_x2 - crop_x1) / 2.;
if (half_h * 3 > half_w * 4) {
half_w = static_cast<int>(half_h * 0.75);
} else {
half_h = static_cast<int>(half_w * 4 / 3);
}
crop_x1 =
std::max(0, center_x - static_cast<int>(half_w * (1 + expandratio)));
crop_y1 =
std::max(0, center_y - static_cast<int>(half_h * (1 + expandratio)));
crop_x2 = std::min(img.cols - 1,
static_cast<int>(center_x + half_w * (1 + expandratio)));
crop_y2 = std::min(img.rows - 1,
static_cast<int>(center_y + half_h * (1 + expandratio)));
crop_img =
img(cv::Range(crop_y1, crop_y2 + 1), cv::Range(crop_x1, crop_x2 + 1));
center.clear();
center.emplace_back((crop_x1 + crop_x2) / 2);
center.emplace_back((crop_y1 + crop_y2) / 2);
scale.clear();
scale.emplace_back((crop_x2 - crop_x1));
scale.emplace_back((crop_y2 - crop_y1));
}
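// CropImg expands the detected person box to a fixed 3:4 (w:h) aspect ratio
// plus an expandratio margin on each side, matching the 192x256 input of the
// keypoint model, and returns the (center, scale) needed to map keypoints
// back to the full image.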
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <opencv2/core/core.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#include <vector>
cv::Point2f get_3rd_point(cv::Point2f& a, cv::Point2f& b);
std::vector<float> get_dir(float src_point_x, float src_point_y, float rot_rad);
void affine_tranform(float pt_x,
                     float pt_y,
                     cv::Mat& trans,
                     std::vector<float>& preds,
                     int p);
void get_affine_transform(std::vector<float>& center,
                          std::vector<float>& scale,
                          float rot,
                          std::vector<int>& output_size,
                          cv::Mat& trans,
                          int inv);
void transform_preds(std::vector<float>& coords,
                     std::vector<float>& center,
                     std::vector<float>& scale,
                     std::vector<int>& output_size,
                     std::vector<uint64_t>& dim,
                     std::vector<float>& target_coords,
                     bool affine = false);
void box_to_center_scale(std::vector<int>& box,
int width,
int height,
std::vector<float>& center,
std::vector<float>& scale);
void get_max_preds(std::vector<float>& heatmap,
std::vector<int>& dim,
std::vector<float>& preds,
std::vector<float>& maxvals,
int batchid,
int joint_idx);
void get_final_preds(std::vector<float>& heatmap,
std::vector<uint64_t>& dim,
std::vector<float>& idxout,
std::vector<uint64_t>& idxdim,
std::vector<float>& center,
std::vector<float> scale,
std::vector<float>& preds,
int batchid,
bool DARK = true);
void CropImg(cv::Mat& img,
cv::Mat& crop_img,
std::vector<int>& area,
std::vector<float>& center,
std::vector<float>& scale,
float expandratio = 0.25);
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// reference from https://github.com/RangiLyu/nanodet
#include <cfloat>   // DBL_MAX in benchmark()
#include <chrono>   // timing in benchmark()
#include <cstdio>
#include <cstring>  // memcpy in resize_uniform()
#include <iostream>
#include <opencv2/core/core.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#define image_size 416
#include "keypoint_detector.h"
#include "picodet_openvino.h"
using namespace PaddleDetection;
struct object_rect {
int x;
int y;
int width;
int height;
};
int resize_uniform(cv::Mat& src,
cv::Mat& dst,
cv::Size dst_size,
object_rect& effect_area) {
int w = src.cols;
int h = src.rows;
int dst_w = dst_size.width;
int dst_h = dst_size.height;
dst = cv::Mat(cv::Size(dst_w, dst_h), CV_8UC3, cv::Scalar(0));
float ratio_src = w * 1.0 / h;
float ratio_dst = dst_w * 1.0 / dst_h;
int tmp_w = 0;
int tmp_h = 0;
if (ratio_src > ratio_dst) {
tmp_w = dst_w;
tmp_h = floor((dst_w * 1.0 / w) * h);
} else if (ratio_src < ratio_dst) {
tmp_h = dst_h;
tmp_w = floor((dst_h * 1.0 / h) * w);
} else {
cv::resize(src, dst, dst_size);
effect_area.x = 0;
effect_area.y = 0;
effect_area.width = dst_w;
effect_area.height = dst_h;
return 0;
}
cv::Mat tmp;
cv::resize(src, tmp, cv::Size(tmp_w, tmp_h));
if (tmp_w != dst_w) {
int index_w = floor((dst_w - tmp_w) / 2.0);
for (int i = 0; i < dst_h; i++) {
memcpy(dst.data + i * dst_w * 3 + index_w * 3,
tmp.data + i * tmp_w * 3,
tmp_w * 3);
}
effect_area.x = index_w;
effect_area.y = 0;
effect_area.width = tmp_w;
effect_area.height = tmp_h;
} else if (tmp_h != dst_h) {
int index_h = floor((dst_h - tmp_h) / 2.0);
memcpy(dst.data + index_h * dst_w * 3, tmp.data, tmp_w * tmp_h * 3);
effect_area.x = 0;
effect_area.y = index_h;
effect_area.width = tmp_w;
effect_area.height = tmp_h;
} else {
printf("error\n");
}
return 0;
}
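// resize_uniform letterboxes src into a dst_size canvas: the image is scaled
// with its aspect ratio preserved and centered, and effect_area records the
// region actually covered by image pixels so that detections can be mapped
// back to the original frame (see coordsback and draw_bboxes).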
const int color_list[80][3] = {
{216, 82, 24}, {236, 176, 31}, {125, 46, 141}, {118, 171, 47},
{76, 189, 237}, {238, 19, 46}, {76, 76, 76}, {153, 153, 153},
{255, 0, 0}, {255, 127, 0}, {190, 190, 0}, {0, 255, 0},
{0, 0, 255}, {170, 0, 255}, {84, 84, 0}, {84, 170, 0},
{84, 255, 0}, {170, 84, 0}, {170, 170, 0}, {170, 255, 0},
{255, 84, 0}, {255, 170, 0}, {255, 255, 0}, {0, 84, 127},
{0, 170, 127}, {0, 255, 127}, {84, 0, 127}, {84, 84, 127},
{84, 170, 127}, {84, 255, 127}, {170, 0, 127}, {170, 84, 127},
{170, 170, 127}, {170, 255, 127}, {255, 0, 127}, {255, 84, 127},
{255, 170, 127}, {255, 255, 127}, {0, 84, 255}, {0, 170, 255},
{0, 255, 255}, {84, 0, 255}, {84, 84, 255}, {84, 170, 255},
{84, 255, 255}, {170, 0, 255}, {170, 84, 255}, {170, 170, 255},
{170, 255, 255}, {255, 0, 255}, {255, 84, 255}, {255, 170, 255},
{42, 0, 0}, {84, 0, 0}, {127, 0, 0}, {170, 0, 0},
{212, 0, 0}, {255, 0, 0}, {0, 42, 0}, {0, 84, 0},
{0, 127, 0}, {0, 170, 0}, {0, 212, 0}, {0, 255, 0},
{0, 0, 42}, {0, 0, 84}, {0, 0, 127}, {0, 0, 170},
{0, 0, 212}, {0, 0, 255}, {0, 0, 0}, {36, 36, 36},
{72, 72, 72}, {109, 109, 109}, {145, 145, 145}, {182, 182, 182},
{218, 218, 218}, {0, 113, 188}, {80, 182, 188}, {127, 127, 0},
};
void draw_bboxes(const cv::Mat& bgr,
const std::vector<BoxInfo>& bboxes,
object_rect effect_roi) {
static const char* class_names[] = {
"person", "bicycle", "car",
"motorcycle", "airplane", "bus",
"train", "truck", "boat",
"traffic light", "fire hydrant", "stop sign",
"parking meter", "bench", "bird",
"cat", "dog", "horse",
"sheep", "cow", "elephant",
"bear", "zebra", "giraffe",
"backpack", "umbrella", "handbag",
"tie", "suitcase", "frisbee",
"skis", "snowboard", "sports ball",
"kite", "baseball bat", "baseball glove",
"skateboard", "surfboard", "tennis racket",
"bottle", "wine glass", "cup",
"fork", "knife", "spoon",
"bowl", "banana", "apple",
"sandwich", "orange", "broccoli",
"carrot", "hot dog", "pizza",
"donut", "cake", "chair",
"couch", "potted plant", "bed",
"dining table", "toilet", "tv",
"laptop", "mouse", "remote",
"keyboard", "cell phone", "microwave",
"oven", "toaster", "sink",
"refrigerator", "book", "clock",
"vase", "scissors", "teddy bear",
"hair drier", "toothbrush"};
cv::Mat image = bgr.clone();
int src_w = image.cols;
int src_h = image.rows;
int dst_w = effect_roi.width;
int dst_h = effect_roi.height;
float width_ratio = (float)src_w / (float)dst_w;
float height_ratio = (float)src_h / (float)dst_h;
for (size_t i = 0; i < bboxes.size(); i++) {
const BoxInfo& bbox = bboxes[i];
cv::Scalar color = cv::Scalar(color_list[bbox.label][0],
color_list[bbox.label][1],
color_list[bbox.label][2]);
cv::rectangle(image,
cv::Rect(cv::Point((bbox.x1 - effect_roi.x) * width_ratio,
(bbox.y1 - effect_roi.y) * height_ratio),
cv::Point((bbox.x2 - effect_roi.x) * width_ratio,
(bbox.y2 - effect_roi.y) * height_ratio)),
color);
    char text[256];
    snprintf(text, sizeof(text), "%s %.1f%%",
             class_names[bbox.label], bbox.score * 100);
int baseLine = 0;
cv::Size label_size =
cv::getTextSize(text, cv::FONT_HERSHEY_SIMPLEX, 0.4, 1, &baseLine);
int x = (bbox.x1 - effect_roi.x) * width_ratio;
int y =
(bbox.y1 - effect_roi.y) * height_ratio - label_size.height - baseLine;
if (y < 0) y = 0;
if (x + label_size.width > image.cols) x = image.cols - label_size.width;
cv::rectangle(
image,
cv::Rect(cv::Point(x, y),
cv::Size(label_size.width, label_size.height + baseLine)),
color,
-1);
cv::putText(image,
text,
cv::Point(x, y + label_size.height),
cv::FONT_HERSHEY_SIMPLEX,
0.4,
cv::Scalar(255, 255, 255));
}
cv::imwrite("../predict.jpg", image);
}
std::vector<BoxInfo> coordsback(const cv::Mat image,
const object_rect effect_roi,
const std::vector<BoxInfo>& bboxes) {
int src_w = image.cols;
int src_h = image.rows;
int dst_w = effect_roi.width;
int dst_h = effect_roi.height;
float width_ratio = (float)src_w / (float)dst_w;
float height_ratio = (float)src_h / (float)dst_h;
std::vector<BoxInfo> bboxes_oimg;
for (int i = 0; i < bboxes.size(); i++) {
auto bbox = bboxes[i];
bbox.x1 = (bbox.x1 - effect_roi.x) * width_ratio;
bbox.y1 = (bbox.y1 - effect_roi.y) * height_ratio;
bbox.x2 = (bbox.x2 - effect_roi.x) * width_ratio;
bbox.y2 = (bbox.y2 - effect_roi.y) * height_ratio;
bboxes_oimg.emplace_back(bbox);
}
return bboxes_oimg;
}
void image_infer_kpts(KeyPointDetector* kpts_detector,
cv::Mat image,
const object_rect effect_roi,
const std::vector<BoxInfo>& results,
std::string img_name = "kpts_vis",
bool save_img = true) {
std::vector<cv::Mat> cropimgs;
std::vector<std::vector<float>> center_bs;
std::vector<std::vector<float>> scale_bs;
std::vector<KeyPointResult> kpts_results;
auto results_oimg = coordsback(image, effect_roi, results);
for (int i = 0; i < results_oimg.size(); i++) {
auto rect = results_oimg[i];
if (rect.label == 0) {
cv::Mat cropimg;
std::vector<float> center, scale;
std::vector<int> area = {static_cast<int>(rect.x1),
static_cast<int>(rect.y1),
static_cast<int>(rect.x2),
static_cast<int>(rect.y2)};
CropImg(image, cropimg, area, center, scale);
cropimgs.emplace_back(cropimg);
center_bs.emplace_back(center);
scale_bs.emplace_back(scale);
}
if (cropimgs.size() == 1 ||
(cropimgs.size() > 0 && i == results_oimg.size() - 1)) {
kpts_detector->Predict(cropimgs, center_bs, scale_bs, &kpts_results);
cropimgs.clear();
center_bs.clear();
scale_bs.clear();
}
}
std::vector<int> compression_params;
compression_params.push_back(cv::IMWRITE_JPEG_QUALITY);
compression_params.push_back(95);
std::string kpts_savepath =
"keypoint_" + img_name.substr(img_name.find_last_of('/') + 1);
cv::Mat kpts_vis_img =
VisualizeKptsResult(image, kpts_results, {0, 255, 0}, 0.1);
if (save_img) {
cv::imwrite(kpts_savepath, kpts_vis_img, compression_params);
printf("Visualized output saved as %s\n", kpts_savepath.c_str());
} else {
cv::imshow("image", kpts_vis_img);
}
}
int image_demo(PicoDet& detector,
KeyPointDetector* kpts_detector,
const char* imagepath) {
std::vector<std::string> filenames;
cv::glob(imagepath, filenames, false);
for (auto img_name : filenames) {
cv::Mat image = cv::imread(img_name);
    if (image.empty()) {
      fprintf(stderr, "cv::imread %s failed\n", img_name.c_str());
      return -1;
    }
object_rect effect_roi;
cv::Mat resized_img;
resize_uniform(
image, resized_img, cv::Size(image_size, image_size), effect_roi);
auto results = detector.detect(resized_img, 0.4, 0.5);
if (kpts_detector) {
image_infer_kpts(kpts_detector, image, effect_roi, results, img_name);
}
}
return 0;
}
int webcam_demo(PicoDet& detector,
KeyPointDetector* kpts_detector,
int cam_id) {
cv::Mat image;
cv::VideoCapture cap(cam_id);
while (true) {
    cap >> image;
    // Stop when the camera fails to deliver a frame.
    if (image.empty()) {
      break;
    }
    object_rect effect_roi;
cv::Mat resized_img;
resize_uniform(
image, resized_img, cv::Size(image_size, image_size), effect_roi);
auto results = detector.detect(resized_img, 0.4, 0.5);
if (kpts_detector) {
image_infer_kpts(kpts_detector, image, effect_roi, results, "", false);
}
}
return 0;
}
int video_demo(PicoDet& detector,
KeyPointDetector* kpts_detector,
const char* path) {
cv::Mat image;
cv::VideoCapture cap(path);
while (true) {
    cap >> image;
    // Stop at the end of the video stream.
    if (image.empty()) {
      break;
    }
    object_rect effect_roi;
cv::Mat resized_img;
resize_uniform(
image, resized_img, cv::Size(image_size, image_size), effect_roi);
auto results = detector.detect(resized_img, 0.4, 0.5);
if (kpts_detector) {
image_infer_kpts(kpts_detector, image, effect_roi, results, "", false);
}
}
return 0;
}
int benchmark(KeyPointDetector* kpts_detector) {
int loop_num = 100;
int warm_up = 8;
double time_min = DBL_MAX;
double time_max = -DBL_MAX;
double time_avg = 0;
cv::Mat image(256, 192, CV_8UC3, cv::Scalar(1, 1, 1));
std::vector<float> center = {128, 96};
std::vector<float> scale = {256, 192};
std::vector<cv::Mat> cropimgs = {image};
std::vector<std::vector<float>> center_bs = {center};
std::vector<std::vector<float>> scale_bs = {scale};
std::vector<KeyPointResult> kpts_results;
for (int i = 0; i < warm_up + loop_num; i++) {
auto start = std::chrono::steady_clock::now();
kpts_detector->Predict(cropimgs, center_bs, scale_bs, &kpts_results);
auto end = std::chrono::steady_clock::now();
std::chrono::duration<double> elapsed = end - start;
double time = elapsed.count();
if (i >= warm_up) {
time_min = (std::min)(time_min, time);
time_max = (std::max)(time_max, time);
time_avg += time;
}
}
time_avg /= loop_num;
fprintf(stderr,
"%20s min = %7.4f max = %7.4f avg = %7.4f\n",
"tinypose",
time_min,
time_max,
time_avg);
return 0;
}
int main(int argc, char** argv) {
if (argc != 3) {
    fprintf(stderr,
            "usage: %s [mode] [path]. \n For webcam, mode=0, path is cam id; \n "
            "For image demo, mode=1, path=xxx/xxx/*.jpg; \n For video, mode=2, "
            "path is the video file; \n For benchmark, mode=3, path=0.\n",
            argv[0]);
return -1;
}
std::cout << "start init model" << std::endl;
auto detector = PicoDet("./weight/picodet_m_416.xml");
auto kpts_detector =
new KeyPointDetector("./weight/tinypose256_git2-sim.xml", 256, 192);
std::cout << "success" << std::endl;
int mode = atoi(argv[1]);
switch (mode) {
case 0: {
int cam_id = atoi(argv[2]);
webcam_demo(detector, kpts_detector, cam_id);
break;
}
case 1: {
const char* images = argv[2];
image_demo(detector, kpts_detector, images);
break;
}
case 2: {
const char* path = argv[2];
video_demo(detector, kpts_detector, path);
break;
}
case 3: {
benchmark(kpts_detector);
break;
}
default: {
      fprintf(stderr,
              "usage: %s [mode] [path]. \n For webcam, mode=0, path is cam id; "
              "\n For image demo, mode=1, path=xxx/xxx/*.jpg; \n For video, "
              "mode=2, path is the video file; \n For benchmark, mode=3, path=0.\n",
              argv[0]);
break;
}
}
delete kpts_detector;
kpts_detector = nullptr;
}
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// reference from https://github.com/RangiLyu/nanodet/tree/main/demo_openvino
#include "picodet_openvino.h"
inline float fast_exp(float x) {
union {
uint32_t i;
float f;
} v{};
v.i = (1 << 23) * (1.4426950409 * x + 126.93490512f);
return v.f;
}
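// fast_exp is the Schraudolph approximation: exp(x) = 2^(x / ln 2) is built
// directly as a float bit pattern, with 1 / ln 2 = 1.4426950409 scaling the
// exponent field and 126.93490512 acting as a bias tuned to reduce the
// approximation error. It trades a little accuracy for speed in the sigmoid
// and softmax below.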
inline float sigmoid(float x) { return 1.0f / (1.0f + fast_exp(-x)); }
template <typename _Tp>
int activation_function_softmax(const _Tp* src, _Tp* dst, int length) {
const _Tp alpha = *std::max_element(src, src + length);
_Tp denominator{0};
for (int i = 0; i < length; ++i) {
dst[i] = fast_exp(src[i] - alpha);
denominator += dst[i];
}
for (int i = 0; i < length; ++i) {
dst[i] /= denominator;
}
return 0;
}
PicoDet::PicoDet(const char* model_path) {
InferenceEngine::Core ie;
InferenceEngine::CNNNetwork model = ie.ReadNetwork(model_path);
// prepare input settings
InferenceEngine::InputsDataMap inputs_map(model.getInputsInfo());
input_name_ = inputs_map.begin()->first;
InferenceEngine::InputInfo::Ptr input_info = inputs_map.begin()->second;
// prepare output settings
InferenceEngine::OutputsDataMap outputs_map(model.getOutputsInfo());
for (auto& output_info : outputs_map) {
output_info.second->setPrecision(InferenceEngine::Precision::FP32);
}
// get network
network_ = ie.LoadNetwork(model, "CPU");
infer_request_ = network_.CreateInferRequest();
}
PicoDet::~PicoDet() {}
void PicoDet::preprocess(cv::Mat& image, InferenceEngine::Blob::Ptr& blob) {
int img_w = image.cols;
int img_h = image.rows;
int channels = 3;
InferenceEngine::MemoryBlob::Ptr mblob =
InferenceEngine::as<InferenceEngine::MemoryBlob>(blob);
if (!mblob) {
    THROW_IE_EXCEPTION
        << "Expected the input blob to inherit from MemoryBlob, "
        << "but the cast to MemoryBlob failed";
}
auto mblobHolder = mblob->wmap();
float* blob_data = mblobHolder.as<float*>();
  for (int c = 0; c < channels; c++) {
    for (int h = 0; h < img_h; h++) {
      for (int w = 0; w < img_w; w++) {
blob_data[c * img_w * img_h + h * img_w + w] =
(float)image.at<cv::Vec3b>(h, w)[c];
}
}
}
}
std::vector<BoxInfo> PicoDet::detect(cv::Mat image,
float score_threshold,
float nms_threshold) {
InferenceEngine::Blob::Ptr input_blob = infer_request_.GetBlob(input_name_);
preprocess(image, input_blob);
// do inference
infer_request_.Infer();
// get output
std::vector<std::vector<BoxInfo>> results;
results.resize(this->num_class_);
for (const auto& head_info : this->heads_info_) {
const InferenceEngine::Blob::Ptr dis_pred_blob =
infer_request_.GetBlob(head_info.dis_layer);
const InferenceEngine::Blob::Ptr cls_pred_blob =
infer_request_.GetBlob(head_info.cls_layer);
auto mdis_pred =
InferenceEngine::as<InferenceEngine::MemoryBlob>(dis_pred_blob);
auto mdis_pred_holder = mdis_pred->rmap();
const float* dis_pred = mdis_pred_holder.as<const float*>();
auto mcls_pred =
InferenceEngine::as<InferenceEngine::MemoryBlob>(cls_pred_blob);
auto mcls_pred_holder = mcls_pred->rmap();
const float* cls_pred = mcls_pred_holder.as<const float*>();
this->decode_infer(
cls_pred, dis_pred, head_info.stride, score_threshold, results);
}
std::vector<BoxInfo> dets;
for (int i = 0; i < (int)results.size(); i++) {
this->nms(results[i], nms_threshold);
for (auto& box : results[i]) {
dets.push_back(box);
}
}
return dets;
}
void PicoDet::decode_infer(const float*& cls_pred,
const float*& dis_pred,
int stride,
float threshold,
std::vector<std::vector<BoxInfo>>& results) {
int feature_h = input_size_ / stride;
int feature_w = input_size_ / stride;
for (int idx = 0; idx < feature_h * feature_w; idx++) {
int row = idx / feature_w;
int col = idx % feature_w;
float score = 0;
int cur_label = 0;
for (int label = 0; label < num_class_; label++) {
if (cls_pred[idx * num_class_ + label] > score) {
score = cls_pred[idx * num_class_ + label];
cur_label = label;
}
}
if (score > threshold) {
const float* bbox_pred = dis_pred + idx * (reg_max_ + 1) * 4;
results[cur_label].push_back(
this->disPred2Bbox(bbox_pred, cur_label, score, col, row, stride));
}
}
}
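// disPred2Bbox converts GFL-style distribution outputs into a box: for each
// of the four sides, the reg_max_ + 1 logits are softmaxed and the
// expectation sum(j * p_j) gives the distance from the center point
// (ct_x, ct_y) in units of the stride.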
BoxInfo PicoDet::disPred2Bbox(
const float*& dfl_det, int label, float score, int x, int y, int stride) {
float ct_x = (x + 0.5) * stride;
float ct_y = (y + 0.5) * stride;
std::vector<float> dis_pred;
dis_pred.resize(4);
  for (int i = 0; i < 4; i++) {
    float dis = 0;
    // Expectation over the distribution bins for this side.
    std::vector<float> dis_after_sm(reg_max_ + 1);
    activation_function_softmax(
        dfl_det + i * (reg_max_ + 1), dis_after_sm.data(), reg_max_ + 1);
    for (int j = 0; j < reg_max_ + 1; j++) {
      dis += j * dis_after_sm[j];
    }
    dis *= stride;
    dis_pred[i] = dis;
  }
float xmin = (std::max)(ct_x - dis_pred[0], .0f);
float ymin = (std::max)(ct_y - dis_pred[1], .0f);
float xmax = (std::min)(ct_x + dis_pred[2], (float)this->input_size_);
float ymax = (std::min)(ct_y + dis_pred[3], (float)this->input_size_);
return BoxInfo{xmin, ymin, xmax, ymax, score, label};
}
void PicoDet::nms(std::vector<BoxInfo>& input_boxes, float NMS_THRESH) {
std::sort(input_boxes.begin(), input_boxes.end(), [](BoxInfo a, BoxInfo b) {
return a.score > b.score;
});
std::vector<float> vArea(input_boxes.size());
for (int i = 0; i < int(input_boxes.size()); ++i) {
vArea[i] = (input_boxes.at(i).x2 - input_boxes.at(i).x1 + 1) *
(input_boxes.at(i).y2 - input_boxes.at(i).y1 + 1);
}
for (int i = 0; i < int(input_boxes.size()); ++i) {
for (int j = i + 1; j < int(input_boxes.size());) {
float xx1 = (std::max)(input_boxes[i].x1, input_boxes[j].x1);
float yy1 = (std::max)(input_boxes[i].y1, input_boxes[j].y1);
float xx2 = (std::min)(input_boxes[i].x2, input_boxes[j].x2);
float yy2 = (std::min)(input_boxes[i].y2, input_boxes[j].y2);
float w = (std::max)(float(0), xx2 - xx1 + 1);
float h = (std::max)(float(0), yy2 - yy1 + 1);
float inter = w * h;
float ovr = inter / (vArea[i] + vArea[j] - inter);
if (ovr >= NMS_THRESH) {
input_boxes.erase(input_boxes.begin() + j);
vArea.erase(vArea.begin() + j);
} else {
j++;
}
}
}
}
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// reference from https://github.com/RangiLyu/nanodet/tree/main/demo_openvino
#ifndef _PICODET_OPENVINO_H_
#define _PICODET_OPENVINO_H_
#include <inference_engine.hpp>
#include <opencv2/core.hpp>
#include <string>
#define image_size 416
typedef struct HeadInfo {
std::string cls_layer;
std::string dis_layer;
int stride;
} HeadInfo;
typedef struct BoxInfo {
float x1;
float y1;
float x2;
float y2;
float score;
int label;
} BoxInfo;
class PicoDet {
public:
PicoDet(const char *param);
~PicoDet();
InferenceEngine::ExecutableNetwork network_;
InferenceEngine::InferRequest infer_request_;
std::vector<HeadInfo> heads_info_{
// cls_pred|dis_pred|stride
{"transpose_0.tmp_0", "transpose_1.tmp_0", 8},
{"transpose_2.tmp_0", "transpose_3.tmp_0", 16},
{"transpose_4.tmp_0", "transpose_5.tmp_0", 32},
{"transpose_6.tmp_0", "transpose_7.tmp_0", 64},
};
std::vector<BoxInfo> detect(cv::Mat image, float score_threshold,
float nms_threshold);
private:
void preprocess(cv::Mat &image, InferenceEngine::Blob::Ptr &blob);
void decode_infer(const float *&cls_pred, const float *&dis_pred, int stride,
float threshold,
std::vector<std::vector<BoxInfo>> &results);
BoxInfo disPred2Bbox(const float *&dfl_det, int label, float score, int x,
int y, int stride);
static void nms(std::vector<BoxInfo> &result, float nms_threshold);
std::string input_name_;
int input_size_ = image_size;
int num_class_ = 80;
int reg_max_ = 7;
};
#endif
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import yaml
import argparse
import numpy as np
import glob
from onnxruntime import InferenceSession
from preprocess import Compose
# Global set of supported model architectures
SUPPORT_MODELS = {
'YOLO', 'PPYOLOE', 'YOLOX', 'YOLOv5', 'YOLOv6', 'YOLOv7', 'YOLOv8', 'RTMDet'
}
parser = argparse.ArgumentParser(description=__doc__)
parser.add_argument("--infer_cfg", type=str, help="infer_cfg.yml")
parser.add_argument(
'--onnx_file', type=str, default="model.onnx", help="onnx model file path")
parser.add_argument("--image_dir", type=str)
parser.add_argument("--image_file", type=str)
def get_test_images(infer_dir, infer_img):
"""
Get image path list in TEST mode
"""
assert infer_img is not None or infer_dir is not None, \
"--image_file or --image_dir should be set"
assert infer_img is None or os.path.isfile(infer_img), \
"{} is not a file".format(infer_img)
assert infer_dir is None or os.path.isdir(infer_dir), \
"{} is not a directory".format(infer_dir)
# infer_img has a higher priority
if infer_img and os.path.isfile(infer_img):
return [infer_img]
images = set()
infer_dir = os.path.abspath(infer_dir)
assert os.path.isdir(infer_dir), \
"infer_dir {} is not a directory".format(infer_dir)
exts = ['jpg', 'jpeg', 'png', 'bmp']
exts += [ext.upper() for ext in exts]
for ext in exts:
images.update(glob.glob('{}/*.{}'.format(infer_dir, ext)))
images = list(images)
assert len(images) > 0, "no image found in {}".format(infer_dir)
print("Found {} inference images in total.".format(len(images)))
return images
class PredictConfig(object):
"""set config of preprocess, postprocess and visualize
Args:
infer_config (str): path of infer_cfg.yml
"""
def __init__(self, infer_config):
# parsing Yaml config for Preprocess
with open(infer_config) as f:
yml_conf = yaml.safe_load(f)
self.check_model(yml_conf)
self.arch = yml_conf['arch']
self.preprocess_infos = yml_conf['Preprocess']
self.min_subgraph_size = yml_conf['min_subgraph_size']
self.label_list = yml_conf['label_list']
self.use_dynamic_shape = yml_conf['use_dynamic_shape']
self.draw_threshold = yml_conf.get("draw_threshold", 0.5)
self.mask = yml_conf.get("mask", False)
self.tracker = yml_conf.get("tracker", None)
self.nms = yml_conf.get("NMS", None)
self.fpn_stride = yml_conf.get("fpn_stride", None)
if self.arch == 'RCNN' and yml_conf.get('export_onnx', False):
print(
'The RCNN export model is used for ONNX and it only supports batch_size = 1'
)
self.print_config()
def check_model(self, yml_conf):
"""
Raises:
ValueError: loaded model not in supported model type
"""
for support_model in SUPPORT_MODELS:
if support_model in yml_conf['arch']:
return True
raise ValueError("Unsupported arch: {}, expect {}".format(yml_conf[
'arch'], SUPPORT_MODELS))
def print_config(self):
print('----------- Model Configuration -----------')
print('%s: %s' % ('Model Arch', self.arch))
print('%s: ' % ('Transform Order'))
for op_info in self.preprocess_infos:
print('--%s: %s' % ('transform op', op_info['type']))
print('--------------------------------------------')
def predict_image(infer_config, predictor, img_list):
# load preprocess transforms
transforms = Compose(infer_config.preprocess_infos)
# predict image
for img_path in img_list:
inputs = transforms(img_path)
inputs_name = [var.name for var in predictor.get_inputs()]
inputs = {k: inputs[k][None, ] for k in inputs_name}
outputs = predictor.run(output_names=None, input_feed=inputs)
print("ONNXRuntime predict: ")
if infer_config.arch in ["HRNet"]:
print(np.array(outputs[0]))
else:
bboxes = np.array(outputs[0])
for bbox in bboxes:
if bbox[0] > -1 and bbox[1] > infer_config.draw_threshold:
print(f"{int(bbox[0])} {bbox[1]} "
f"{bbox[2]} {bbox[3]} {bbox[4]} {bbox[5]}")
if __name__ == '__main__':
FLAGS = parser.parse_args()
# load image list
img_list = get_test_images(FLAGS.image_dir, FLAGS.image_file)
# load predictor
predictor = InferenceSession(FLAGS.onnx_file)
# load infer config
infer_config = PredictConfig(FLAGS.infer_cfg)
predict_image(infer_config, predictor, img_list)
import numpy as np
import cv2
import copy
def decode_image(img_path):
with open(img_path, 'rb') as f:
im_read = f.read()
data = np.frombuffer(im_read, dtype='uint8')
im = cv2.imdecode(data, 1) # BGR mode, but need RGB mode
im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
img_info = {
"im_shape": np.array(
im.shape[:2], dtype=np.float32),
"scale_factor": np.array(
[1., 1.], dtype=np.float32)
}
return im, img_info
class Resize(object):
"""resize image by target_size and max_size
Args:
target_size (int): the target size of image
keep_ratio (bool): whether keep_ratio or not, default true
interp (int): method of resize
"""
def __init__(self, target_size, keep_ratio=True, interp=cv2.INTER_LINEAR):
if isinstance(target_size, int):
target_size = [target_size, target_size]
self.target_size = target_size
self.keep_ratio = keep_ratio
self.interp = interp
def __call__(self, im, im_info):
"""
Args:
im (np.ndarray): image (np.ndarray)
im_info (dict): info of image
Returns:
im (np.ndarray): processed image (np.ndarray)
im_info (dict): info of processed image
"""
assert len(self.target_size) == 2
assert self.target_size[0] > 0 and self.target_size[1] > 0
im_channel = im.shape[2]
im_scale_y, im_scale_x = self.generate_scale(im)
im = cv2.resize(
im,
None,
None,
fx=im_scale_x,
fy=im_scale_y,
interpolation=self.interp)
im_info['im_shape'] = np.array(im.shape[:2]).astype('float32')
im_info['scale_factor'] = np.array(
[im_scale_y, im_scale_x]).astype('float32')
return im, im_info
def generate_scale(self, im):
"""
Args:
im (np.ndarray): image (np.ndarray)
Returns:
im_scale_x: the resize ratio of X
im_scale_y: the resize ratio of Y
"""
origin_shape = im.shape[:2]
im_c = im.shape[2]
if self.keep_ratio:
im_size_min = np.min(origin_shape)
im_size_max = np.max(origin_shape)
target_size_min = np.min(self.target_size)
target_size_max = np.max(self.target_size)
im_scale = float(target_size_min) / float(im_size_min)
if np.round(im_scale * im_size_max) > target_size_max:
im_scale = float(target_size_max) / float(im_size_max)
im_scale_x = im_scale
im_scale_y = im_scale
else:
resize_h, resize_w = self.target_size
im_scale_y = resize_h / float(origin_shape[0])
im_scale_x = resize_w / float(origin_shape[1])
return im_scale_y, im_scale_x
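    # Worked example (keep_ratio=True, target_size=[640, 640], image 480x640):
    # im_scale = 640 / 480 = 1.333, but round(1.333 * 640) = 853 > 640, so the
    # scale is clamped to 640 / 640 = 1.0 and the long side never exceeds the
    # target.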
class NormalizeImage(object):
"""normalize image
Args:
mean (list): im - mean
std (list): im / std
is_scale (bool): whether need im / 255
norm_type (str): type in ['mean_std', 'none']
"""
def __init__(self, mean, std, is_scale=True, norm_type='mean_std'):
self.mean = mean
self.std = std
self.is_scale = is_scale
self.norm_type = norm_type
def __call__(self, im, im_info):
"""
Args:
im (np.ndarray): image (np.ndarray)
im_info (dict): info of image
Returns:
im (np.ndarray): processed image (np.ndarray)
im_info (dict): info of processed image
"""
im = im.astype(np.float32, copy=False)
if self.is_scale:
scale = 1.0 / 255.0
im *= scale
if self.norm_type == 'mean_std':
mean = np.array(self.mean)[np.newaxis, np.newaxis, :]
std = np.array(self.std)[np.newaxis, np.newaxis, :]
im -= mean
im /= std
return im, im_info
class Permute(object):
"""permute image
Args:
to_bgr (bool): whether convert RGB to BGR
channel_first (bool): whether convert HWC to CHW
"""
def __init__(self, ):
super(Permute, self).__init__()
def __call__(self, im, im_info):
"""
Args:
im (np.ndarray): image (np.ndarray)
im_info (dict): info of image
Returns:
im (np.ndarray): processed image (np.ndarray)
im_info (dict): info of processed image
"""
im = im.transpose((2, 0, 1)).copy()
return im, im_info
class PadStride(object):
""" padding image for model with FPN, instead PadBatch(pad_to_stride) in original config
Args:
stride (bool): model with FPN need image shape % stride == 0
"""
def __init__(self, stride=0):
self.coarsest_stride = stride
def __call__(self, im, im_info):
"""
Args:
im (np.ndarray): image (np.ndarray)
im_info (dict): info of image
Returns:
im (np.ndarray): processed image (np.ndarray)
im_info (dict): info of processed image
"""
coarsest_stride = self.coarsest_stride
if coarsest_stride <= 0:
return im, im_info
im_c, im_h, im_w = im.shape
pad_h = int(np.ceil(float(im_h) / coarsest_stride) * coarsest_stride)
pad_w = int(np.ceil(float(im_w) / coarsest_stride) * coarsest_stride)
padding_im = np.zeros((im_c, pad_h, pad_w), dtype=np.float32)
padding_im[:, :im_h, :im_w] = im
return padding_im, im_info
class LetterBoxResize(object):
def __init__(self, target_size):
"""
Resize image to target size, convert normalized xywh to pixel xyxy
format ([x_center, y_center, width, height] -> [x0, y0, x1, y1]).
Args:
target_size (int|list): image target size.
"""
super(LetterBoxResize, self).__init__()
if isinstance(target_size, int):
target_size = [target_size, target_size]
self.target_size = target_size
def letterbox(self, img, height, width, color=(127.5, 127.5, 127.5)):
# letterbox: resize a rectangular image to a padded rectangular
shape = img.shape[:2] # [height, width]
ratio_h = float(height) / shape[0]
ratio_w = float(width) / shape[1]
ratio = min(ratio_h, ratio_w)
new_shape = (round(shape[1] * ratio),
round(shape[0] * ratio)) # [width, height]
padw = (width - new_shape[0]) / 2
padh = (height - new_shape[1]) / 2
top, bottom = round(padh - 0.1), round(padh + 0.1)
left, right = round(padw - 0.1), round(padw + 0.1)
img = cv2.resize(
img, new_shape, interpolation=cv2.INTER_AREA) # resized, no border
img = cv2.copyMakeBorder(
img, top, bottom, left, right, cv2.BORDER_CONSTANT,
value=color) # padded rectangular
return img, ratio, padw, padh
def __call__(self, im, im_info):
"""
Args:
im (np.ndarray): image (np.ndarray)
im_info (dict): info of image
Returns:
im (np.ndarray): processed image (np.ndarray)
im_info (dict): info of processed image
"""
assert len(self.target_size) == 2
assert self.target_size[0] > 0 and self.target_size[1] > 0
height, width = self.target_size
h, w = im.shape[:2]
im, ratio, padw, padh = self.letterbox(im, height=height, width=width)
new_shape = [round(h * ratio), round(w * ratio)]
im_info['im_shape'] = np.array(new_shape, dtype=np.float32)
im_info['scale_factor'] = np.array([ratio, ratio], dtype=np.float32)
return im, im_info
class Pad(object):
def __init__(self, size, fill_value=[114.0, 114.0, 114.0]):
"""
Pad image to a specified size.
Args:
size (list[int]): image target size
fill_value (list[float]): rgb value of pad area, default (114.0, 114.0, 114.0)
"""
super(Pad, self).__init__()
if isinstance(size, int):
size = [size, size]
self.size = size
self.fill_value = fill_value
def __call__(self, im, im_info):
im_h, im_w = im.shape[:2]
h, w = self.size
if h == im_h and w == im_w:
im = im.astype(np.float32)
return im, im_info
canvas = np.ones((h, w, 3), dtype=np.float32)
canvas *= np.array(self.fill_value, dtype=np.float32)
canvas[0:im_h, 0:im_w, :] = im.astype(np.float32)
im = canvas
return im, im_info
def rotate_point(pt, angle_rad):
"""Rotate a point by an angle.
Args:
pt (list[float]): 2 dimensional point to be rotated
angle_rad (float): rotation angle by radian
Returns:
list[float]: Rotated point.
"""
assert len(pt) == 2
sn, cs = np.sin(angle_rad), np.cos(angle_rad)
new_x = pt[0] * cs - pt[1] * sn
new_y = pt[0] * sn + pt[1] * cs
rotated_pt = [new_x, new_y]
return rotated_pt
def _get_3rd_point(a, b):
"""To calculate the affine matrix, three pairs of points are required. This
function is used to get the 3rd point, given 2D points a & b.
The 3rd point is defined by rotating vector `a - b` by 90 degrees
anticlockwise, using b as the rotation center.
Args:
a (np.ndarray): point(x,y)
b (np.ndarray): point(x,y)
Returns:
np.ndarray: The 3rd point.
"""
assert len(a) == 2
assert len(b) == 2
direction = a - b
third_pt = b + np.array([-direction[1], direction[0]], dtype=np.float32)
return third_pt
def get_affine_transform(center,
input_size,
rot,
output_size,
shift=(0., 0.),
inv=False):
"""Get the affine transform matrix, given the center/scale/rot/output_size.
Args:
center (np.ndarray[2, ]): Center of the bounding box (x, y).
        input_size (np.ndarray[2, ] | int): Scale of the bounding box
            wrt [width, height].
rot (float): Rotation angle (degree).
output_size (np.ndarray[2, ]): Size of the destination heatmaps.
shift (0-100%): Shift translation ratio wrt the width/height.
Default (0., 0.).
inv (bool): Option to inverse the affine transform direction.
(inv=False: src->dst or inv=True: dst->src)
Returns:
np.ndarray: The transform matrix.
"""
assert len(center) == 2
assert len(output_size) == 2
assert len(shift) == 2
if not isinstance(input_size, (np.ndarray, list)):
input_size = np.array([input_size, input_size], dtype=np.float32)
scale_tmp = input_size
shift = np.array(shift)
src_w = scale_tmp[0]
dst_w = output_size[0]
dst_h = output_size[1]
rot_rad = np.pi * rot / 180
src_dir = rotate_point([0., src_w * -0.5], rot_rad)
dst_dir = np.array([0., dst_w * -0.5])
src = np.zeros((3, 2), dtype=np.float32)
src[0, :] = center + scale_tmp * shift
src[1, :] = center + src_dir + scale_tmp * shift
src[2, :] = _get_3rd_point(src[0, :], src[1, :])
dst = np.zeros((3, 2), dtype=np.float32)
dst[0, :] = [dst_w * 0.5, dst_h * 0.5]
dst[1, :] = np.array([dst_w * 0.5, dst_h * 0.5]) + dst_dir
dst[2, :] = _get_3rd_point(dst[0, :], dst[1, :])
if inv:
trans = cv2.getAffineTransform(np.float32(dst), np.float32(src))
else:
trans = cv2.getAffineTransform(np.float32(src), np.float32(dst))
return trans
class WarpAffine(object):
"""Warp affine the image
"""
def __init__(self,
keep_res=False,
pad=31,
input_h=512,
input_w=512,
scale=0.4,
shift=0.1):
self.keep_res = keep_res
self.pad = pad
self.input_h = input_h
self.input_w = input_w
self.scale = scale
self.shift = shift
def __call__(self, im, im_info):
"""
Args:
im (np.ndarray): image (np.ndarray)
im_info (dict): info of image
Returns:
im (np.ndarray): processed image (np.ndarray)
im_info (dict): info of processed image
"""
img = cv2.cvtColor(im, cv2.COLOR_RGB2BGR)
h, w = img.shape[:2]
if self.keep_res:
input_h = (h | self.pad) + 1
input_w = (w | self.pad) + 1
s = np.array([input_w, input_h], dtype=np.float32)
c = np.array([w // 2, h // 2], dtype=np.float32)
else:
s = max(h, w) * 1.0
input_h, input_w = self.input_h, self.input_w
c = np.array([w / 2., h / 2.], dtype=np.float32)
trans_input = get_affine_transform(c, s, 0, [input_w, input_h])
img = cv2.resize(img, (w, h))
inp = cv2.warpAffine(
img, trans_input, (input_w, input_h), flags=cv2.INTER_LINEAR)
return inp, im_info
# keypoint preprocess
def get_warp_matrix(theta, size_input, size_dst, size_target):
"""This code is based on
https://github.com/open-mmlab/mmpose/blob/master/mmpose/core/post_processing/post_transforms.py
Calculate the transformation matrix under the constraint of unbiased.
Paper ref: Huang et al. The Devil is in the Details: Delving into Unbiased
Data Processing for Human Pose Estimation (CVPR 2020).
Args:
theta (float): Rotation angle in degrees.
size_input (np.ndarray): Size of input image [w, h].
size_dst (np.ndarray): Size of output image [w, h].
size_target (np.ndarray): Size of ROI in input plane [w, h].
Returns:
matrix (np.ndarray): A matrix for transformation.
"""
theta = np.deg2rad(theta)
matrix = np.zeros((2, 3), dtype=np.float32)
scale_x = size_dst[0] / size_target[0]
scale_y = size_dst[1] / size_target[1]
matrix[0, 0] = np.cos(theta) * scale_x
matrix[0, 1] = -np.sin(theta) * scale_x
matrix[0, 2] = scale_x * (
-0.5 * size_input[0] * np.cos(theta) + 0.5 * size_input[1] *
np.sin(theta) + 0.5 * size_target[0])
matrix[1, 0] = np.sin(theta) * scale_y
matrix[1, 1] = np.cos(theta) * scale_y
matrix[1, 2] = scale_y * (
-0.5 * size_input[0] * np.sin(theta) - 0.5 * size_input[1] *
np.cos(theta) + 0.5 * size_target[1])
return matrix
class TopDownEvalAffine(object):
"""apply affine transform to image and coords
Args:
trainsize (list): [w, h], the standard size used to train
use_udp (bool): whether to use Unbiased Data Processing.
records(dict): the dict contained the image and coords
Returns:
records (dict): contain the image and coords after tranformed
"""
def __init__(self, trainsize, use_udp=False):
self.trainsize = trainsize
self.use_udp = use_udp
def __call__(self, image, im_info):
rot = 0
imshape = im_info['im_shape'][::-1]
center = im_info['center'] if 'center' in im_info else imshape / 2.
scale = im_info['scale'] if 'scale' in im_info else imshape
if self.use_udp:
trans = get_warp_matrix(
rot, center * 2.0,
[self.trainsize[0] - 1.0, self.trainsize[1] - 1.0], scale)
image = cv2.warpAffine(
image,
trans, (int(self.trainsize[0]), int(self.trainsize[1])),
flags=cv2.INTER_LINEAR)
else:
trans = get_affine_transform(center, scale, rot, self.trainsize)
image = cv2.warpAffine(
image,
trans, (int(self.trainsize[0]), int(self.trainsize[1])),
flags=cv2.INTER_LINEAR)
return image, im_info
class Compose:
def __init__(self, transforms):
self.transforms = []
for op_info in transforms:
new_op_info = op_info.copy()
op_type = new_op_info.pop('type')
self.transforms.append(eval(op_type)(**new_op_info))
def __call__(self, img_path):
img, im_info = decode_image(img_path)
for t in self.transforms:
img, im_info = t(img, im_info)
inputs = copy.deepcopy(im_info)
inputs['image'] = img
return inputs
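# Usage sketch (file names hypothetical), mirroring predict_image above:
#   transforms = Compose(PredictConfig('infer_cfg.yml').preprocess_infos)
#   inputs = transforms('demo.jpg')
#   # inputs is a dict with 'image' (CHW float32), 'im_shape', 'scale_factor'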