Commit fccfdfa5 authored by dlyrm's avatar dlyrm
Browse files

update code

parent dcc7bf4f
Pipeline #681 canceled with stages
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <ctime>
#include <memory>
#include <string>
#include <utility>
#include <vector>
#include <opencv2/core/core.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#include "paddle_inference_api.h" // NOLINT
#include "include/config_parser.h"
#include "include/preprocess_op.h"
#include "include/tracker.h"
using namespace paddle_infer;
namespace PaddleDetection {
// JDE Detection Result
// Axis-aligned bounding box of one tracked object, in pixel coordinates:
// (left, top) is the top-left corner, (right, bottom) the bottom-right.
struct MOT_Rect {
float left;
float top;
float right;
float bottom;
};
// One tracked object: tracker-assigned id, detection score and box.
struct MOT_Track {
int ids;  // track identity assigned by the tracker
float score;  // detection confidence of this track's current box
MOT_Rect rects;  // current bounding box of the track
};
// Full tracking result for one frame: one MOT_Track per object.
typedef std::vector<MOT_Track> MOT_Result;
// Generate a visualization color for the given track id.
cv::Scalar GetColor(int idx);
// Visualization of tracking results: returns a copy of `img` with each
// track's box/id drawn, annotated with `fps` and `frame_id`.
cv::Mat VisualizeTrackResult(const cv::Mat& img,
const MOT_Result& results,
const float fps,
const int frame_id);
// JDE (Joint Detection and Embedding) multi-object tracking detector.
// Loads an exported Paddle inference model from `model_dir` (inference
// config parsed via ConfigPaser), preprocesses input frames, runs the
// predictor and post-processes detection + embedding outputs into
// MOT_Result tracks.
class JDEDetector {
public:
// device: "CPU" or accelerator name; run_mode: "paddle" or a TensorRT
// mode — TRT shape/calibration arguments only matter for TRT runs.
explicit JDEDetector(const std::string& model_dir,
const std::string& device = "CPU",
bool use_mkldnn = false,
int cpu_threads = 1,
const std::string& run_mode = "paddle",
const int batch_size = 1,
const int gpu_id = 0,
const int trt_min_shape = 1,
const int trt_max_shape = 1280,
const int trt_opt_shape = 640,
bool trt_calib_mode = false,
const int min_box_area = 200) {
this->device_ = device;
this->gpu_id_ = gpu_id;
this->cpu_math_library_num_threads_ = cpu_threads;
this->use_mkldnn_ = use_mkldnn;
this->trt_min_shape_ = trt_min_shape;
this->trt_max_shape_ = trt_max_shape;
this->trt_opt_shape_ = trt_opt_shape;
this->trt_calib_mode_ = trt_calib_mode;
// Load the exported inference config first: every config_ field read
// below depends on it.
config_.load_config(model_dir);
this->use_dynamic_shape_ = config_.use_dynamic_shape_;
this->min_subgraph_size_ = config_.min_subgraph_size_;
threshold_ = config_.draw_threshold_;
preprocessor_.Init(config_.preprocess_info_);
LoadModel(model_dir, batch_size, run_mode);
this->min_box_area_ = min_box_area;
this->conf_thresh_ = config_.conf_thresh_;
}
// Load Paddle inference model
void LoadModel(const std::string& model_dir,
const int batch_size = 1,
const std::string& run_mode = "paddle");
// Run predictor on a batch of frames. `times`, if non-null, receives
// timing measurements; `warmup`/`repeats` presumably control untimed
// and timed predictor runs — confirm against the .cc implementation.
void Predict(const std::vector<cv::Mat> imgs,
const double threshold = 0.5,
const int warmup = 0,
const int repeats = 1,
MOT_Result* result = nullptr,
std::vector<double>* times = nullptr);
private:
std::string device_ = "CPU";
int gpu_id_ = 0;
int cpu_math_library_num_threads_ = 1;
bool use_mkldnn_ = false;
int min_subgraph_size_ = 3;
bool use_dynamic_shape_ = false;
int trt_min_shape_ = 1;
int trt_max_shape_ = 1280;
int trt_opt_shape_ = 640;
bool trt_calib_mode_ = false;
// Preprocess image and copy data to input buffer
void Preprocess(const cv::Mat& image_mat);
// Postprocess raw detections (`dets`) and appearance embeddings
// (`emb`) into tracked objects.
void Postprocess(const cv::Mat dets, const cv::Mat emb, MOT_Result* result);
std::shared_ptr<Predictor> predictor_;
Preprocessor preprocessor_;
ImageBlob inputs_;
std::vector<float> bbox_data_;
std::vector<float> emb_data_;
float threshold_;  // draw threshold from the inference config
ConfigPaser config_;
float min_box_area_;  // boxes smaller than this are presumably discarded — confirm in .cc
float conf_thresh_;
};
} // namespace PaddleDetection
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <ctime>
#include <memory>
#include <string>
#include <utility>
#include <vector>
#include <opencv2/core/core.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#include "paddle_inference_api.h" // NOLINT
#include "include/config_parser.h"
#include "include/keypoint_postprocess.h"
#include "include/preprocess_op.h"
using namespace paddle_infer;
namespace PaddleDetection {
// Visualization of keypoint results: returns a copy of `img` with the
// detected keypoints drawn using `colormap`.
cv::Mat VisualizeKptsResult(const cv::Mat& img,
const std::vector<KeyPointResult>& results,
const std::vector<int>& colormap);
// Top-down keypoint detector backed by a Paddle inference model.
// Loads the exported model from `model_dir`, preprocesses cropped
// person images, runs the predictor and decodes heatmaps into
// KeyPointResult (optionally with DARK refinement, see `use_dark`).
class KeyPointDetector {
public:
explicit KeyPointDetector(const std::string& model_dir,
const std::string& device = "CPU",
bool use_mkldnn = false,
int cpu_threads = 1,
const std::string& run_mode = "paddle",
const int batch_size = 1,
const int gpu_id = 0,
const int trt_min_shape = 1,
const int trt_max_shape = 1280,
const int trt_opt_shape = 640,
bool trt_calib_mode = false,
bool use_dark = true) {
this->device_ = device;
this->gpu_id_ = gpu_id;
this->cpu_math_library_num_threads_ = cpu_threads;
this->use_mkldnn_ = use_mkldnn;
this->use_dark = use_dark;
this->trt_min_shape_ = trt_min_shape;
this->trt_max_shape_ = trt_max_shape;
this->trt_opt_shape_ = trt_opt_shape;
this->trt_calib_mode_ = trt_calib_mode;
// Load inference config before reading any config_ field.
config_.load_config(model_dir);
this->use_dynamic_shape_ = config_.use_dynamic_shape_;
this->min_subgraph_size_ = config_.min_subgraph_size_;
threshold_ = config_.draw_threshold_;
preprocessor_.Init(config_.preprocess_info_);
LoadModel(model_dir, batch_size, run_mode);
}
// Load Paddle inference model
void LoadModel(const std::string& model_dir,
const int batch_size = 1,
const std::string& run_mode = "paddle");
// Run predictor. `center`/`scale` describe each input crop's affine
// mapping back to the original image; `times`, if non-null, receives
// timing measurements.
void Predict(const std::vector<cv::Mat> imgs,
std::vector<std::vector<float>>& center,
std::vector<std::vector<float>>& scale,
const double threshold = 0.5,
const int warmup = 0,
const int repeats = 1,
std::vector<KeyPointResult>* result = nullptr,
std::vector<double>* times = nullptr);
// Get Model Label list
const std::vector<std::string>& GetLabelList() const {
return config_.label_list_;
}
private:
std::string device_ = "CPU";
int gpu_id_ = 0;
int cpu_math_library_num_threads_ = 1;
// NOTE(review): lacks the trailing-underscore convention of the other
// members; renaming would require touching the .cc, so left as-is.
bool use_dark = true;
bool use_mkldnn_ = false;
int min_subgraph_size_ = 3;
bool use_dynamic_shape_ = false;
int trt_min_shape_ = 1;
int trt_max_shape_ = 1280;
int trt_opt_shape_ = 640;
bool trt_calib_mode_ = false;
// Preprocess image and copy data to input buffer
void Preprocess(const cv::Mat& image_mat);
// Decode raw heatmap `output` (+ argmax indices `idxout`) into
// keypoints mapped back to original-image coordinates.
void Postprocess(std::vector<float>& output,
std::vector<int> output_shape,
std::vector<int64_t>& idxout,
std::vector<int> idx_shape,
std::vector<KeyPointResult>* result,
std::vector<std::vector<float>>& center,
std::vector<std::vector<float>>& scale);
std::shared_ptr<Predictor> predictor_;
Preprocessor preprocessor_;
ImageBlob inputs_;
std::vector<float> output_data_;
std::vector<int64_t> idx_data_;
float threshold_;
ConfigPaser config_;
};
} // namespace PaddleDetection
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <math.h>
#include <opencv2/core/core.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#include <vector>
namespace PaddleDetection {
// Given two points a, b, compute the third point of the affine source
// triangle (perpendicular construction) — used by get_affine_transform.
std::vector<float> get_3rd_point(std::vector<float>& a, std::vector<float>& b);
// Rotate (src_point_x, src_point_y) by rot_rad radians.
std::vector<float> get_dir(float src_point_x, float src_point_y, float rot_rad);
// Apply affine transform `trans` to point (pt_x, pt_y), writing the
// result into `preds` at joint index p. (Name keeps the upstream typo.)
void affine_tranform(
float pt_x, float pt_y, cv::Mat& trans, std::vector<float>& preds, int p);
// Build the 2x3 affine matrix mapping a (center, scale, rot) crop to
// output_size; inv != 0 presumably yields the inverse mapping — confirm.
cv::Mat get_affine_transform(std::vector<float>& center,
std::vector<float>& scale,
float rot,
std::vector<int>& output_size,
int inv);
// Map heatmap-space `coords` back to original-image coordinates.
void transform_preds(std::vector<float>& coords,
std::vector<float>& center,
std::vector<float>& scale,
std::vector<int>& output_size,
std::vector<int>& dim,
std::vector<float>& target_coords,
bool affine = false);
// Convert a pixel box to the (center, scale) representation used above.
void box_to_center_scale(std::vector<int>& box,
int width,
int height,
std::vector<float>& center,
std::vector<float>& scale);
// Argmax over each joint's heatmap: locations into `preds`, peak
// values into `maxvals`, for sample `batchid`.
void get_max_preds(float* heatmap,
std::vector<int>& dim,
std::vector<float>& preds,
float* maxvals,
int batchid,
int joint_idx);
// Full decoding of one sample's heatmaps into final keypoints;
// DARK enables distribution-aware coordinate refinement.
void get_final_preds(std::vector<float>& heatmap,
std::vector<int>& dim,
std::vector<int64_t>& idxout,
std::vector<int>& idxdim,
std::vector<float>& center,
std::vector<float> scale,
std::vector<float>& preds,
int batchid,
bool DARK = true);
// Object KeyPoint Result
struct KeyPointResult {
// Keypoints: shape(N x 3); N: number of Joints; 3: x,y,conf
std::vector<float> keypoints;
int num_joints = -1;  // -1 means "not populated yet"
};
// Temporal smoother for keypoint sequences. Supports a One-Euro filter
// (adaptive cutoff: fc_min + beta * |velocity|) or plain exponential
// smoothing, selected by `filter_type`. Keeps the previous smoothed
// frame (x_prev_hat) and its derivative estimate (dx_prev_hat).
class PoseSmooth {
public:
// width/height: frame size used to scale per-joint thresholds.
// Fix: initializer list now follows member declaration order
// (thres_mult before filter_type) — the original order triggered
// -Wreorder and misrepresented actual initialization order; also
// dropped the stray ';' after the constructor body.
explicit PoseSmooth(const int width,
const int height,
std::string filter_type = "OneEuro",
float alpha = 0.5,
float fc_d = 0.1,
float fc_min = 0.1,
float beta = 0.1,
float thres_mult = 0.3)
: width(width),
height(height),
alpha(alpha),
fc_d(fc_d),
fc_min(fc_min),
beta(beta),
thres_mult(thres_mult),
filter_type(filter_type) {}
// Smooth one frame's keypoints against the stored history.
KeyPointResult smooth_process(KeyPointResult* result);
// Smooth a single joint `index` of `result` into `keypoint_smoothed`.
void PointSmooth(KeyPointResult* result,
KeyPointResult* keypoint_smoothed,
std::vector<float> thresholds,
int index);
float OneEuroFilter(float x_cur, float x_pre, int loc);
// Exponential smoothing coefficient for sample interval te and cutoff fc.
float smoothing_factor(float te, float fc);
float ExpSmoothing(float x_cur, float x_pre, int loc = 0);
private:
int width = 0;
int height = 0;
float alpha = 0.;
float fc_d = 1.;
float fc_min = 0.;
float beta = 1.;
float thres_mult = 1.;
std::string filter_type = "OneEuro";
// Per-joint motion thresholds (scaled by thres_mult and frame size —
// confirm usage in the .cc).
std::vector<float> thresholds = {0.005,
0.005,
0.005,
0.005,
0.005,
0.01,
0.01,
0.01,
0.01,
0.01,
0.01,
0.01,
0.01,
0.01,
0.01,
0.01,
0.01};
KeyPointResult x_prev_hat;   // previous smoothed keypoints
KeyPointResult dx_prev_hat;  // previous smoothed derivative
};
} // namespace PaddleDetection
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// The code is based on:
// https://github.com/gatagat/lap/blob/master/lap/lapjv.h
// The copyright of gatagat/lap is as follows:
// MIT License
#ifndef LAPJV_H
#define LAPJV_H
// Sentinel cost treated as "infinity" by the assignment solver.
#define LARGE 1000000
#if !defined TRUE
#define TRUE 1
#endif
#if !defined FALSE
#define FALSE 0
#endif
// Allocation helpers inherited from the original lapjv C code.
// NOTE: NEW() executes `return -1` from the *enclosing function* on
// malloc failure, so it is only usable inside int-returning functions.
#define NEW(x, t, n) if ((x = (t *)malloc(sizeof(t) * (n))) == 0) {return -1;}
#define FREE(x) if (x != 0) { free(x); x = 0; }
#define SWAP_INDICES(a, b) { int_t _temp_index = a; a = b; b = _temp_index; }
#include <opencv2/opencv.hpp>
namespace PaddleDetection {
typedef signed int int_t;
typedef unsigned int uint_t;
typedef double cost_t;
typedef char boolean;
typedef enum fp_t { FP_1 = 1, FP_2 = 2, FP_DYNAMIC = 3 } fp_t;
// Jonker-Volgenant linear assignment on the `cost` matrix; writes the
// row-to-column assignment into x and column-to-row into y.
// NOTE(review): semantics of extend_cost / cost_limit follow
// gatagat/lap — confirm against the implementation.
int lapjv_internal(
const cv::Mat &cost, const bool extend_cost, const float cost_limit,
int *x, int *y);
} // namespace PaddleDetection
#endif // LAPJV_H
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <ctime>
#include <memory>
#include <numeric>
#include <string>
#include <utility>
#include <vector>
#include <opencv2/core/core.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#include "paddle_inference_api.h" // NOLINT
#include "include/config_parser.h"
#include "include/picodet_postprocess.h"
#include "include/preprocess_op.h"
#include "include/utils.h"
using namespace paddle_infer;
namespace PaddleDetection {
// Generate visualization colormap for each class
std::vector<int> GenerateColorMap(int num_class);
// Visualization of detection results: returns a copy of `img` with
// each result's box (or rotated box when is_rbox) and label drawn.
// NOTE(review): "lables" parameter name keeps the upstream typo.
cv::Mat
VisualizeResult(const cv::Mat &img,
const std::vector<PaddleDetection::ObjectResult> &results,
const std::vector<std::string> &lables,
const std::vector<int> &colormap, const bool is_rbox);
// Generic object detector backed by a Paddle inference model. Loads
// the exported model from `model_dir`, preprocesses images, runs the
// predictor and post-processes outputs into ObjectResult lists
// (including SOLOv2 instance-mask outputs via SOLOv2Postprocess).
class ObjectDetector {
public:
explicit ObjectDetector(const std::string &model_dir,
const std::string &device = "CPU",
bool use_mkldnn = false, int cpu_threads = 1,
const std::string &run_mode = "paddle",
const int batch_size = 1, const int gpu_id = 0,
const int trt_min_shape = 1,
const int trt_max_shape = 1280,
const int trt_opt_shape = 640,
bool trt_calib_mode = false) {
this->device_ = device;
this->gpu_id_ = gpu_id;
this->cpu_math_library_num_threads_ = cpu_threads;
this->use_mkldnn_ = use_mkldnn;
this->trt_min_shape_ = trt_min_shape;
this->trt_max_shape_ = trt_max_shape;
this->trt_opt_shape_ = trt_opt_shape;
this->trt_calib_mode_ = trt_calib_mode;
// Load inference config before reading any config_ field.
config_.load_config(model_dir);
this->use_dynamic_shape_ = config_.use_dynamic_shape_;
this->min_subgraph_size_ = config_.min_subgraph_size_;
threshold_ = config_.draw_threshold_;
preprocessor_.Init(config_.preprocess_info_);
LoadModel(model_dir, batch_size, run_mode);
}
// Load Paddle inference model
void LoadModel(const std::string &model_dir, const int batch_size = 1,
const std::string &run_mode = "paddle");
// Run predictor on a batch. `bbox_num` receives the per-image box
// counts; `times`, if non-null, receives timing measurements.
void Predict(const std::vector<cv::Mat> imgs, const double threshold = 0.5,
const int warmup = 0, const int repeats = 1,
std::vector<PaddleDetection::ObjectResult> *result = nullptr,
std::vector<int> *bbox_num = nullptr,
std::vector<double> *times = nullptr);
// Get Model Label list
const std::vector<std::string> &GetLabelList() const {
return config_.label_list_;
}
private:
std::string device_ = "CPU";
int gpu_id_ = 0;
int cpu_math_library_num_threads_ = 1;
bool use_mkldnn_ = false;
int min_subgraph_size_ = 3;
bool use_dynamic_shape_ = false;
int trt_min_shape_ = 1;
int trt_max_shape_ = 1280;
int trt_opt_shape_ = 640;
bool trt_calib_mode_ = false;
// Preprocess image and copy data to input buffer
void Preprocess(const cv::Mat &image_mat);
// Decode raw bbox (and optional mask) tensors into ObjectResults.
void Postprocess(const std::vector<cv::Mat> mats,
std::vector<PaddleDetection::ObjectResult> *result,
std::vector<int> bbox_num, std::vector<float> output_data_,
std::vector<int> output_mask_data_, bool is_rbox);
// SOLOv2-specific decoding: labels, scores and global masks.
void SOLOv2Postprocess(
const std::vector<cv::Mat> mats, std::vector<ObjectResult> *result,
std::vector<int> *bbox_num, std::vector<int> out_bbox_num_data_,
std::vector<int64_t> out_label_data_, std::vector<float> out_score_data_,
std::vector<uint8_t> out_global_mask_data_, float threshold = 0.5);
std::shared_ptr<Predictor> predictor_;
Preprocessor preprocessor_;
ImageBlob inputs_;
float threshold_;
ConfigPaser config_;
};
} // namespace PaddleDetection
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <cmath>
#include <ctime>
#include <memory>
#include <numeric>
#include <string>
#include <utility>
#include <vector>
#include "include/utils.h"
namespace PaddleDetection {
// Decode PicoDet head outputs (`outs`, one tensor per FPN level with
// strides `fpn_stride`) into ObjectResults, applying score filtering
// and NMS; reg_max is the DFL regression bin count minus one.
void PicoDetPostProcess(std::vector<PaddleDetection::ObjectResult> *results,
std::vector<const float *> outs,
std::vector<int> fpn_stride,
std::vector<float> im_shape,
std::vector<float> scale_factor,
float score_threshold = 0.3, float nms_threshold = 0.5,
int num_class = 80, int reg_max = 7);
} // namespace PaddleDetection
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <glog/logging.h>
#include <yaml-cpp/yaml.h>
#include <iostream>
#include <memory>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>
#include <opencv2/core/core.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgproc/imgproc.hpp>
namespace PaddleDetection {
// Object for storing all preprocessed data
// Object for storing all preprocessed data of one image.
class ImageBlob {
public:
// image width and height (after preprocessing)
std::vector<float> im_shape_;
// Buffer for image data after preprocessing
std::vector<float> im_data_;
// in net data shape(after pad)
std::vector<float> in_net_shape_;
// Evaluation image width and height
// std::vector<float> eval_im_size_f_;
// Scale factor for image size to origin image size
std::vector<float> scale_factor_;
// in net image after preprocessing
cv::Mat in_net_im_;
};
// Abstraction of a preprocessing operation class
// Abstract base class for one preprocessing operation. Init() reads the
// op's parameters from its YAML config node; Run() applies the op to
// `im` in place and records metadata (shapes, scale factor) in `data`.
class PreprocessOp {
public:
// Virtual destructor: concrete ops are held via pointer-to-base
// (e.g. Preprocessor's op map), so destruction must dispatch to the
// derived class to avoid undefined behavior with non-shared_ptr owners.
virtual ~PreprocessOp() = default;
virtual void Init(const YAML::Node& item) = 0;
virtual void Run(cv::Mat* im, ImageBlob* data) = 0;
};
// First op in every pipeline: records the raw image's metadata
// (shape / scale factor) into the ImageBlob; no configuration needed.
class InitInfo : public PreprocessOp {
public:
virtual void Init(const YAML::Node& item) {}
virtual void Run(cv::Mat* im, ImageBlob* data);
};
// Normalize pixel values with per-channel mean/std from the config;
// `is_scale` and `norm_type` are optional keys with defaults below.
class NormalizeImage : public PreprocessOp {
public:
virtual void Init(const YAML::Node& item) {
mean_ = item["mean"].as<std::vector<float>>();
scale_ = item["std"].as<std::vector<float>>();
if (item["is_scale"]) is_scale_ = item["is_scale"].as<bool>();
if (item["norm_type"]) norm_type_ = item["norm_type"].as<std::string>();
}
virtual void Run(cv::Mat* im, ImageBlob* data);
private:
// Per-channel mean and std (from the "mean"/"std" config keys).
std::vector<float> mean_;
std::vector<float> scale_;
bool is_scale_ = true;  // presumably divide by 255 first — confirm in Run()
std::string norm_type_ = "mean_std";
};
// Reorder image layout (HWC to CHW) into the input buffer; stateless.
class Permute : public PreprocessOp {
public:
virtual void Init(const YAML::Node& item) {}
virtual void Run(cv::Mat* im, ImageBlob* data);
};
// Resize to `target_size`, optionally preserving aspect ratio.
class Resize : public PreprocessOp {
public:
virtual void Init(const YAML::Node& item) {
interp_ = item["interp"].as<int>();
keep_ratio_ = item["keep_ratio"].as<bool>();
target_size_ = item["target_size"].as<std::vector<int>>();
}
// Compute best resize scale for x-dimension, y-dimension
std::pair<float, float> GenerateScale(const cv::Mat& im);
virtual void Run(cv::Mat* im, ImageBlob* data);
private:
int interp_;  // OpenCV interpolation flag
bool keep_ratio_;
std::vector<int> target_size_;
std::vector<int> in_net_shape_;
};
// Letterbox resize: scale to fit `target_size` while keeping aspect
// ratio, padding the remainder (as used by JDE/YOLO-style models).
class LetterBoxResize : public PreprocessOp {
public:
virtual void Init(const YAML::Node& item) {
target_size_ = item["target_size"].as<std::vector<int>>();
}
// Uniform scale factor that fits the image into target_size_.
float GenerateScale(const cv::Mat& im);
virtual void Run(cv::Mat* im, ImageBlob* data);
private:
std::vector<int> target_size_;
std::vector<int> in_net_shape_;
};
// Models with FPN need input shape % stride == 0
// Models with FPN need input shape % stride == 0
class PadStride : public PreprocessOp {
public:
virtual void Init(const YAML::Node& item) {
stride_ = item["stride"].as<int>();
}
virtual void Run(cv::Mat* im, ImageBlob* data);
private:
int stride_;  // pad each spatial dim up to a multiple of this
};
// Affine-warp a person crop to the fixed `trainsize` expected by
// top-down keypoint models.
class TopDownEvalAffine : public PreprocessOp {
public:
virtual void Init(const YAML::Node& item) {
trainsize_ = item["trainsize"].as<std::vector<int>>();
}
virtual void Run(cv::Mat* im, ImageBlob* data);
private:
int interp_ = 1;  // interpolation flag used by the warp
std::vector<int> trainsize_;
};
// Affine-warp the full image to (input_w, input_h); when keep_res is
// set the source resolution is kept (padded via `pad_`) — confirm
// exact behavior in Run().
class WarpAffine : public PreprocessOp {
public:
virtual void Init(const YAML::Node& item) {
input_h_ = item["input_h"].as<int>();
input_w_ = item["input_w"].as<int>();
keep_res_ = item["keep_res"].as<bool>();
}
virtual void Run(cv::Mat* im, ImageBlob* data);
private:
int input_h_;
int input_w_;
int interp_ = 1;
bool keep_res_ = true;
int pad_ = 31;  // pad mask: sizes rounded up to (n | 31) + 1 style — verify
};
// Pad the image to a fixed `size`, filling new pixels with
// `fill_value` per channel.
class Pad : public PreprocessOp {
public:
virtual void Init(const YAML::Node& item) {
size_ = item["size"].as<std::vector<int>>();
fill_value_ = item["fill_value"].as<std::vector<float>>();
}
virtual void Run(cv::Mat* im, ImageBlob* data);
private:
std::vector<int> size_;
std::vector<float> fill_value_;
};
// Crop `area` (expanded by `expandratio`) out of `img` into `crop_img`,
// also returning the crop's center/scale for later inverse mapping.
void CropImg(cv::Mat& img,
cv::Mat& crop_img,
std::vector<int>& area,
std::vector<float>& center,
std::vector<float>& scale,
float expandratio = 0.15);
// check whether the input size is dynamic (images in the batch differ)
bool CheckDynamicInput(const std::vector<cv::Mat>& imgs);
// Pad images in batch to a common size
std::vector<cv::Mat> PadBatch(const std::vector<cv::Mat>& imgs);
// Owns and runs the configured chain of preprocessing operations.
class Preprocessor {
public:
// Build the op pipeline from the YAML preprocess config. An InitInfo
// op is always registered first so image metadata is populated before
// any configured op runs.
void Init(const YAML::Node& config_node) {
ops_["InitInfo"] = std::make_shared<InitInfo>();
for (const auto& node : config_node) {
const auto name = node["type"].as<std::string>();
ops_[name] = CreateOp(name);
ops_[name]->Init(node);
}
}
// Factory: map an op name from the config to a fresh concrete
// PreprocessOp; unknown names log to stderr and yield nullptr.
std::shared_ptr<PreprocessOp> CreateOp(const std::string& name) {
if (name == "Resize") return std::make_shared<Resize>();
if (name == "LetterBoxResize") return std::make_shared<LetterBoxResize>();
if (name == "Permute") return std::make_shared<Permute>();
if (name == "NormalizeImage") return std::make_shared<NormalizeImage>();
// use PadStride instead of PadBatch
if (name == "PadStride") return std::make_shared<PadStride>();
if (name == "TopDownEvalAffine") return std::make_shared<TopDownEvalAffine>();
if (name == "WarpAffine") return std::make_shared<WarpAffine>();
if (name == "Pad") return std::make_shared<Pad>();
std::cerr << "can not find function of OP: " << name
<< " and return: nullptr" << std::endl;
return nullptr;
}
// Apply the registered ops to `im`, filling `data`.
void Run(cv::Mat* im, ImageBlob* data);
public:
static const std::vector<std::string> RUN_ORDER;
private:
std::unordered_map<std::string, std::shared_ptr<PreprocessOp>> ops_;
};
} // namespace PaddleDetection
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// The code is based on:
// https://github.com/CnybTseng/JDE/blob/master/platforms/common/jdetracker.h
// The copyright of CnybTseng/JDE is as follows:
// MIT License
#pragma once
#include <map>
#include <vector>
#include <opencv2/opencv.hpp>
#include "trajectory.h"
namespace PaddleDetection {
// Row-index to column-index matches produced by linear assignment.
typedef std::map<int, int> Match;
typedef std::map<int, int>::iterator MatchIterator;
// Output track: identity, score and box as (left, top, right, bottom).
struct Track
{
int id;
float score;
cv::Vec4f ltrb;
};
// JDE multi-object tracker (singleton; access via instance()).
// update() consumes one frame's detections and embeddings and emits
// the currently tracked objects. NOTE(review): the singleton is not
// obviously thread-safe — confirm before concurrent use.
class JDETracker
{
public:
static JDETracker *instance(void);
// dets: detection boxes/scores; emb: appearance embeddings (one row
// per detection); tracks: output for this frame.
virtual bool update(const cv::Mat &dets, const cv::Mat &emb, std::vector<Track> &tracks);
private:
JDETracker(void);
virtual ~JDETracker(void) {}
// Fused motion (Mahalanobis) + appearance cost between pools.
cv::Mat motion_distance(const TrajectoryPtrPool &a, const TrajectoryPool &b);
// Solve assignment on `cost`; unmatched rows/cols are reported back.
void linear_assignment(const cv::Mat &cost, float cost_limit, Match &matches,
std::vector<int> &mismatch_row, std::vector<int> &mismatch_col);
// Drop trajectories whose IoU overlap exceeds iou_thresh.
void remove_duplicate_trajectory(TrajectoryPool &a, TrajectoryPool &b, float iou_thresh=0.15f);
private:
static JDETracker *me;  // singleton instance
int timestamp;          // current frame index
TrajectoryPool tracked_trajectories;
TrajectoryPool lost_trajectories;
TrajectoryPool removed_trajectories;
int max_lost_time;      // frames before a lost track is removed
float lambda;           // motion/appearance cost mixing weight — confirm in .cc
float det_thresh;
};
} // namespace PaddleDetection
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// The code is based on:
// https://github.com/CnybTseng/JDE/blob/master/platforms/common/trajectory.h
// The copyright of CnybTseng/JDE is as follows:
// MIT License
#pragma once
#include <vector>
#include <opencv2/opencv.hpp>
namespace PaddleDetection {
// Lifecycle state of a trajectory.
typedef enum
{
New = 0,
Tracked = 1,
Lost = 2,
Removed = 3
} TrajectoryState;
class Trajectory;
// Value pool and non-owning pointer pool of trajectories.
typedef std::vector<Trajectory> TrajectoryPool;
typedef std::vector<Trajectory>::iterator TrajectoryPoolIterator;
typedef std::vector<Trajectory *>TrajectoryPtrPool;
typedef std::vector<Trajectory *>::iterator TrajectoryPtrPoolIterator;
// Kalman filter specialized for tracking: 8-D state (4 measured box
// parameters + their velocities), 4-D measurement (see the inline
// constructor below).
class TKalmanFilter : public cv::KalmanFilter
{
public:
TKalmanFilter(void);
virtual ~TKalmanFilter(void) {}
// Initialize state from the first measurement.
virtual void init(const cv::Mat &measurement);
virtual const cv::Mat &predict();
virtual const cv::Mat &correct(const cv::Mat &measurement);
// Project state mean/covariance into measurement space.
virtual void project(cv::Mat &mean, cv::Mat &covariance) const;
private:
// Noise weights relative to box size (std-dev multipliers).
float std_weight_position;
float std_weight_velocity;
};
// Constant-velocity model: 8-D state, 4-D measurement. The transition
// matrix is identity with each position component coupled to its
// velocity (entry (i, i+4) = 1); the measurement matrix observes the
// four position components only.
inline TKalmanFilter::TKalmanFilter(void) : cv::KalmanFilter(8, 4)
{
transitionMatrix = cv::Mat::eye(8, 8, CV_32F);
measurementMatrix = cv::Mat::eye(4, 8, CV_32F);
for (int row = 0; row < 4; ++row) {
transitionMatrix.at<float>(row, row + 4) = 1;
}
std_weight_position = 1 / 20.f;
std_weight_velocity = 1 / 160.f;
}
// One tracked object's trajectory: bounding box, appearance embedding
// (raw + EMA-smoothed) and Kalman state. Friend operators implement
// set algebra over pools and pairwise distance matrices.
class Trajectory : public TKalmanFilter
{
public:
Trajectory();
Trajectory(cv::Vec4f &ltrb, float score, const cv::Mat &embedding);
Trajectory(const Trajectory &other);
Trajectory &operator=(const Trajectory &rhs);
virtual ~Trajectory(void) {};
// Next globally unique track id (shared static counter).
static int next_id();
virtual const cv::Mat &predict(void);
virtual void update(Trajectory &traj, int timestamp, bool update_embedding=true);
virtual void activate(int timestamp);
virtual void reactivate(Trajectory &traj, int timestamp, bool newid=false);
virtual void mark_lost(void);
virtual void mark_removed(void);
// Pool set algebra (union / difference) over value and pointer pools.
friend TrajectoryPool operator+(const TrajectoryPool &a, const TrajectoryPool &b);
friend TrajectoryPool operator+(const TrajectoryPool &a, const TrajectoryPtrPool &b);
friend TrajectoryPool &operator+=(TrajectoryPool &a, const TrajectoryPtrPool &b);
friend TrajectoryPool operator-(const TrajectoryPool &a, const TrajectoryPool &b);
friend TrajectoryPool &operator-=(TrajectoryPool &a, const TrajectoryPool &b);
friend TrajectoryPtrPool operator+(const TrajectoryPtrPool &a, const TrajectoryPtrPool &b);
friend TrajectoryPtrPool operator+(const TrajectoryPtrPool &a, TrajectoryPool &b);
friend TrajectoryPtrPool operator-(const TrajectoryPtrPool &a, const TrajectoryPtrPool &b);
// Pairwise distance matrices between pools (appearance / motion / IoU).
friend cv::Mat embedding_distance(const TrajectoryPool &a, const TrajectoryPool &b);
friend cv::Mat embedding_distance(const TrajectoryPtrPool &a, const TrajectoryPtrPool &b);
friend cv::Mat embedding_distance(const TrajectoryPtrPool &a, const TrajectoryPool &b);
friend cv::Mat mahalanobis_distance(const TrajectoryPool &a, const TrajectoryPool &b);
friend cv::Mat mahalanobis_distance(const TrajectoryPtrPool &a, const TrajectoryPtrPool &b);
friend cv::Mat mahalanobis_distance(const TrajectoryPtrPool &a, const TrajectoryPool &b);
friend cv::Mat iou_distance(const TrajectoryPool &a, const TrajectoryPool &b);
friend cv::Mat iou_distance(const TrajectoryPtrPool &a, const TrajectoryPtrPool &b);
friend cv::Mat iou_distance(const TrajectoryPtrPool &a, const TrajectoryPool &b);
private:
void update_embedding(const cv::Mat &embedding);
public:
TrajectoryState state;
cv::Vec4f ltrb;             // box as (left, top, right, bottom)
cv::Mat smooth_embedding;   // EMA-smoothed appearance embedding
int id;
bool is_activated;
int timestamp;              // frame of last update
int starttime;              // frame the trajectory started
float score;
private:
static int count;           // global id counter backing next_id()
cv::Vec4f xyah;             // box as (center_x, center_y, aspect, height)
cv::Mat current_embedding;  // most recent raw embedding
float eta;                  // EMA decay for embedding smoothing
int length;
};
// Convert a (left, top, right, bottom) box to (center_x, center_y,
// aspect_ratio, height), where aspect_ratio = width / height.
inline cv::Vec4f ltrb2xyah(cv::Vec4f &ltrb)
{
cv::Vec4f xyah;
const float box_height = ltrb[3] - ltrb[1];
xyah[0] = (ltrb[0] + ltrb[2]) * 0.5f;
xyah[1] = (ltrb[1] + ltrb[3]) * 0.5f;
xyah[3] = box_height;
xyah[2] = (ltrb[2] - ltrb[0]) / box_height;
return xyah;
}
// Default: an empty, inactive trajectory in the New state.
inline Trajectory::Trajectory() :
state(New), ltrb(cv::Vec4f()), smooth_embedding(cv::Mat()), id(0),
is_activated(false), timestamp(0), starttime(0), score(0), eta(0.9), length(0)
{
}
// Build a trajectory from one detection: caches the xyah form of the
// box and stores the detection's appearance embedding.
inline Trajectory::Trajectory(cv::Vec4f &ltrb_, float score_, const cv::Mat &embedding) :
state(New), ltrb(ltrb_), smooth_embedding(cv::Mat()), id(0),
is_activated(false), timestamp(0), starttime(0), score(score_), eta(0.9), length(0)
{
xyah = ltrb2xyah(ltrb);
update_embedding(embedding);
}
// Copy constructor. Fix: initializer list reordered to match member
// declaration order (..., starttime, score, then xyah, eta, length) —
// the original listed xyah before score, which triggers -Wreorder and
// misrepresents actual initialization order. cv::Mat members and the
// inherited KalmanFilter state are deep-copied in the body (plain
// member copies would only share cv::Mat headers).
inline Trajectory::Trajectory(const Trajectory &other):
state(other.state), ltrb(other.ltrb), id(other.id), is_activated(other.is_activated),
timestamp(other.timestamp), starttime(other.starttime), score(other.score),
xyah(other.xyah), eta(other.eta), length(other.length)
{
other.smooth_embedding.copyTo(smooth_embedding);
other.current_embedding.copyTo(current_embedding);
// copy state in KalmanFilter
other.statePre.copyTo(cv::KalmanFilter::statePre);
other.statePost.copyTo(cv::KalmanFilter::statePost);
other.errorCovPre.copyTo(cv::KalmanFilter::errorCovPre);
other.errorCovPost.copyTo(cv::KalmanFilter::errorCovPost);
}
// Copy assignment: mirrors the copy constructor, deep-copying cv::Mat
// members and the inherited KalmanFilter state via copyTo.
// NOTE(review): no explicit self-assignment guard; presumably safe
// because scalar self-assignments are no-ops and copyTo handles
// overlapping source/destination — confirm if self-assignment can occur.
inline Trajectory &Trajectory::operator=(const Trajectory &rhs)
{
this->state = rhs.state;
this->ltrb = rhs.ltrb;
rhs.smooth_embedding.copyTo(this->smooth_embedding);
this->id = rhs.id;
this->is_activated = rhs.is_activated;
this->timestamp = rhs.timestamp;
this->starttime = rhs.starttime;
this->xyah = rhs.xyah;
this->score = rhs.score;
rhs.current_embedding.copyTo(this->current_embedding);
this->eta = rhs.eta;
this->length = rhs.length;
// copy state in KalmanFilter
rhs.statePre.copyTo(cv::KalmanFilter::statePre);
rhs.statePost.copyTo(cv::KalmanFilter::statePost);
rhs.errorCovPre.copyTo(cv::KalmanFilter::errorCovPre);
rhs.errorCovPost.copyTo(cv::KalmanFilter::errorCovPost);
return *this;
}
// Allocate the next globally unique track id from the shared static
// counter. NOTE(review): not synchronized — assumes single-threaded
// tracker updates.
inline int Trajectory::next_id()
{
return ++count;
}
// Transition this trajectory to the Lost state.
inline void Trajectory::mark_lost(void)
{
state = Lost;
}
// Transition this trajectory to the Removed (terminal) state.
inline void Trajectory::mark_removed(void)
{
state = Removed;
}
} // namespace PaddleDetection
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <algorithm>
#include <ctime>
#include <memory>
#include <numeric>
#include <string>
#include <utility>
#include <vector>
namespace PaddleDetection {
// Object Detection Result
// Object Detection Result
struct ObjectResult {
  // Rectangle coordinates of detected object: left, right, top, down
  // NOTE(review): the ordering above is the original author's note — confirm
  // against the detector's output packing before relying on it.
  std::vector<int> rect;
  // Class id of detected object
  int class_id;
  // Confidence of detected object
  float confidence;
  // Mask of detected object
  std::vector<int> mask;
};
// In-place non-maximum suppression over input_boxes with the given IoU
// threshold (implementation elsewhere in the project).
void nms(std::vector<ObjectResult> &input_boxes, float nms_threshold);
} // namespace PaddleDetection
# Whether to use GPU (i.e. whether CUDA is available)
WITH_GPU=OFF
# Whether to use MKL or openblas; must be set to OFF on TX2
WITH_MKL=ON
# Whether to integrate TensorRT (only effective when WITH_GPU=ON)
WITH_TENSORRT=OFF
# Name of the Paddle inference library; it differs across platforms and
# versions — check the name of `lib` under `paddle_inference/lib/` in the
# downloaded inference package
PADDLE_LIB_NAME=libpaddle_inference
# TensorRT include path
TENSORRT_INC_DIR=/path/to/tensorrt/include
# TensorRT lib path
TENSORRT_LIB_DIR=/path/to/tensorrt/lib
# Paddle inference library path
PADDLE_DIR=/path/to/paddle_inference
# CUDA lib path
CUDA_LIB=/path/to/cuda/lib
# CUDNN lib path
CUDNN_LIB=/path/to/cudnn/lib
# Whether to enable keypoint model inference
WITH_KEYPOINT=OFF
# Whether to enable tracking (MOT) model inference
WITH_MOT=OFF

MACHINE_TYPE=`uname -m`
echo "MACHINE_TYPE: "${MACHINE_TYPE}

if [ "$MACHINE_TYPE" = "x86_64" ]
then
  echo "set OPENCV_DIR for x86_64"
  # On Linux, download the prebuilt OpenCV with the commands below
  mkdir -p $(pwd)/deps && cd $(pwd)/deps
  wget -c https://paddledet.bj.bcebos.com/data/opencv-3.4.16_gcc8.2_ffmpeg.tar.gz
  tar -xvf opencv-3.4.16_gcc8.2_ffmpeg.tar.gz && cd ..
  # set OPENCV_DIR
  OPENCV_DIR=$(pwd)/deps/opencv-3.4.16_gcc8.2_ffmpeg
elif [ "$MACHINE_TYPE" = "aarch64" ]
then
  echo "set OPENCV_DIR for aarch64"
  # On the TX2 platform, download the prebuilt OpenCV with the commands below
  mkdir -p $(pwd)/deps && cd $(pwd)/deps
  wget -c https://bj.bcebos.com/v1/paddledet/data/TX2_JetPack4.3_opencv_3.4.6_gcc7.5.0.tar.gz
  tar -xvf TX2_JetPack4.3_opencv_3.4.6_gcc7.5.0.tar.gz && cd ..
  # set OPENCV_DIR
  OPENCV_DIR=$(pwd)/deps/TX2_JetPack4.3_opencv_3.4.6_gcc7.5.0/
else
  echo "Please set OPENCV_DIR manually"
fi

echo "OPENCV_DIR: "$OPENCV_DIR

# No changes needed below this line
# NOTE(review): WITH_STATIC_LIB is forwarded to cmake but never set above —
# confirm whether a default should be defined here.
rm -rf build
mkdir -p build
cd build
cmake .. \
    -DWITH_GPU=${WITH_GPU} \
    -DWITH_MKL=${WITH_MKL} \
    -DWITH_TENSORRT=${WITH_TENSORRT} \
    -DTENSORRT_LIB_DIR=${TENSORRT_LIB_DIR} \
    -DTENSORRT_INC_DIR=${TENSORRT_INC_DIR} \
    -DPADDLE_DIR=${PADDLE_DIR} \
    -DWITH_STATIC_LIB=${WITH_STATIC_LIB} \
    -DCUDA_LIB=${CUDA_LIB} \
    -DCUDNN_LIB=${CUDNN_LIB} \
    -DOPENCV_DIR=${OPENCV_DIR} \
    -DPADDLE_LIB_NAME=${PADDLE_LIB_NAME} \
    -DWITH_KEYPOINT=${WITH_KEYPOINT} \
    -DWITH_MOT=${WITH_MOT}
make
echo "make finished!"
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <sstream>
// for setprecision
#include <chrono>
#include <iomanip>
#include "include/jde_detector.h"
using namespace paddle_infer;
namespace PaddleDetection {
// Load Model and create model predictor
// Load Model and create model predictor.
// Builds a paddle_infer::Config for the exported model in `model_dir`
// (expects model.pdmodel + model.pdiparams), selects the backend from
// device_ ("GPU" / "XPU" / CPU), optionally enables TensorRT (any
// run_mode other than "paddle") with dynamic shapes, and creates predictor_.
void JDEDetector::LoadModel(const std::string& model_dir,
                            const int batch_size,
                            const std::string& run_mode) {
  paddle_infer::Config config;
  std::string prog_file = model_dir + OS_PATH_SEP + "model.pdmodel";
  std::string params_file = model_dir + OS_PATH_SEP + "model.pdiparams";
  config.SetModel(prog_file, params_file);
  if (this->device_ == "GPU") {
    // 200 MB initial GPU memory pool on the selected device.
    config.EnableUseGpu(200, this->gpu_id_);
    config.SwitchIrOptim(true);
    // use tensorrt
    if (run_mode != "paddle") {
      // Map run_mode to a TensorRT precision; unrecognized values print a
      // warning and fall through with kFloat32.
      auto precision = paddle_infer::Config::Precision::kFloat32;
      if (run_mode == "trt_fp32") {
        precision = paddle_infer::Config::Precision::kFloat32;
      } else if (run_mode == "trt_fp16") {
        precision = paddle_infer::Config::Precision::kHalf;
      } else if (run_mode == "trt_int8") {
        precision = paddle_infer::Config::Precision::kInt8;
      } else {
        printf(
            "run_mode should be 'paddle', 'trt_fp32', 'trt_fp16' or "
            "'trt_int8'");
      }
      // set tensorrt: 1 GB workspace; only subgraphs with at least
      // min_subgraph_size_ ops go to TensorRT.
      config.EnableTensorRtEngine(1 << 30,
                                  batch_size,
                                  this->min_subgraph_size_,
                                  precision,
                                  false,
                                  this->trt_calib_mode_);
      // set use dynamic shape
      if (this->use_dynamic_shape_) {
        // Register min/max/opt dynamic shapes for the "image" input
        // (square sides of trt_min/max/opt_shape_).
        const std::vector<int> min_input_shape = {
            1, 3, this->trt_min_shape_, this->trt_min_shape_};
        const std::vector<int> max_input_shape = {
            1, 3, this->trt_max_shape_, this->trt_max_shape_};
        const std::vector<int> opt_input_shape = {
            1, 3, this->trt_opt_shape_, this->trt_opt_shape_};
        const std::map<std::string, std::vector<int>> map_min_input_shape = {
            {"image", min_input_shape}};
        const std::map<std::string, std::vector<int>> map_max_input_shape = {
            {"image", max_input_shape}};
        const std::map<std::string, std::vector<int>> map_opt_input_shape = {
            {"image", opt_input_shape}};
        config.SetTRTDynamicShapeInfo(
            map_min_input_shape, map_max_input_shape, map_opt_input_shape);
        std::cout << "TensorRT dynamic shape enabled" << std::endl;
      }
    }
  } else if (this->device_ == "XPU") {
    // 10 MB L3 cache for the XPU backend.
    config.EnableXpu(10 * 1024 * 1024);
  } else {
    config.DisableGpu();
    if (this->use_mkldnn_) {
      config.EnableMKLDNN();
      // cache 10 different shapes for mkldnn to avoid memory leak
      config.SetMkldnnCacheCapacity(10);
    }
    config.SetCpuMathLibraryNumThreads(this->cpu_math_library_num_threads_);
  }
  config.SwitchUseFeedFetchOps(false);
  config.SwitchIrOptim(true);
  config.DisableGlogInfo();
  // Memory optimization
  config.EnableMemoryOptim();
  predictor_ = std::move(CreatePredictor(config));
}
// Visualiztion results
// Visualization of tracking results: draws, on a copy of `img`, a header
// line (frame id, fps, number of tracks) plus one colored box with track id
// and score per result. Returns the annotated image.
cv::Mat VisualizeTrackResult(const cv::Mat& img,
                             const MOT_Result& results,
                             const float fps,
                             const int frame_id) {
  cv::Mat vis_img = img.clone();
  int im_w = img.cols;
  // Scale text / line sizes with the image width.
  float text_scale = std::max(1, int(im_w / 1600.));
  float text_thickness = 2.;
  float line_thickness = std::max(1, int(im_w / 500.));

  std::ostringstream oss;
  oss << std::setiosflags(std::ios::fixed) << std::setprecision(4);
  oss << "frame: " << frame_id << " ";
  oss << "fps: " << fps << " ";
  oss << "num: " << results.size();
  std::string text = oss.str();

  cv::Point origin;
  origin.x = 0;
  origin.y = int(15 * text_scale);
  // BUG FIX: the original passed (0, 0, 255) here — the comma operator
  // collapses that to the single scalar 255, so the header was not drawn in
  // the intended red. Wrap the channels in cv::Scalar.
  cv::putText(vis_img,
              text,
              origin,
              cv::FONT_HERSHEY_PLAIN,
              text_scale,
              cv::Scalar(0, 0, 255),
              text_thickness);

  for (size_t i = 0; i < results.size(); ++i) {
    const int obj_id = results[i].ids;
    const float score = results[i].score;
    cv::Scalar color = GetColor(obj_id);
    cv::Point pt1 = cv::Point(results[i].rects.left, results[i].rects.top);
    cv::Point pt2 = cv::Point(results[i].rects.right, results[i].rects.bottom);
    // Track id just below the top edge, score just above it.
    cv::Point id_pt =
        cv::Point(results[i].rects.left, results[i].rects.top + 10);
    cv::Point score_pt =
        cv::Point(results[i].rects.left, results[i].rects.top - 10);
    cv::rectangle(vis_img, pt1, pt2, color, line_thickness);

    std::ostringstream idoss;
    idoss << std::setiosflags(std::ios::fixed) << std::setprecision(4);
    idoss << obj_id;
    std::string id_text = idoss.str();
    cv::putText(vis_img,
                id_text,
                id_pt,
                cv::FONT_HERSHEY_PLAIN,
                text_scale,
                cv::Scalar(0, 255, 255),
                text_thickness);

    std::ostringstream soss;
    soss << std::setiosflags(std::ios::fixed) << std::setprecision(2);
    soss << score;
    std::string score_text = soss.str();
    cv::putText(vis_img,
                score_text,
                score_pt,
                cv::FONT_HERSHEY_PLAIN,
                text_scale,
                cv::Scalar(0, 255, 255),
                text_thickness);
  }
  return vis_img;
}
// Collect the row indices of detections whose confidence (column 4 of the
// detection matrix) exceeds conf_thresh. Indices are appended to *index.
void FilterDets(const float conf_thresh,
                const cv::Mat dets,
                std::vector<int>* index) {
  const int num_rows = dets.rows;
  for (int row = 0; row < num_rows; ++row) {
    const float conf = dets.ptr<float>(row)[4];
    if (conf > conf_thresh) {
      index->push_back(row);
    }
  }
}
// Run the preprocessing pipeline on a clone of the frame so the caller's
// original image stays untouched for postprocess/visualization.
void JDEDetector::Preprocess(const cv::Mat& ori_im) {
  cv::Mat working_copy = ori_im.clone();
  preprocessor_.Run(&working_copy, &inputs_);
}
// Convert raw detections + embeddings into tracked results.
// Detections are confidence-filtered, fed to the JDETracker, and surviving
// tracks (score >= threshold_, area > min_box_area_, not overly "vertical")
// are copied into *result. If the tracker yields no tracks, the first raw
// detection is emitted as a fallback with id 1.
void JDEDetector::Postprocess(const cv::Mat dets,
                              const cv::Mat emb,
                              MOT_Result* result) {
  result->clear();
  std::vector<Track> tracks;
  std::vector<int> valid;
  FilterDets(conf_thresh_, dets, &valid);
  cv::Mat new_dets, new_emb;
  for (size_t i = 0; i < valid.size(); ++i) {
    new_dets.push_back(dets.row(valid[i]));
    new_emb.push_back(emb.row(valid[i]));
  }
  JDETracker::instance()->update(new_dets, new_emb, tracks);
  if (tracks.size() == 0) {
    // Fallback when the tracker confirmed nothing. BUG FIX: guard against an
    // empty detection matrix — the original dereferenced row 0
    // unconditionally, which is undefined behavior when dets has no rows.
    if (dets.rows > 0) {
      MOT_Track mot_track;
      MOT_Rect ret = {*dets.ptr<float>(0, 0),
                      *dets.ptr<float>(0, 1),
                      *dets.ptr<float>(0, 2),
                      *dets.ptr<float>(0, 3)};
      mot_track.ids = 1;
      mot_track.score = *dets.ptr<float>(0, 4);
      mot_track.rects = ret;
      result->push_back(mot_track);
    }
  } else {
    std::vector<Track>::iterator titer;
    for (titer = tracks.begin(); titer != tracks.end(); ++titer) {
      if (titer->score < threshold_) {
        continue;
      } else {
        float w = titer->ltrb[2] - titer->ltrb[0];
        float h = titer->ltrb[3] - titer->ltrb[1];
        // Drop boxes that are too wide relative to height (aspect > 1.6) or
        // too small to be a reliable pedestrian box.
        bool vertical = w / h > 1.6;
        float area = w * h;
        if (area > min_box_area_ && !vertical) {
          MOT_Track mot_track;
          MOT_Rect ret = {
              titer->ltrb[0], titer->ltrb[1], titer->ltrb[2], titer->ltrb[3]};
          mot_track.rects = ret;
          mot_track.score = titer->score;
          mot_track.ids = titer->id;
          result->push_back(mot_track);
        }
      }
    }
  }
}
// Run JDE inference on a batch of images: preprocess, copy inputs into the
// predictor, run (warmup + timed repeats), then convert the bbox/embedding
// outputs into tracked results. Accumulates preprocess / inference /
// postprocess wall time (ms) into (*times)[0..2], which the caller must have
// sized to at least 3.
// NOTE(review): `threshold` is unused here — filtering is done in
// Postprocess via member thresholds; confirm this is intentional.
void JDEDetector::Predict(const std::vector<cv::Mat> imgs,
                          const double threshold,
                          const int warmup,
                          const int repeats,
                          MOT_Result* result,
                          std::vector<double>* times) {
  auto preprocess_start = std::chrono::steady_clock::now();
  int batch_size = imgs.size();
  // Batched input data and per-image shape/scale metadata.
  std::vector<float> in_data_all;
  std::vector<float> im_shape_all(batch_size * 2);
  std::vector<float> scale_factor_all(batch_size * 2);
  // Preprocess image
  for (int bs_idx = 0; bs_idx < batch_size; bs_idx++) {
    cv::Mat im = imgs.at(bs_idx);
    Preprocess(im);
    im_shape_all[bs_idx * 2] = inputs_.im_shape_[0];
    im_shape_all[bs_idx * 2 + 1] = inputs_.im_shape_[1];
    scale_factor_all[bs_idx * 2] = inputs_.scale_factor_[0];
    scale_factor_all[bs_idx * 2 + 1] = inputs_.scale_factor_[1];
    // TODO: reduce cost time
    in_data_all.insert(
        in_data_all.end(), inputs_.im_data_.begin(), inputs_.im_data_.end());
  }
  // Prepare input tensor
  auto input_names = predictor_->GetInputNames();
  for (const auto& tensor_name : input_names) {
    auto in_tensor = predictor_->GetInputHandle(tensor_name);
    if (tensor_name == "image") {
      int rh = inputs_.in_net_shape_[0];
      int rw = inputs_.in_net_shape_[1];
      in_tensor->Reshape({batch_size, 3, rh, rw});
      in_tensor->CopyFromCpu(in_data_all.data());
    } else if (tensor_name == "im_shape") {
      in_tensor->Reshape({batch_size, 2});
      in_tensor->CopyFromCpu(im_shape_all.data());
    } else if (tensor_name == "scale_factor") {
      in_tensor->Reshape({batch_size, 2});
      in_tensor->CopyFromCpu(scale_factor_all.data());
    }
  }
  auto preprocess_end = std::chrono::steady_clock::now();
  std::vector<int> bbox_shape;
  std::vector<int> emb_shape;
  // One predictor pass: run the graph and copy both outputs (bbox matrix and
  // embedding matrix) back to host buffers. Shared by the warmup and timed
  // loops, which previously duplicated this code verbatim.
  auto run_once = [&]() {
    predictor_->Run();
    auto output_names = predictor_->GetOutputNames();
    auto bbox_tensor = predictor_->GetOutputHandle(output_names[0]);
    bbox_shape = bbox_tensor->shape();
    auto emb_tensor = predictor_->GetOutputHandle(output_names[1]);
    emb_shape = emb_tensor->shape();
    // Total element counts from the output shapes.
    int bbox_size = 1;
    for (size_t j = 0; j < bbox_shape.size(); ++j) {
      bbox_size *= bbox_shape[j];
    }
    int emb_size = 1;
    for (size_t j = 0; j < emb_shape.size(); ++j) {
      emb_size *= emb_shape[j];
    }
    bbox_data_.resize(bbox_size);
    bbox_tensor->CopyToCpu(bbox_data_.data());
    emb_data_.resize(emb_size);
    emb_tensor->CopyToCpu(emb_data_.data());
  };
  // Warmup runs (not timed).
  for (int i = 0; i < warmup; i++) {
    run_once();
  }
  auto inference_start = std::chrono::steady_clock::now();
  for (int i = 0; i < repeats; i++) {
    run_once();
  }
  auto inference_end = std::chrono::steady_clock::now();
  auto postprocess_start = std::chrono::steady_clock::now();
  // Postprocessing result: wrap the flat host buffers as matrices. dets is
  // Nx6 (columns 0-3 = ltrb box, column 4 = score, per downstream usage),
  // emb is NxD.
  result->clear();
  cv::Mat dets(bbox_shape[0], 6, CV_32FC1, bbox_data_.data());
  cv::Mat emb(bbox_shape[0], emb_shape[1], CV_32FC1, emb_data_.data());
  Postprocess(dets, emb, result);
  auto postprocess_end = std::chrono::steady_clock::now();
  std::chrono::duration<float> preprocess_diff =
      preprocess_end - preprocess_start;
  (*times)[0] += double(preprocess_diff.count() * 1000);
  std::chrono::duration<float> inference_diff = inference_end - inference_start;
  (*times)[1] += double(inference_diff.count() * 1000);
  std::chrono::duration<float> postprocess_diff =
      postprocess_end - postprocess_start;
  (*times)[2] += double(postprocess_diff.count() * 1000);
}
// Deterministic pseudo-random color for a track id: the id is scaled and
// multiplied by three fixed primes, one per BGR channel, modulo 255.
cv::Scalar GetColor(int idx) {
  const int seed = idx * 3;
  return cv::Scalar((37 * seed) % 255, (17 * seed) % 255, (29 * seed) % 255);
}
} // namespace PaddleDetection
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <sstream>
// for setprecision
#include <chrono>
#include <iomanip>
#include "include/keypoint_detector.h"
using namespace paddle_infer;
namespace PaddleDetection {
// Load Model and create model predictor
// Load Model and create model predictor.
// Same backend-selection logic as the other detectors in this deploy code:
// configures paddle_infer::Config from model.pdmodel / model.pdiparams in
// `model_dir`, picks GPU / XPU / CPU from device_, optionally enables
// TensorRT (run_mode != "paddle") with dynamic shapes, and creates
// predictor_.
void KeyPointDetector::LoadModel(const std::string& model_dir,
                                 const int batch_size,
                                 const std::string& run_mode) {
  paddle_infer::Config config;
  std::string prog_file = model_dir + OS_PATH_SEP + "model.pdmodel";
  std::string params_file = model_dir + OS_PATH_SEP + "model.pdiparams";
  config.SetModel(prog_file, params_file);
  if (this->device_ == "GPU") {
    // 200 MB initial GPU memory pool on the selected device.
    config.EnableUseGpu(200, this->gpu_id_);
    config.SwitchIrOptim(true);
    // use tensorrt
    if (run_mode != "paddle") {
      // Map run_mode to a TensorRT precision; unrecognized values print a
      // warning and fall through with kFloat32.
      auto precision = paddle_infer::Config::Precision::kFloat32;
      if (run_mode == "trt_fp32") {
        precision = paddle_infer::Config::Precision::kFloat32;
      } else if (run_mode == "trt_fp16") {
        precision = paddle_infer::Config::Precision::kHalf;
      } else if (run_mode == "trt_int8") {
        precision = paddle_infer::Config::Precision::kInt8;
      } else {
        printf(
            "run_mode should be 'paddle', 'trt_fp32', 'trt_fp16' or "
            "'trt_int8'");
      }
      // set tensorrt: 1 GB workspace; only subgraphs with at least
      // min_subgraph_size_ ops go to TensorRT.
      config.EnableTensorRtEngine(1 << 30,
                                  batch_size,
                                  this->min_subgraph_size_,
                                  precision,
                                  false,
                                  this->trt_calib_mode_);
      // set use dynamic shape
      if (this->use_dynamic_shape_) {
        // Register min/max/opt dynamic shapes for the "image" input
        // (square sides of trt_min/max/opt_shape_).
        const std::vector<int> min_input_shape = {
            1, 3, this->trt_min_shape_, this->trt_min_shape_};
        const std::vector<int> max_input_shape = {
            1, 3, this->trt_max_shape_, this->trt_max_shape_};
        const std::vector<int> opt_input_shape = {
            1, 3, this->trt_opt_shape_, this->trt_opt_shape_};
        const std::map<std::string, std::vector<int>> map_min_input_shape = {
            {"image", min_input_shape}};
        const std::map<std::string, std::vector<int>> map_max_input_shape = {
            {"image", max_input_shape}};
        const std::map<std::string, std::vector<int>> map_opt_input_shape = {
            {"image", opt_input_shape}};
        config.SetTRTDynamicShapeInfo(
            map_min_input_shape, map_max_input_shape, map_opt_input_shape);
        std::cout << "TensorRT dynamic shape enabled" << std::endl;
      }
    }
  } else if (this->device_ == "XPU") {
    // 10 MB L3 cache for the XPU backend.
    config.EnableXpu(10 * 1024 * 1024);
  } else {
    config.DisableGpu();
    if (this->use_mkldnn_) {
      config.EnableMKLDNN();
      // cache 10 different shapes for mkldnn to avoid memory leak
      config.SetMkldnnCacheCapacity(10);
    }
    config.SetCpuMathLibraryNumThreads(this->cpu_math_library_num_threads_);
  }
  config.SwitchUseFeedFetchOps(false);
  config.SwitchIrOptim(true);
  config.DisableGlogInfo();
  // Memory optimization
  config.EnableMemoryOptim();
  predictor_ = std::move(CreatePredictor(config));
}
// Visualization MaskDetector results
// Visualization MaskDetector results.
// Draws detected keypoints (confidence > 0.5) as red dots and the COCO-17
// skeleton edges as lines on a copy of the input image; `colormap` supplies
// one color value per edge. keypoints is packed (score, x, y) per joint.
cv::Mat VisualizeKptsResult(const cv::Mat& img,
                            const std::vector<KeyPointResult>& results,
                            const std::vector<int>& colormap) {
  // COCO 17-keypoint skeleton: pairs of joint indices to connect.
  const int edge[][2] = {{0, 1},
                         {0, 2},
                         {1, 3},
                         {2, 4},
                         {3, 5},
                         {4, 6},
                         {5, 7},
                         {6, 8},
                         {7, 9},
                         {8, 10},
                         {5, 11},
                         {6, 12},
                         {11, 13},
                         {12, 14},
                         {13, 15},
                         {14, 16},
                         {11, 12}};
  const int num_edges = sizeof(edge) / sizeof(edge[0]);
  cv::Mat vis_img = img.clone();
  for (size_t batchid = 0; batchid < results.size(); batchid++) {
    const KeyPointResult& res = results[batchid];
    // Dots for confident keypoints.
    for (int i = 0; i < res.num_joints; i++) {
      if (res.keypoints[i * 3] > 0.5) {
        int x_coord = int(res.keypoints[i * 3 + 1]);
        int y_coord = int(res.keypoints[i * 3 + 2]);
        cv::circle(vis_img,
                   cv::Point2d(x_coord, y_coord),
                   1,
                   cv::Scalar(0, 0, 255),
                   2);
      }
    }
    // Skeleton lines. BUG FIX: the original loop bound was num_joints while
    // indexing the 17-entry edge[] table, reading out of bounds whenever
    // num_joints > 17 (and reading nonexistent joints when num_joints < 17).
    // Iterate the table itself and skip edges whose joints don't exist.
    for (int i = 0; i < num_edges; i++) {
      if (edge[i][0] >= res.num_joints || edge[i][1] >= res.num_joints) {
        continue;
      }
      int x_start = int(res.keypoints[edge[i][0] * 3 + 1]);
      int y_start = int(res.keypoints[edge[i][0] * 3 + 2]);
      int x_end = int(res.keypoints[edge[i][1] * 3 + 1]);
      int y_end = int(res.keypoints[edge[i][1] * 3 + 2]);
      cv::line(vis_img,
               cv::Point2d(x_start, y_start),
               cv::Point2d(x_end, y_end),
               colormap[i],
               1);
    }
  }
  return vis_img;
}
// Convert a clone of the frame to RGB and run the preprocessing pipeline;
// the caller's original BGR image is left untouched.
void KeyPointDetector::Preprocess(const cv::Mat& ori_im) {
  cv::Mat rgb_im = ori_im.clone();
  cv::cvtColor(rgb_im, rgb_im, cv::COLOR_BGR2RGB);
  preprocessor_.Run(&rgb_im, &inputs_);
}
void KeyPointDetector::Postprocess(std::vector<float>& output,
std::vector<int> output_shape,
std::vector<int64_t>& idxout,
std::vector<int> idx_shape,
std::vector<KeyPointResult>* result,
std::vector<std::vector<float>>& center_bs,
std::vector<std::vector<float>>& scale_bs) {
std::vector<float> preds(output_shape[1] * 3, 0);
for (int batchid = 0; batchid < output_shape[0]; batchid++) {
get_final_preds(output,
output_shape,
idxout,
idx_shape,
center_bs[batchid],
scale_bs[batchid],
preds,
batchid,
this->use_dark);
KeyPointResult result_item;
result_item.num_joints = output_shape[1];
result_item.keypoints.clear();
for (int i = 0; i < output_shape[1]; i++) {
result_item.keypoints.emplace_back(preds[i * 3]);
result_item.keypoints.emplace_back(preds[i * 3 + 1]);
result_item.keypoints.emplace_back(preds[i * 3 + 2]);
}
result->push_back(result_item);
}
}
// Run keypoint inference on a batch of crops: preprocess, copy inputs, run
// (warmup + timed repeats), then decode heatmaps into keypoint results.
// Appends preprocess / per-run inference / postprocess times (ms) to *times.
// NOTE(review): `threshold` is unused in this function — confirm whether it
// belongs in Postprocess.
void KeyPointDetector::Predict(const std::vector<cv::Mat> imgs,
                               std::vector<std::vector<float>>& center_bs,
                               std::vector<std::vector<float>>& scale_bs,
                               const double threshold,
                               const int warmup,
                               const int repeats,
                               std::vector<KeyPointResult>* result,
                               std::vector<double>* times) {
  auto preprocess_start = std::chrono::steady_clock::now();
  int batch_size = imgs.size();
  // Batched input data and per-image shape/scale metadata.
  std::vector<float> in_data_all;
  std::vector<float> im_shape_all(batch_size * 2);
  std::vector<float> scale_factor_all(batch_size * 2);
  // Preprocess image
  for (int bs_idx = 0; bs_idx < batch_size; bs_idx++) {
    cv::Mat im = imgs.at(bs_idx);
    Preprocess(im);
    im_shape_all[bs_idx * 2] = inputs_.im_shape_[0];
    im_shape_all[bs_idx * 2 + 1] = inputs_.im_shape_[1];
    scale_factor_all[bs_idx * 2] = inputs_.scale_factor_[0];
    scale_factor_all[bs_idx * 2 + 1] = inputs_.scale_factor_[1];
    // TODO: reduce cost time
    in_data_all.insert(
        in_data_all.end(), inputs_.im_data_.begin(), inputs_.im_data_.end());
  }
  // Prepare input tensor
  auto input_names = predictor_->GetInputNames();
  for (const auto& tensor_name : input_names) {
    auto in_tensor = predictor_->GetInputHandle(tensor_name);
    if (tensor_name == "image") {
      int rh = inputs_.in_net_shape_[0];
      int rw = inputs_.in_net_shape_[1];
      in_tensor->Reshape({batch_size, 3, rh, rw});
      in_tensor->CopyFromCpu(in_data_all.data());
    } else if (tensor_name == "im_shape") {
      in_tensor->Reshape({batch_size, 2});
      in_tensor->CopyFromCpu(im_shape_all.data());
    } else if (tensor_name == "scale_factor") {
      in_tensor->Reshape({batch_size, 2});
      in_tensor->CopyFromCpu(scale_factor_all.data());
    }
  }
  auto preprocess_end = std::chrono::steady_clock::now();
  std::vector<int> output_shape, idx_shape;
  // One predictor pass: run the graph and copy both outputs (heatmaps and
  // argmax indices) back to host buffers. Shared by the warmup and timed
  // loops, which previously duplicated this code; only the timed loop warns
  // on an implausibly small output.
  auto run_once = [&](bool warn_small_output) {
    predictor_->Run();
    auto output_names = predictor_->GetOutputNames();
    auto out_tensor = predictor_->GetOutputHandle(output_names[0]);
    output_shape = out_tensor->shape();
    int output_size = 1;
    for (size_t j = 0; j < output_shape.size(); ++j) {
      output_size *= output_shape[j];
    }
    if (warn_small_output && output_size < 6) {
      std::cerr << "[WARNING] No object detected." << std::endl;
    }
    output_data_.resize(output_size);
    out_tensor->CopyToCpu(output_data_.data());
    auto idx_tensor = predictor_->GetOutputHandle(output_names[1]);
    idx_shape = idx_tensor->shape();
    int idx_size = 1;
    for (size_t j = 0; j < idx_shape.size(); ++j) {
      idx_size *= idx_shape[j];
    }
    idx_data_.resize(idx_size);
    idx_tensor->CopyToCpu(idx_data_.data());
  };
  // Warmup runs (not timed, no warning — matches the original loops).
  for (int i = 0; i < warmup; i++) {
    run_once(false);
  }
  auto inference_start = std::chrono::steady_clock::now();
  for (int i = 0; i < repeats; i++) {
    run_once(true);
  }
  auto inference_end = std::chrono::steady_clock::now();
  auto postprocess_start = std::chrono::steady_clock::now();
  // Postprocessing result
  Postprocess(output_data_,
              output_shape,
              idx_data_,
              idx_shape,
              result,
              center_bs,
              scale_bs);
  auto postprocess_end = std::chrono::steady_clock::now();
  std::chrono::duration<float> preprocess_diff =
      preprocess_end - preprocess_start;
  times->push_back(double(preprocess_diff.count() * 1000));
  std::chrono::duration<float> inference_diff = inference_end - inference_start;
  // Inference time is averaged over the repeat count.
  times->push_back(double(inference_diff.count() / repeats * 1000));
  std::chrono::duration<float> postprocess_diff =
      postprocess_end - postprocess_start;
  times->push_back(double(postprocess_diff.count() * 1000));
}
} // namespace PaddleDetection
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "include/keypoint_postprocess.h"
#include <math.h>
#define PI 3.1415926535
#define HALF_CIRCLE_DEGREE 180
namespace PaddleDetection {
// Third point for an affine-transform triple: rotate the vector (a - b) by
// 90 degrees and add it to a.
cv::Point2f get_3rd_point(cv::Point2f& a, cv::Point2f& b) {
  const float dx = a.x - b.x;
  const float dy = a.y - b.y;
  return cv::Point2f(a.x - dy, a.y + dx);
}
// Rotate the 2D point (src_point_x, src_point_y) by rot_rad radians about
// the origin; returns {x', y'}.
std::vector<float> get_dir(float src_point_x,
                           float src_point_y,
                           float rot_rad) {
  const float sn = std::sin(rot_rad);
  const float cs = std::cos(rot_rad);
  return {src_point_x * cs - src_point_y * sn,
          src_point_x * sn + src_point_y * cs};
}
void affine_tranform(
float pt_x, float pt_y, cv::Mat& trans, std::vector<float>& preds, int p) {
double new1[3] = {pt_x, pt_y, 1.0};
cv::Mat new_pt(3, 1, trans.type(), new1);
cv::Mat w = trans * new_pt;
preds[p * 3 + 1] = static_cast<float>(w.at<double>(0, 0));
preds[p * 3 + 2] = static_cast<float>(w.at<double>(1, 0));
}
void get_affine_transform(std::vector<float>& center,
std::vector<float>& scale,
float rot,
std::vector<int>& output_size,
cv::Mat& trans,
int inv) {
float src_w = scale[0];
float dst_w = static_cast<float>(output_size[0]);
float dst_h = static_cast<float>(output_size[1]);
float rot_rad = rot * PI / HALF_CIRCLE_DEGREE;
std::vector<float> src_dir = get_dir(-0.5 * src_w, 0, rot_rad);
std::vector<float> dst_dir{-0.5f * dst_w, 0.0};
cv::Point2f srcPoint2f[3], dstPoint2f[3];
srcPoint2f[0] = cv::Point2f(center[0], center[1]);
srcPoint2f[1] = cv::Point2f(center[0] + src_dir[0], center[1] + src_dir[1]);
srcPoint2f[2] = get_3rd_point(srcPoint2f[0], srcPoint2f[1]);
dstPoint2f[0] = cv::Point2f(dst_w * 0.5, dst_h * 0.5);
dstPoint2f[1] =
cv::Point2f(dst_w * 0.5 + dst_dir[0], dst_h * 0.5 + dst_dir[1]);
dstPoint2f[2] = get_3rd_point(dstPoint2f[0], dstPoint2f[1]);
if (inv == 0) {
trans = cv::getAffineTransform(srcPoint2f, dstPoint2f);
} else {
trans = cv::getAffineTransform(dstPoint2f, srcPoint2f);
}
}
void transform_preds(std::vector<float>& coords,
std::vector<float>& center,
std::vector<float>& scale,
std::vector<int>& output_size,
std::vector<int>& dim,
std::vector<float>& target_coords,
bool affine) {
if (affine) {
cv::Mat trans(2, 3, CV_64FC1);
get_affine_transform(center, scale, 0, output_size, trans, 1);
for (int p = 0; p < dim[1]; ++p) {
affine_tranform(
coords[p * 2], coords[p * 2 + 1], trans, target_coords, p);
}
} else {
float heat_w = static_cast<float>(output_size[0]);
float heat_h = static_cast<float>(output_size[1]);
float x_scale = scale[0] / heat_w;
float y_scale = scale[1] / heat_h;
float offset_x = center[0] - scale[0] / 2.;
float offset_y = center[1] - scale[1] / 2.;
for (int i = 0; i < dim[1]; i++) {
target_coords[i * 3 + 1] = x_scale * coords[i * 2] + offset_x;
target_coords[i * 3 + 2] = y_scale * coords[i * 2 + 1] + offset_y;
}
}
}
// only for batchsize == 1
// For each joint channel of one batch item, find the argmax location and
// value over its heatmap (dim = {batch, joints, height, width}).
// preds receives (x, y) per joint — written only when the peak is positive,
// matching the original behavior — and maxvals[j] receives the peak value.
// `joint_idx` is unused but kept for interface compatibility.
// Only supports batchsize == 1 callers (see original note above).
// Cleanup: removed an unused scratch index vector the original allocated.
void get_max_preds(float* heatmap,
                   std::vector<int>& dim,
                   std::vector<float>& preds,
                   float* maxvals,
                   int batchid,
                   int joint_idx) {
  int num_joints = dim[1];
  int width = dim[3];
  for (int j = 0; j < num_joints; j++) {
    // Start of channel j's height*width heatmap plane.
    float* index = &(
        heatmap[batchid * num_joints * dim[2] * dim[3] + j * dim[2] * dim[3]]);
    float* end = index + dim[2] * dim[3];
    float* max_dis = std::max_element(index, end);
    auto max_id = std::distance(index, max_dis);
    maxvals[j] = *max_dis;
    if (*max_dis > 0) {
      // Decode the flat argmax index into (x, y).
      preds[j * 2] = static_cast<float>(max_id % width);
      preds[j * 2 + 1] = static_cast<float>(max_id / width);
    }
  }
}
// DARK postprocessing, Zhang et al. Distribution-Aware Coordinate
// Representation for Human Pose Estimation (CVPR 2020).
// Refines coords[ch] with a second-order Taylor step on the log-heatmap:
//   offset = -hessian.inv() * derivative
// where derivative and hessian come from central finite differences around
// the integer peak (px, py).
void dark_parse(std::vector<float>& heatmap,
                std::vector<int>& dim,
                std::vector<float>& coords,
                int px,
                int py,
                int index,
                int ch) {
  // Slice out channel `ch`, smooth it with a 3x3 Gaussian, flatten back.
  std::vector<float>::const_iterator ch_begin = heatmap.begin() + index;
  std::vector<float>::const_iterator ch_end = ch_begin + dim[2] * dim[3];
  std::vector<float> heatmap_ch(ch_begin, ch_end);
  cv::Mat heatmap_mat = cv::Mat(heatmap_ch).reshape(0, dim[2]);
  heatmap_mat.convertTo(heatmap_mat, CV_32FC1);
  cv::GaussianBlur(heatmap_mat, heatmap_mat, cv::Size(3, 3), 0, 0);
  heatmap_mat = heatmap_mat.reshape(1, 1);
  heatmap_ch = std::vector<float>(heatmap_mat.reshape(1, 1));
  const float epsilon = 1e-10;
  // Clamped log-sample of the smoothed heatmap at (row y, col x).
  auto logval = [&](int y, int x) -> float {
    return log(fmax(heatmap_ch[y * dim[3] + x], epsilon));
  };
  const float xy = logval(py, px);
  // First derivatives (central differences).
  const float dx = 0.5 * (logval(py, px + 1) - logval(py, px - 1));
  const float dy = 0.5 * (logval(py + 1, px) - logval(py - 1, px));
  // Second derivatives.
  const float dxx = 0.25 * (logval(py, px + 2) - 2 * xy + logval(py, px - 2));
  const float dyy = 0.25 * (logval(py + 2, px) - 2 * xy + logval(py - 2, px));
  const float dxy = 0.25 * (logval(py + 1, px + 1) - logval(py - 1, px + 1) -
                            logval(py + 1, px - 1) + logval(py - 1, px - 1));
  // Apply the Newton step only when the Hessian is invertible.
  if (dxx * dyy - dxy * dxy != 0) {
    float M[2][2] = {dxx, dxy, dxy, dyy};
    float D[2] = {dx, dy};
    cv::Mat hessian(2, 2, CV_32F, M);
    cv::Mat derivative(2, 1, CV_32F, D);
    cv::Mat offset = -hessian.inv() * derivative;
    coords[ch * 2] += offset.at<float>(0, 0);
    coords[ch * 2 + 1] += offset.at<float>(1, 0);
  }
}
// Convert argmax heatmap indices into refined joint predictions.
// For each joint: record the peak confidence into preds[j*3], derive (x, y)
// from the flat argmax index, refine sub-pixel position with either DARK
// parsing (when enabled and the peak is far enough from the border) or a
// +/-0.25 px shift toward the larger neighbor, then project through
// transform_preds into image space (preds[j*3+1 / +2]).
void get_final_preds(std::vector<float>& heatmap,
                     std::vector<int>& dim,
                     std::vector<int64_t>& idxout,
                     std::vector<int>& idxdim,
                     std::vector<float>& center,
                     std::vector<float> scale,
                     std::vector<float>& preds,
                     int batchid,
                     bool DARK) {
  std::vector<float> coords;
  coords.resize(dim[1] * 2);
  int heatmap_height = dim[2];
  int heatmap_width = dim[3];
  for (int j = 0; j < dim[1]; ++j) {
    int index = (batchid * dim[1] + j) * dim[2] * dim[3];
    int idx = idxout[batchid * dim[1] + j];
    preds[j * 3] = heatmap[index + idx];
    coords[j * 2] = idx % heatmap_width;
    coords[j * 2 + 1] = idx / heatmap_width;
    int px = int(coords[j * 2] + 0.5);
    int py = int(coords[j * 2 + 1] + 0.5);
    if (DARK && px > 1 && px < heatmap_width - 2 && py > 1 &&
        py < heatmap_height - 2) {
      dark_parse(heatmap, dim, coords, px, py, index, j);
    } else {
      // Shift a quarter pixel toward the taller neighbor.
      // BUG FIX: the original wrote `diff > 0 ? 1 : -1 * 0.25`, which the
      // conditional operator parses as (diff > 0 ? 1 : -0.25) — a full-pixel
      // shift on the positive side instead of the intended +/-0.25.
      if (px > 0 && px < heatmap_width - 1) {
        float diff_x = heatmap[index + py * dim[3] + px + 1] -
                       heatmap[index + py * dim[3] + px - 1];
        coords[j * 2] += (diff_x > 0 ? 1.f : -1.f) * 0.25f;
      }
      if (py > 0 && py < heatmap_height - 1) {
        float diff_y = heatmap[index + (py + 1) * dim[3] + px] -
                       heatmap[index + (py - 1) * dim[3] + px];
        coords[j * 2 + 1] += (diff_y > 0 ? 1.f : -1.f) * 0.25f;
      }
    }
  }
  std::vector<int> img_size{heatmap_width, heatmap_height};
  transform_preds(coords, center, scale, img_size, dim, preds);
}
// Run predictor
// Temporal smoothing entry point. On the first frame (x_prev_hat not yet
// seeded, num_joints == -1) the filter state is initialized from the input
// with zero velocity and the input is returned unchanged; afterwards every
// joint is smoothed independently via PointSmooth.
KeyPointResult PoseSmooth::smooth_process(KeyPointResult* result) {
  KeyPointResult smoothed = *result;
  if (this->x_prev_hat.num_joints == -1) {
    this->x_prev_hat = *result;
    this->dx_prev_hat = *result;
    std::fill(dx_prev_hat.keypoints.begin(), dx_prev_hat.keypoints.end(), 0.);
    return smoothed;
  }
  for (int joint = 0; joint < result->num_joints; joint++) {
    this->PointSmooth(result, &smoothed, this->thresholds, joint);
  }
  return smoothed;
}
// Smooth one keypoint. If its normalized displacement since the previous
// frame is below the per-joint threshold it is snapped to the previous
// estimate (jitter suppression); otherwise it is filtered with either the
// OneEuro filter or plain exponential smoothing, per filter_type.
void PoseSmooth::PointSmooth(KeyPointResult* result,
                             KeyPointResult* keypoint_smoothed,
                             std::vector<float> thresholds,
                             int index) {
  // keypoints are packed (score, x, y); xi / yi address x and y.
  const int xi = index * 3 + 1;
  const int yi = index * 3 + 2;
  const float nx =
      (result->keypoints[xi] - this->x_prev_hat.keypoints[xi]) / this->width;
  const float ny =
      (result->keypoints[yi] - this->x_prev_hat.keypoints[yi]) / this->height;
  const float distance = sqrt(pow(nx, 2) + pow(ny, 2));
  if (distance < thresholds[index] * this->thres_mult) {
    // Below threshold: treat as jitter and hold the previous position.
    keypoint_smoothed->keypoints[xi] = this->x_prev_hat.keypoints[xi];
    keypoint_smoothed->keypoints[yi] = this->x_prev_hat.keypoints[yi];
  } else if (this->filter_type == "OneEuro") {
    keypoint_smoothed->keypoints[xi] = this->OneEuroFilter(
        result->keypoints[xi], this->x_prev_hat.keypoints[xi], xi);
    keypoint_smoothed->keypoints[yi] = this->OneEuroFilter(
        result->keypoints[yi], this->x_prev_hat.keypoints[yi], yi);
  } else {
    keypoint_smoothed->keypoints[xi] = this->ExpSmoothing(
        result->keypoints[xi], this->x_prev_hat.keypoints[xi], xi);
    keypoint_smoothed->keypoints[yi] = this->ExpSmoothing(
        result->keypoints[yi], this->x_prev_hat.keypoints[yi], yi);
  }
}
// One-Euro filter step for a single scalar keypoint coordinate.
// x_cur/x_pre are the current raw and previous filtered values; loc is the
// flat index of this coordinate inside the keypoints array, used to read
// and update the cached filter state. Returns the filtered value.
float PoseSmooth::OneEuroFilter(float x_cur, float x_pre, int loc) {
  float te = 1.0;  // elapsed time between samples, fixed at one frame
  // First smooth the derivative using the derivative cutoff fc_d.
  this->alpha = this->smoothing_factor(te, this->fc_d);
  float dx_cur = (x_cur - x_pre) / te;
  float dx_cur_hat =
      this->ExpSmoothing(dx_cur, this->dx_prev_hat.keypoints[loc]);
  // Speed-adaptive cutoff: faster motion -> higher cutoff -> less lag.
  // NOTE: the previous code called unqualified abs(), which resolves to the
  // int overload on common toolchains and truncated the float speed; take
  // the magnitude explicitly instead.
  float speed = dx_cur_hat < 0 ? -dx_cur_hat : dx_cur_hat;
  float fc = this->fc_min + this->beta * speed;
  this->alpha = this->smoothing_factor(te, fc);
  float x_cur_hat = this->ExpSmoothing(x_cur, x_pre);
  // Persist filter state for the next frame.
  this->x_prev_hat.keypoints[loc] = x_cur_hat;
  this->dx_prev_hat.keypoints[loc] = dx_cur_hat;
  return x_cur_hat;
}
// Map a cutoff frequency to an exponential-smoothing coefficient using the
// standard One-Euro formula: alpha = r / (r + 1), r = 2*pi*fc*te.
float PoseSmooth::smoothing_factor(float te, float fc) {
  const float omega = 2 * PI * fc * te;
  return omega / (omega + 1);
}
// Exponential smoothing: blend the current sample with the previous
// estimate using the most recently computed alpha. `loc` is unused here;
// it is kept to match the declared interface.
float PoseSmooth::ExpSmoothing(float x_cur, float x_pre, int loc) {
  const float a = this->alpha;
  return a * x_cur + (1 - a) * x_pre;
}
} // namespace PaddleDetection
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// The code is based on:
// https://github.com/gatagat/lap/blob/master/lap/lapjv.cpp
// The copyright of gatagat/lap is as follows:
// MIT License
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "include/lapjv.h"
namespace PaddleDetection {
/** Column-reduction and reduction transfer for a dense cost matrix.
 *
 * Initializes the dual prices v[] with per-column minima, assigns rows that
 * are the unique minimizer of their column, and transfers reduction slack
 * for rows that claimed exactly one column. Writes the indices of still
 * unassigned rows into free_rows and returns their count.
 */
int _ccrrt_dense(const int n, float *cost[],
                 int *free_rows, int *x, int *y, float *v)
{
    int n_free_rows;
    bool *unique;
    // x[i]: column assigned to row i (-1 = free); v[j]: dual price of col j.
    for (int i = 0; i < n; i++) {
        x[i] = -1;
        v[i] = LARGE;
        y[i] = 0;
    }
    // Column reduction: v[j] = min_i cost[i][j], y[j] = minimizing row.
    for (int i = 0; i < n; i++) {
        for (int j = 0; j < n; j++) {
            const float c = cost[i][j];
            if (c < v[j]) {
                v[j] = c;
                y[j] = i;
            }
        }
    }
    NEW(unique, bool, n);
    memset(unique, TRUE, n);
    // Give each minimizing row its column; a row claiming several columns
    // keeps only the last scanned one and is marked non-unique.
    {
        int j = n;
        do {
            j--;
            const int i = y[j];
            if (x[i] < 0) {
                x[i] = j;
            } else {
                unique[i] = FALSE;
                y[j] = -1;
            }
        } while (j > 0);
    }
    n_free_rows = 0;
    for (int i = 0; i < n; i++) {
        if (x[i] < 0) {
            // Row received no column: put it on the free list.
            free_rows[n_free_rows++] = i;
        } else if (unique[i]) {
            // Reduction transfer: lower the assigned column's price by the
            // smallest reduced cost of this row over all other columns.
            const int j = x[i];
            float min = LARGE;
            for (int j2 = 0; j2 < n; j2++) {
                if (j2 == (int)j) {
                    continue;
                }
                const float c = cost[i][j2] - v[j2];
                if (c < min) {
                    min = c;
                }
            }
            v[j] -= min;
        }
    }
    FREE(unique);
    return n_free_rows;
}
/** Augmenting row reduction for a dense cost matrix.
 *
 * For each free row, finds its smallest (v1 at j1) and second smallest
 * (v2 at j2) reduced costs, takes over column j1 (possibly displacing the
 * row that held it) and adjusts the dual price of j1 by the gap v2 - v1.
 * Displaced rows are re-queued while an iteration budget allows, otherwise
 * deferred to the next pass. Returns the number of rows still free.
 */
int _carr_dense(
    const int n, float *cost[],
    const int n_free_rows,
    int *free_rows, int *x, int *y, float *v)
{
    // current: cursor into free_rows; rr_cnt bounds total re-queueing.
    int current = 0;
    int new_free_rows = 0;
    int rr_cnt = 0;
    while (current < n_free_rows) {
        int i0;
        int j1, j2;
        float v1, v2, v1_new;
        bool v1_lowers;
        rr_cnt++;
        const int free_i = free_rows[current++];
        // Scan the row for its two smallest reduced costs.
        j1 = 0;
        v1 = cost[free_i][0] - v[0];
        j2 = -1;
        v2 = LARGE;
        for (int j = 1; j < n; j++) {
            const float c = cost[free_i][j] - v[j];
            if (c < v2) {
                if (c >= v1) {
                    v2 = c;
                    j2 = j;
                } else {
                    v2 = v1;
                    v1 = c;
                    j2 = j1;
                    j1 = j;
                }
            }
        }
        i0 = y[j1];
        v1_new = v[j1] - (v2 - v1);
        v1_lowers = v1_new < v[j1];
        // Under the iteration budget, displaced rows may be retried
        // immediately; over it, they are pushed to the next round.
        if (rr_cnt < current * n) {
            if (v1_lowers) {
                v[j1] = v1_new;
            } else if (i0 >= 0 && j2 >= 0) {
                j1 = j2;
                i0 = y[j2];
            }
            if (i0 >= 0) {
                if (v1_lowers) {
                    free_rows[--current] = i0;
                } else {
                    free_rows[new_free_rows++] = i0;
                }
            }
        } else {
            if (i0 >= 0) {
                free_rows[new_free_rows++] = i0;
            }
        }
        // Claim column j1 for this row.
        x[free_i] = j1;
        y[j1] = free_i;
    }
    return new_free_rows;
}
/** Find columns with minimum d[j] and put them on the SCAN list.
 *
 * Partitions cols[lo..n-1] so that every column whose tentative distance
 * d[] equals the minimum is swapped into the front region starting at lo.
 * Returns the exclusive end of that region (the SCAN list is
 * cols[lo..hi-1]). `y` is part of the shared solver signature but unused.
 */
int _find_dense(const int n, int lo, float *d, int *cols, int *y)
{
    float best = d[cols[lo]];
    int hi = lo + 1;
    for (int k = lo + 1; k < n; k++) {
        const int col = cols[k];
        if (d[col] > best) {
            continue;
        }
        if (d[col] < best) {
            // Strictly smaller distance: restart the minimum region at lo.
            best = d[col];
            hi = lo;
        }
        // Swap this column into the minimum region.
        cols[k] = cols[hi];
        cols[hi] = col;
        hi++;
    }
    return hi;
}
// Scan all columns in TODO starting from arbitrary column in SCAN
// and try to decrease d of the TODO columns using the SCAN column.
// Returns a free column index as soon as one becomes reachable at the
// current minimum distance, otherwise -1 once the SCAN list is exhausted
// (plo/phi are updated for the caller in that case).
int _scan_dense(const int n, float *cost[],
                int *plo, int*phi,
                float *d, int *cols, int *pred,
                int *y, float *v)
{
    int lo = *plo;
    int hi = *phi;
    float h, cred_ij;
    while (lo != hi) {
        int j = cols[lo++];
        const int i = y[j];
        const float mind = d[j];
        // h is the offset that makes reduced costs relative to this column.
        h = cost[i][j] - v[j] - mind;
        // For all columns in TODO
        for (int k = hi; k < n; k++) {
            j = cols[k];
            cred_ij = cost[i][j] - v[j] - h;
            if (cred_ij < d[j]) {
                d[j] = cred_ij;
                pred[j] = i;
                if (cred_ij == mind) {
                    // Reachable at the current minimum: either it is free
                    // (augmenting path found) or it joins the SCAN list.
                    if (y[j] < 0) {
                        return j;
                    }
                    cols[k] = cols[hi];
                    cols[hi++] = j;
                }
            }
        }
    }
    *plo = lo;
    *phi = hi;
    return -1;
}
/** Single iteration of modified Dijkstra shortest path algorithm as explained in the JV paper.
 *
 * This is a dense matrix version.
 *
 * Searches for a shortest augmenting path from row start_i to any free
 * column, updating pred[] with the path and v[] with the new dual prices
 * of all columns finalized (READY) during the search.
 *
 * \return The closest free column index.
 */
int find_path_dense(
    const int n, float *cost[],
    const int start_i,
    int *y, float *v,
    int *pred)
{
    // cols is partitioned as [READY | SCAN | TODO); d[j] holds the
    // tentative shortest-path distance to column j.
    int lo = 0, hi = 0;
    int final_j = -1;
    int n_ready = 0;
    int *cols;
    float *d;
    NEW(cols, int, n);
    NEW(d, float, n);
    for (int i = 0; i < n; i++) {
        cols[i] = i;
        pred[i] = start_i;
        d[i] = cost[start_i][i] - v[i];
    }
    while (final_j == -1) {
        // No columns left on the SCAN list.
        if (lo == hi) {
            n_ready = lo;
            hi = _find_dense(n, lo, d, cols, y);
            // A free column at minimum distance terminates the search.
            for (int k = lo; k < hi; k++) {
                const int j = cols[k];
                if (y[j] < 0) {
                    final_j = j;
                }
            }
        }
        if (final_j == -1) {
            final_j = _scan_dense(
                n, cost, &lo, &hi, d, cols, pred, y, v);
        }
    }
    // Raise the dual prices of all READY columns by their distance gap to
    // the final minimum.
    {
        const float mind = d[cols[lo]];
        for (int k = 0; k < n_ready; k++) {
            const int j = cols[k];
            v[j] += d[j] - mind;
        }
    }
    FREE(cols);
    FREE(d);
    return final_j;
}
/** Augment for a dense cost matrix.
 *
 * For every remaining free row, finds a shortest augmenting path to a free
 * column and flips the row/column assignments along it. Always returns 0.
 */
int _ca_dense(
    const int n, float *cost[],
    const int n_free_rows,
    int *free_rows, int *x, int *y, float *v)
{
    int *pred;
    NEW(pred, int, n);
    for (int *pfree_i = free_rows; pfree_i < free_rows + n_free_rows; pfree_i++) {
        int i = -1, j;
        int k = 0;  // path-length counter; not otherwise used here
        // Closest free column reachable from this free row.
        j = find_path_dense(n, cost, *pfree_i, y, v, pred);
        // Walk the predecessor chain back to the free row, swapping the
        // assignments along the augmenting path.
        while (i != *pfree_i) {
            i = pred[j];
            y[j] = i;
            SWAP_INDICES(j, x[i]);
            k++;
        }
    }
    FREE(pred);
    return 0;
}
/** Solve a dense linear assignment problem with the Jonker-Volgenant
 * algorithm.
 *
 * \param cost        n_rows x n_cols cost matrix (CV_32F).
 * \param extend_cost pad a non-square matrix to square form; a non-square
 *                    matrix without this flag is an error.
 * \param cost_limit  maximum admissible assignment cost (pass LARGE to
 *                    disable the limit).
 * \param x           out: x[i] = column assigned to row i, or -1.
 * \param y           out: y[j] = row assigned to column j, or -1.
 * \return 0 on success; throws on invalid input or solver failure.
 */
int lapjv_internal(
    const cv::Mat &cost, const bool extend_cost, const float cost_limit,
    int *x, int *y ) {
  int n_rows = cost.rows;
  int n_cols = cost.cols;
  // Initialize n so every control path leaves it defined (previously it
  // stayed uninitialized until one of the branches below assigned it).
  int n = 0;
  if (n_rows == n_cols) {
    n = n_rows;
  } else if (!extend_cost) {
    throw std::invalid_argument(
        "Square cost array expected. If cost is intentionally non-square, "
        "pass extend_cost=True.");
  }
  // Get extend cost
  if (extend_cost || cost_limit < LARGE) {
    n = n_rows + n_cols;
  }
  cv::Mat cost_expand(n, n, CV_32F);
  float expand_value;
  if (cost_limit < LARGE) {
    expand_value = cost_limit / 2;
  } else {
    double max_v;
    minMaxLoc(cost, nullptr, &max_v);
    expand_value = (float)max_v + 1;
  }
  // Padded square matrix: original costs top-left, zeros bottom-right,
  // expand_value in the mixed real/dummy quadrants.
  for (int i = 0; i < n; ++i) {
    for (int j = 0; j < n; ++j) {
      cost_expand.at<float>(i, j) = expand_value;
      if (i >= n_rows && j >= n_cols) {
        cost_expand.at<float>(i, j) = 0;
      } else if (i < n_rows && j < n_cols) {
        cost_expand.at<float>(i, j) = cost.at<float>(i, j);
      }
    }
  }
  // Convert Mat to pointer array
  float **cost_ptr;
  NEW(cost_ptr, float *, n);
  for (int i = 0; i < n; ++i) {
    NEW(cost_ptr[i], float, n);
  }
  for (int i = 0; i < n; ++i) {
    for (int j = 0; j < n; ++j) {
      cost_ptr[i][j] = cost_expand.at<float>(i, j);
    }
  }
  int ret;
  int *free_rows;
  float *v;
  int *x_c;
  int *y_c;
  NEW(free_rows, int, n);
  NEW(v, float, n);
  NEW(x_c, int, n);
  NEW(y_c, int, n);
  // Column reduction, a bounded number of augmenting-row-reduction sweeps,
  // then shortest-path augmentation for whatever rows remain free.
  ret = _ccrrt_dense(n, cost_ptr, free_rows, x_c, y_c, v);
  int i = 0;
  while (ret > 0 && i < 2) {
    ret = _carr_dense(n, cost_ptr, ret, free_rows, x_c, y_c, v);
    i++;
  }
  if (ret > 0) {
    ret = _ca_dense(n, cost_ptr, ret, free_rows, x_c, y_c, v);
  }
  FREE(v);
  FREE(free_rows);
  for (int i = 0; i < n; ++i) {
    FREE(cost_ptr[i]);
  }
  FREE(cost_ptr);
  if (ret != 0) {
    // Release the solver output buffers before throwing: the previous
    // version leaked x_c and y_c on every error path.
    FREE(x_c);
    FREE(y_c);
    if (ret == -1) {
      throw "Out of memory.";
    }
    throw "Unknown error (lapjv_internal)";
  }
  // Get output of x, y, opt
  for (int i = 0; i < n; ++i) {
    if (i < n_rows) {
      x[i] = x_c[i];
      // Assignments landing in padding columns mean "unmatched".
      if (x[i] >= n_cols) {
        x[i] = -1;
      }
    }
    if (i < n_cols) {
      y[i] = y_c[i];
      if (y[i] >= n_rows) {
        y[i] = -1;
      }
    }
  }
  FREE(x_c);
  FREE(y_c);
  return ret;
}
} // namespace PaddleDetection
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <glog/logging.h>
#include <math.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <algorithm>
#include <iostream>
#include <numeric>
#include <string>
#include <vector>
#ifdef _WIN32
#include <direct.h>
#include <io.h>
#elif LINUX
#include <stdarg.h>
#include <sys/stat.h>
#endif
#include <gflags/gflags.h>
#include "include/object_detector.h"
// Command-line flags (gflags) for the object-detection demo: model/input
// paths, device selection, TensorRT options and benchmark switches.
DEFINE_string(model_dir, "", "Path of inference model");
DEFINE_string(image_file, "", "Path of input image");
DEFINE_string(image_dir,
              "",
              "Dir of input image, `image_file` has a higher priority.");
DEFINE_int32(batch_size, 1, "batch_size");
DEFINE_string(
    video_file,
    "",
    "Path of input video, `video_file` or `camera_id` has a highest priority.");
DEFINE_int32(camera_id, -1, "Device id of camera to predict");
DEFINE_bool(
    use_gpu,
    false,
    "Deprecated, please use `--device` to set the device you want to run.");
DEFINE_string(device,
              "CPU",
              "Choose the device you want to run, it can be: CPU/GPU/XPU, "
              "default is CPU.");
DEFINE_double(threshold, 0.5, "Threshold of score.");
DEFINE_string(output_dir, "output", "Directory of output visualization files.");
DEFINE_string(run_mode,
              "paddle",
              "Mode of running(paddle/trt_fp32/trt_fp16/trt_int8)");
DEFINE_int32(gpu_id, 0, "Device id of GPU to execute");
DEFINE_bool(run_benchmark,
            false,
            "Whether to predict a image_file repeatedly for benchmark");
DEFINE_bool(use_mkldnn, false, "Whether use mkldnn with CPU");
DEFINE_int32(cpu_threads, 1, "Num of threads with CPU");
DEFINE_int32(trt_min_shape, 1, "Min shape of TRT DynamicShapeI");
DEFINE_int32(trt_max_shape, 1280, "Max shape of TRT DynamicShapeI");
DEFINE_int32(trt_opt_shape, 640, "Opt shape of TRT DynamicShapeI");
DEFINE_bool(trt_calib_mode,
            false,
            "If the model is produced by TRT offline quantitative calibration, "
            "trt_calib_mode need to set True");
// Print configuration, data, model and timing statistics to the log.
// det_time holds accumulated {preprocess, inference, postprocess} times in
// milliseconds; img_num is the number of images they cover.
void PrintBenchmarkLog(std::vector<double> det_time, int img_num) {
  LOG(INFO) << "----------------------- Config info -----------------------";
  LOG(INFO) << "runtime_device: " << FLAGS_device;
  LOG(INFO) << "ir_optim: "
            << "True";
  LOG(INFO) << "enable_memory_optim: "
            << "True";
  // find() returns std::string::npos (size_t) when absent; compare against
  // npos explicitly instead of relying on the npos -> int narrowing.
  bool has_trt = FLAGS_run_mode.find("trt") != std::string::npos;
  if (has_trt) {
    LOG(INFO) << "enable_tensorrt: "
              << "True";
    std::string precision = FLAGS_run_mode.substr(4, 8);
    LOG(INFO) << "precision: " << precision;
  } else {
    LOG(INFO) << "enable_tensorrt: "
              << "False";
    LOG(INFO) << "precision: "
              << "fp32";
  }
  LOG(INFO) << "enable_mkldnn: " << (FLAGS_use_mkldnn ? "True" : "False");
  LOG(INFO) << "cpu_math_library_num_threads: " << FLAGS_cpu_threads;
  LOG(INFO) << "----------------------- Data info -----------------------";
  LOG(INFO) << "batch_size: " << FLAGS_batch_size;
  LOG(INFO) << "input_shape: "
            << "dynamic shape";
  LOG(INFO) << "----------------------- Model info -----------------------";
  FLAGS_model_dir.erase(FLAGS_model_dir.find_last_not_of("/") + 1);
  LOG(INFO) << "model_name: "
            << FLAGS_model_dir.substr(FLAGS_model_dir.find_last_of('/') + 1);
  LOG(INFO) << "----------------------- Perf info ------------------------";
  // Seed std::accumulate with 0. (double): an int seed truncates every
  // partial sum of the double timings.
  LOG(INFO) << "Total number of predicted data: " << img_num
            << " and total time spent(ms): "
            << std::accumulate(det_time.begin(), det_time.end(), 0.);
  // Guard against division by zero when no image was processed.
  img_num = std::max(1, img_num);
  LOG(INFO) << "preproce_time(ms): " << det_time[0] / img_num
            << ", inference_time(ms): " << det_time[1] / img_num
            << ", postprocess_time(ms): " << det_time[2] / img_num;
}
// Return the directory portion of `filepath` (everything before the last
// path separator), or the empty string when no separator is present.
static std::string DirName(const std::string& filepath) {
  const auto sep_pos = filepath.rfind(OS_PATH_SEP);
  return sep_pos == std::string::npos ? "" : filepath.substr(0, sep_pos);
}
// Return true when `path` exists (file or directory), using the
// platform-appropriate stat call.
static bool PathExists(const std::string& path) {
#ifdef _WIN32
  struct _stat buffer;
  return (_stat(path.c_str(), &buffer) == 0);
#else
  struct stat buffer;
  return (stat(path.c_str(), &buffer) == 0);
#endif  // !_WIN32
}
// Create a single directory level; no-op when it already exists.
// Throws std::runtime_error when creation fails (e.g. missing parent).
static void MkDir(const std::string& path) {
  if (PathExists(path)) return;
  int ret = 0;
#ifdef _WIN32
  ret = _mkdir(path.c_str());
#else
  ret = mkdir(path.c_str(), 0755);
#endif  // !_WIN32
  if (ret != 0) {
    std::string path_error(path);
    path_error += " mkdir failed!";
    throw std::runtime_error(path_error);
  }
}
// Recursively create `path` and any missing parent directories
// (equivalent of `mkdir -p`).
static void MkDirs(const std::string& path) {
  if (path.empty()) return;
  if (PathExists(path)) return;
  // Create the parent chain first, then the leaf.
  MkDirs(DirName(path));
  MkDir(path);
}
// Run object detection frame-by-frame over a video file (or camera stream
// when FLAGS_camera_id is set) and write a visualized copy into output_dir.
void PredictVideo(const std::string& video_path,
                  PaddleDetection::ObjectDetector* det,
                  const std::string& output_dir = "output") {
  // Open video
  cv::VideoCapture capture;
  std::string video_out_name = "output.mp4";
  if (FLAGS_camera_id != -1) {
    capture.open(FLAGS_camera_id);
  } else {
    capture.open(video_path.c_str());
    // Name the output file after the input video.
    video_out_name =
        video_path.substr(video_path.find_last_of(OS_PATH_SEP) + 1);
  }
  if (!capture.isOpened()) {
    printf("can not open video : %s\n", video_path.c_str());
    return;
  }
  // Get Video info : resolution, fps, frame count
  int video_width = static_cast<int>(capture.get(CV_CAP_PROP_FRAME_WIDTH));
  int video_height = static_cast<int>(capture.get(CV_CAP_PROP_FRAME_HEIGHT));
  int video_fps = static_cast<int>(capture.get(CV_CAP_PROP_FPS));
  int video_frame_count =
      static_cast<int>(capture.get(CV_CAP_PROP_FRAME_COUNT));
  printf("fps: %d, frame_count: %d\n", video_fps, video_frame_count);
  // Create VideoWriter for output
  cv::VideoWriter video_out;
  std::string video_out_path(output_dir);
  if (output_dir.rfind(OS_PATH_SEP) != output_dir.size() - 1) {
    video_out_path += OS_PATH_SEP;
  }
  video_out_path += video_out_name;
  // NOTE(review): 0x00000021 appears to be a codec tag for H.264 in .mp4
  // output — confirm against the OpenCV build in use.
  video_out.open(video_out_path.c_str(),
                 0x00000021,
                 video_fps,
                 cv::Size(video_width, video_height),
                 true);
  if (!video_out.isOpened()) {
    printf("create video writer failed!\n");
    return;
  }
  std::vector<PaddleDetection::ObjectResult> result;
  std::vector<int> bbox_num;
  // Filled by Predict on every frame; not reported by this function.
  std::vector<double> det_times;
  auto labels = det->GetLabelList();
  auto colormap = PaddleDetection::GenerateColorMap(labels.size());
  // Capture all frames and do inference
  cv::Mat frame;
  int frame_id = 1;
  bool is_rbox = false;
  while (capture.read(frame)) {
    if (frame.empty()) {
      break;
    }
    std::vector<cv::Mat> imgs;
    imgs.push_back(frame);
    printf("detect frame: %d\n", frame_id);
    det->Predict(imgs, FLAGS_threshold, 0, 1, &result, &bbox_num, &det_times);
    // Keep only confident, valid detections for visualization.
    std::vector<PaddleDetection::ObjectResult> out_result;
    for (const auto& item : result) {
      if (item.confidence < FLAGS_threshold || item.class_id == -1) {
        continue;
      }
      out_result.push_back(item);
      if (item.rect.size() > 6) {
        // Eight coordinates: rotated box.
        is_rbox = true;
        printf("class=%d confidence=%.4f rect=[%d %d %d %d %d %d %d %d]\n",
               item.class_id,
               item.confidence,
               item.rect[0],
               item.rect[1],
               item.rect[2],
               item.rect[3],
               item.rect[4],
               item.rect[5],
               item.rect[6],
               item.rect[7]);
      } else {
        printf("class=%d confidence=%.4f rect=[%d %d %d %d]\n",
               item.class_id,
               item.confidence,
               item.rect[0],
               item.rect[1],
               item.rect[2],
               item.rect[3]);
      }
    }
    cv::Mat out_im = PaddleDetection::VisualizeResult(
        frame, out_result, labels, colormap, is_rbox);
    video_out.write(out_im);
    frame_id += 1;
  }
  capture.release();
  video_out.release();
}
void PredictImage(const std::vector<std::string> all_img_paths,
const int batch_size,
const double threshold,
const bool run_benchmark,
PaddleDetection::ObjectDetector* det,
const std::string& output_dir = "output") {
std::vector<double> det_t = {0, 0, 0};
int steps = ceil(float(all_img_paths.size()) / batch_size);
printf("total images = %d, batch_size = %d, total steps = %d\n",
all_img_paths.size(),
batch_size,
steps);
for (int idx = 0; idx < steps; idx++) {
std::vector<cv::Mat> batch_imgs;
int left_image_cnt = all_img_paths.size() - idx * batch_size;
if (left_image_cnt > batch_size) {
left_image_cnt = batch_size;
}
for (int bs = 0; bs < left_image_cnt; bs++) {
std::string image_file_path = all_img_paths.at(idx * batch_size + bs);
cv::Mat im = cv::imread(image_file_path, 1);
batch_imgs.insert(batch_imgs.end(), im);
}
// Store all detected result
std::vector<PaddleDetection::ObjectResult> result;
std::vector<int> bbox_num;
std::vector<double> det_times;
bool is_rbox = false;
if (run_benchmark) {
det->Predict(
batch_imgs, threshold, 10, 10, &result, &bbox_num, &det_times);
} else {
det->Predict(batch_imgs, threshold, 0, 1, &result, &bbox_num, &det_times);
// get labels and colormap
auto labels = det->GetLabelList();
auto colormap = PaddleDetection::GenerateColorMap(labels.size());
int item_start_idx = 0;
for (int i = 0; i < left_image_cnt; i++) {
cv::Mat im = batch_imgs[i];
std::vector<PaddleDetection::ObjectResult> im_result;
int detect_num = 0;
for (int j = 0; j < bbox_num[i]; j++) {
PaddleDetection::ObjectResult item = result[item_start_idx + j];
if (item.confidence < threshold || item.class_id == -1) {
continue;
}
detect_num += 1;
im_result.push_back(item);
if (item.rect.size() > 6) {
is_rbox = true;
printf("class=%d confidence=%.4f rect=[%d %d %d %d %d %d %d %d]\n",
item.class_id,
item.confidence,
item.rect[0],
item.rect[1],
item.rect[2],
item.rect[3],
item.rect[4],
item.rect[5],
item.rect[6],
item.rect[7]);
} else {
printf("class=%d confidence=%.4f rect=[%d %d %d %d]\n",
item.class_id,
item.confidence,
item.rect[0],
item.rect[1],
item.rect[2],
item.rect[3]);
}
}
std::cout << all_img_paths.at(idx * batch_size + i)
<< " The number of detected box: " << detect_num << std::endl;
item_start_idx = item_start_idx + bbox_num[i];
// Visualization result
cv::Mat vis_img = PaddleDetection::VisualizeResult(
im, im_result, labels, colormap, is_rbox);
std::vector<int> compression_params;
compression_params.push_back(CV_IMWRITE_JPEG_QUALITY);
compression_params.push_back(95);
std::string output_path(output_dir);
if (output_dir.rfind(OS_PATH_SEP) != output_dir.size() - 1) {
output_path += OS_PATH_SEP;
}
std::string image_file_path = all_img_paths.at(idx * batch_size + i);
output_path +=
image_file_path.substr(image_file_path.find_last_of('/') + 1);
cv::imwrite(output_path, vis_img, compression_params);
printf("Visualized output saved as %s\n", output_path.c_str());
}
}
det_t[0] += det_times[0];
det_t[1] += det_times[1];
det_t[2] += det_times[2];
det_times.clear();
}
PrintBenchmarkLog(det_t, all_img_paths.size());
}
// Entry point: validate flags, build the detector, then run image or video
// inference depending on which input flags were supplied.
int main(int argc, char** argv) {
  // Parsing command-line
  google::ParseCommandLineFlags(&argc, &argv, true);
  // Require a model and at least one input source (image, dir, or video).
  if (FLAGS_model_dir.empty() ||
      (FLAGS_image_file.empty() && FLAGS_image_dir.empty() &&
       FLAGS_video_file.empty())) {
    std::cout << "Usage: ./main --model_dir=/PATH/TO/INFERENCE_MODEL/ "
              << "--image_file=/PATH/TO/INPUT/IMAGE/" << std::endl;
    return -1;
  }
  if (!(FLAGS_run_mode == "paddle" || FLAGS_run_mode == "trt_fp32" ||
        FLAGS_run_mode == "trt_fp16" || FLAGS_run_mode == "trt_int8")) {
    std::cout
        << "run_mode should be 'paddle', 'trt_fp32', 'trt_fp16' or 'trt_int8'.";
    return -1;
  }
  // Normalize the device name so lower-case spellings are accepted.
  transform(FLAGS_device.begin(),
            FLAGS_device.end(),
            FLAGS_device.begin(),
            ::toupper);
  if (!(FLAGS_device == "CPU" || FLAGS_device == "GPU" ||
        FLAGS_device == "XPU")) {
    std::cout << "device should be 'CPU', 'GPU' or 'XPU'.";
    return -1;
  }
  if (FLAGS_use_gpu) {
    std::cout << "Deprecated, please use `--device` to set the device you want "
                 "to run.";
    return -1;
  }
  // Load model and create a object detector
  PaddleDetection::ObjectDetector det(FLAGS_model_dir,
                                      FLAGS_device,
                                      FLAGS_use_mkldnn,
                                      FLAGS_cpu_threads,
                                      FLAGS_run_mode,
                                      FLAGS_batch_size,
                                      FLAGS_gpu_id,
                                      FLAGS_trt_min_shape,
                                      FLAGS_trt_max_shape,
                                      FLAGS_trt_opt_shape,
                                      FLAGS_trt_calib_mode);
  // Do inference on input video or image
  if (!PathExists(FLAGS_output_dir)) {
    MkDirs(FLAGS_output_dir);
  }
  if (!FLAGS_video_file.empty() || FLAGS_camera_id != -1) {
    PredictVideo(FLAGS_video_file, &det, FLAGS_output_dir);
  } else if (!FLAGS_image_file.empty() || !FLAGS_image_dir.empty()) {
    // Collect the input list: a single file, or every file in a directory.
    std::vector<std::string> all_img_paths;
    std::vector<cv::String> cv_all_img_paths;
    if (!FLAGS_image_file.empty()) {
      all_img_paths.push_back(FLAGS_image_file);
      if (FLAGS_batch_size > 1) {
        std::cout << "batch_size should be 1, when set `image_file`."
                  << std::endl;
        return -1;
      }
    } else {
      cv::glob(FLAGS_image_dir, cv_all_img_paths);
      for (const auto& img_path : cv_all_img_paths) {
        all_img_paths.push_back(img_path);
      }
    }
    PredictImage(all_img_paths,
                 FLAGS_batch_size,
                 FLAGS_threshold,
                 FLAGS_run_benchmark,
                 &det,
                 FLAGS_output_dir);
  }
  return 0;
}
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <glog/logging.h>
#include <math.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <algorithm>
#include <iostream>
#include <numeric>
#include <string>
#include <vector>
#ifdef _WIN32
#include <direct.h>
#include <io.h>
#elif LINUX
#include <stdarg.h>
#include <sys/stat.h>
#endif
#include <gflags/gflags.h>
#include <opencv2/opencv.hpp>
#include "include/jde_detector.h"
#include "include/object_detector.h"
// Command-line flags (gflags) for the JDE multi-object-tracking demo:
// model/video paths, device selection, TensorRT options and benchmarking.
DEFINE_string(model_dir, "", "Path of inference model");
DEFINE_int32(batch_size, 1, "batch_size");
DEFINE_string(
    video_file,
    "",
    "Path of input video, `video_file` or `camera_id` has a highest priority.");
DEFINE_int32(camera_id, -1, "Device id of camera to predict");
DEFINE_bool(
    use_gpu,
    false,
    "Deprecated, please use `--device` to set the device you want to run.");
DEFINE_string(device,
              "CPU",
              "Choose the device you want to run, it can be: CPU/GPU/XPU, "
              "default is CPU.");
DEFINE_double(threshold, 0.5, "Threshold of score.");
DEFINE_string(output_dir, "output", "Directory of output visualization files.");
DEFINE_string(run_mode,
              "paddle",
              "Mode of running(paddle/trt_fp32/trt_fp16/trt_int8)");
DEFINE_int32(gpu_id, 0, "Device id of GPU to execute");
DEFINE_bool(run_benchmark,
            false,
            "Whether to predict a image_file repeatedly for benchmark");
DEFINE_bool(use_mkldnn, false, "Whether use mkldnn with CPU");
DEFINE_int32(cpu_threads, 1, "Num of threads with CPU");
DEFINE_int32(trt_min_shape, 1, "Min shape of TRT DynamicShapeI");
DEFINE_int32(trt_max_shape, 1280, "Max shape of TRT DynamicShapeI");
DEFINE_int32(trt_opt_shape, 640, "Opt shape of TRT DynamicShapeI");
DEFINE_bool(trt_calib_mode,
            false,
            "If the model is produced by TRT offline quantitative calibration, "
            "trt_calib_mode need to set True");
// Print configuration, data, model and timing statistics to the log.
// det_time holds accumulated {preprocess, inference, postprocess} times in
// milliseconds; img_num is the number of frames they cover.
void PrintBenchmarkLog(std::vector<double> det_time, int img_num) {
  LOG(INFO) << "----------------------- Config info -----------------------";
  LOG(INFO) << "runtime_device: " << FLAGS_device;
  LOG(INFO) << "ir_optim: "
            << "True";
  LOG(INFO) << "enable_memory_optim: "
            << "True";
  // find() returns std::string::npos (size_t) when absent; compare against
  // npos explicitly instead of relying on the npos -> int narrowing.
  bool has_trt = FLAGS_run_mode.find("trt") != std::string::npos;
  if (has_trt) {
    LOG(INFO) << "enable_tensorrt: "
              << "True";
    std::string precision = FLAGS_run_mode.substr(4, 8);
    LOG(INFO) << "precision: " << precision;
  } else {
    LOG(INFO) << "enable_tensorrt: "
              << "False";
    LOG(INFO) << "precision: "
              << "fp32";
  }
  LOG(INFO) << "enable_mkldnn: " << (FLAGS_use_mkldnn ? "True" : "False");
  LOG(INFO) << "cpu_math_library_num_threads: " << FLAGS_cpu_threads;
  LOG(INFO) << "----------------------- Data info -----------------------";
  LOG(INFO) << "batch_size: " << FLAGS_batch_size;
  LOG(INFO) << "input_shape: "
            << "dynamic shape";
  LOG(INFO) << "----------------------- Model info -----------------------";
  FLAGS_model_dir.erase(FLAGS_model_dir.find_last_not_of("/") + 1);
  LOG(INFO) << "model_name: "
            << FLAGS_model_dir.substr(FLAGS_model_dir.find_last_of('/') + 1);
  LOG(INFO) << "----------------------- Perf info ------------------------";
  // Seed std::accumulate with 0. (double): an int seed truncates every
  // partial sum of the double timings.
  LOG(INFO) << "Total number of predicted data: " << img_num
            << " and total time spent(ms): "
            << std::accumulate(det_time.begin(), det_time.end(), 0.);
  // Guard against division by zero when no frame was processed.
  img_num = std::max(1, img_num);
  LOG(INFO) << "preproce_time(ms): " << det_time[0] / img_num
            << ", inference_time(ms): " << det_time[1] / img_num
            << ", postprocess_time(ms): " << det_time[2] / img_num;
}
// Return the directory component of `filepath` — everything before the
// last path separator — or "" when the path has no separator.
static std::string DirName(const std::string& filepath) {
  auto pos = filepath.rfind(OS_PATH_SEP);
  if (pos == std::string::npos) {
    return "";
  }
  return filepath.substr(0, pos);
}
// Return true when `path` exists (file or directory), using the
// platform-appropriate stat call.
static bool PathExists(const std::string& path) {
#ifdef _WIN32
  struct _stat buffer;
  return (_stat(path.c_str(), &buffer) == 0);
#else
  struct stat buffer;
  return (stat(path.c_str(), &buffer) == 0);
#endif  // !_WIN32
}
// Create a single directory level; no-op when it already exists.
// Throws std::runtime_error when creation fails (e.g. missing parent).
static void MkDir(const std::string& path) {
  if (PathExists(path)) return;
  int ret = 0;
#ifdef _WIN32
  ret = _mkdir(path.c_str());
#else
  ret = mkdir(path.c_str(), 0755);
#endif  // !_WIN32
  if (ret != 0) {
    std::string path_error(path);
    path_error += " mkdir failed!";
    throw std::runtime_error(path_error);
  }
}
// Recursively create `path` and any missing parent directories
// (equivalent of `mkdir -p`).
static void MkDirs(const std::string& path) {
  if (path.empty()) return;
  if (PathExists(path)) return;
  // Create the parent chain first, then the leaf.
  MkDirs(DirName(path));
  MkDir(path);
}
// Run JDE multi-object tracking frame-by-frame over a video file (or
// camera stream when FLAGS_camera_id is set), overlay tracks plus a live
// FPS estimate, and write the visualized video into `output_dir`.
void PredictVideo(const std::string& video_path,
                  PaddleDetection::JDEDetector* mot,
                  const std::string& output_dir = "output") {
  // Open video
  cv::VideoCapture capture;
  std::string video_out_name = "output.mp4";
  if (FLAGS_camera_id != -1) {
    capture.open(FLAGS_camera_id);
  } else {
    capture.open(video_path.c_str());
    // Name the output file after the input video.
    video_out_name =
        video_path.substr(video_path.find_last_of(OS_PATH_SEP) + 1);
  }
  if (!capture.isOpened()) {
    printf("can not open video : %s\n", video_path.c_str());
    return;
  }
  // Get Video info : resolution, fps, frame count
  int video_width = static_cast<int>(capture.get(CV_CAP_PROP_FRAME_WIDTH));
  int video_height = static_cast<int>(capture.get(CV_CAP_PROP_FRAME_HEIGHT));
  int video_fps = static_cast<int>(capture.get(CV_CAP_PROP_FPS));
  int video_frame_count =
      static_cast<int>(capture.get(CV_CAP_PROP_FRAME_COUNT));
  printf("fps: %d, frame_count: %d\n", video_fps, video_frame_count);
  // Create VideoWriter for output
  cv::VideoWriter video_out;
  std::string video_out_path(output_dir);
  if (output_dir.rfind(OS_PATH_SEP) != output_dir.size() - 1) {
    video_out_path += OS_PATH_SEP;
  }
  video_out_path += video_out_name;
  // NOTE(review): 0x00000021 appears to be a codec tag for H.264 in .mp4
  // output — confirm against the OpenCV build in use.
  video_out.open(video_out_path.c_str(),
                 0x00000021,
                 video_fps,
                 cv::Size(video_width, video_height),
                 true);
  if (!video_out.isOpened()) {
    printf("create video writer failed!\n");
    return;
  }
  PaddleDetection::MOT_Result result;
  std::vector<double> det_times(3);
  double times;
  // Capture all frames and do inference
  cv::Mat frame;
  int frame_id = 1;
  while (capture.read(frame)) {
    if (frame.empty()) {
      break;
    }
    std::vector<cv::Mat> imgs;
    imgs.push_back(frame);
    printf("detect frame: %d\n", frame_id);
    mot->Predict(imgs, FLAGS_threshold, 0, 1, &result, &det_times);
    frame_id += 1;
    // Seed std::accumulate with 0. (double): the previous int seed
    // truncated every partial sum and produced an integer-valued average
    // frame time, distorting the FPS overlay.
    times = std::accumulate(det_times.begin(), det_times.end(), 0.) / frame_id;
    cv::Mat out_im = PaddleDetection::VisualizeTrackResult(
        frame, result, 1000. / times, frame_id);
    video_out.write(out_im);
  }
  capture.release();
  video_out.release();
  PrintBenchmarkLog(det_times, frame_id);
  printf("Visualized output saved as %s\n", video_out_path.c_str());
}
// Entry point: validate flags, build the JDE tracker, then run tracking
// over the input video.
int main(int argc, char** argv) {
  // Parsing command-line
  google::ParseCommandLineFlags(&argc, &argv, true);
  // The tracking demo requires both a model and a video input.
  if (FLAGS_model_dir.empty() || FLAGS_video_file.empty()) {
    std::cout << "Usage: ./main --model_dir=/PATH/TO/INFERENCE_MODEL/ "
              << "--video_file=/PATH/TO/INPUT/VIDEO/" << std::endl;
    return -1;
  }
  if (!(FLAGS_run_mode == "paddle" || FLAGS_run_mode == "trt_fp32" ||
        FLAGS_run_mode == "trt_fp16" || FLAGS_run_mode == "trt_int8")) {
    std::cout
        << "run_mode should be 'paddle', 'trt_fp32', 'trt_fp16' or 'trt_int8'.";
    return -1;
  }
  // Normalize the device name so lower-case spellings are accepted.
  transform(FLAGS_device.begin(),
            FLAGS_device.end(),
            FLAGS_device.begin(),
            ::toupper);
  if (!(FLAGS_device == "CPU" || FLAGS_device == "GPU" ||
        FLAGS_device == "XPU")) {
    std::cout << "device should be 'CPU', 'GPU' or 'XPU'.";
    return -1;
  }
  if (FLAGS_use_gpu) {
    std::cout << "Deprecated, please use `--device` to set the device you want "
                 "to run.";
    return -1;
  }
  // Do inference on input video or image
  PaddleDetection::JDEDetector mot(FLAGS_model_dir,
                                   FLAGS_device,
                                   FLAGS_use_mkldnn,
                                   FLAGS_cpu_threads,
                                   FLAGS_run_mode,
                                   FLAGS_batch_size,
                                   FLAGS_gpu_id,
                                   FLAGS_trt_min_shape,
                                   FLAGS_trt_max_shape,
                                   FLAGS_trt_opt_shape,
                                   FLAGS_trt_calib_mode);
  if (!PathExists(FLAGS_output_dir)) {
    MkDirs(FLAGS_output_dir);
  }
  PredictVideo(FLAGS_video_file, &mot, FLAGS_output_dir);
  return 0;
}
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <glog/logging.h>
#include <math.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <algorithm>
#include <iostream>
#include <numeric>
#include <string>
#include <vector>
#ifdef _WIN32
#include <direct.h>
#include <io.h>
#elif LINUX
#include <stdarg.h>
#endif
#include <gflags/gflags.h>
#include "include/keypoint_detector.h"
#include "include/object_detector.h"
#include "include/preprocess_op.h"
// Command-line flags (gflags) for the detector + keypoint pipeline demo:
// two model paths (detector and keypoint), input paths, device selection,
// TensorRT options and benchmarking switches.
DEFINE_string(model_dir, "", "Path of object detector inference model");
DEFINE_string(model_dir_keypoint,
              "",
              "Path of keypoint detector inference model");
DEFINE_string(image_file, "", "Path of input image");
DEFINE_string(image_dir,
              "",
              "Dir of input image, `image_file` has a higher priority.");
DEFINE_int32(batch_size, 1, "batch_size of object detector");
DEFINE_int32(batch_size_keypoint, 8, "batch_size of keypoint detector");
DEFINE_string(
    video_file,
    "",
    "Path of input video, `video_file` or `camera_id` has a highest priority.");
DEFINE_int32(camera_id, -1, "Device id of camera to predict");
DEFINE_bool(
    use_gpu,
    false,
    "Deprecated, please use `--device` to set the device you want to run.");
DEFINE_string(device,
              "CPU",
              "Choose the device you want to run, it can be: CPU/GPU/XPU, "
              "default is CPU.");
DEFINE_double(threshold, 0.5, "Threshold of score.");
DEFINE_double(threshold_keypoint, 0.5, "Threshold of score.");
DEFINE_string(output_dir, "output", "Directory of output visualization files.");
DEFINE_string(run_mode,
              "paddle",
              "Mode of running(paddle/trt_fp32/trt_fp16/trt_int8)");
DEFINE_int32(gpu_id, 0, "Device id of GPU to execute");
DEFINE_bool(run_benchmark,
            false,
            "Whether to predict a image_file repeatedly for benchmark");
DEFINE_bool(use_mkldnn, false, "Whether use mkldnn with CPU");
DEFINE_int32(cpu_threads, 1, "Num of threads with CPU");
DEFINE_int32(trt_min_shape, 1, "Min shape of TRT DynamicShapeI");
DEFINE_int32(trt_max_shape, 1280, "Max shape of TRT DynamicShapeI");
DEFINE_int32(trt_opt_shape, 640, "Opt shape of TRT DynamicShapeI");
DEFINE_bool(trt_calib_mode,
            false,
            "If the model is produced by TRT offline quantitative calibration, "
            "trt_calib_mode need to set True");
DEFINE_bool(use_dark, true, "Whether use dark decode in keypoint postprocess");
// Print configuration, data, model and timing statistics to the log.
// det_time holds accumulated {preprocess, inference, postprocess} times in
// milliseconds; img_num is the number of images they cover.
void PrintBenchmarkLog(std::vector<double> det_time, int img_num) {
  LOG(INFO) << "----------------------- Config info -----------------------";
  LOG(INFO) << "runtime_device: " << FLAGS_device;
  LOG(INFO) << "ir_optim: "
            << "True";
  LOG(INFO) << "enable_memory_optim: "
            << "True";
  // find() returns std::string::npos (size_t) when absent; compare against
  // npos explicitly instead of relying on the npos -> int narrowing.
  bool has_trt = FLAGS_run_mode.find("trt") != std::string::npos;
  if (has_trt) {
    LOG(INFO) << "enable_tensorrt: "
              << "True";
    std::string precision = FLAGS_run_mode.substr(4, 8);
    LOG(INFO) << "precision: " << precision;
  } else {
    LOG(INFO) << "enable_tensorrt: "
              << "False";
    LOG(INFO) << "precision: "
              << "fp32";
  }
  LOG(INFO) << "enable_mkldnn: " << (FLAGS_use_mkldnn ? "True" : "False");
  LOG(INFO) << "cpu_math_library_num_threads: " << FLAGS_cpu_threads;
  LOG(INFO) << "----------------------- Data info -----------------------";
  LOG(INFO) << "batch_size: " << FLAGS_batch_size;
  LOG(INFO) << "input_shape: "
            << "dynamic shape";
  LOG(INFO) << "----------------------- Model info -----------------------";
  FLAGS_model_dir.erase(FLAGS_model_dir.find_last_not_of(OS_PATH_SEP) + 1);
  LOG(INFO) << "model_name: " << FLAGS_model_dir;
  LOG(INFO) << "----------------------- Perf info ------------------------";
  LOG(INFO) << "Total number of predicted data: " << img_num
            << " and total time spent(ms): "
            << std::accumulate(det_time.begin(), det_time.end(), 0.);
  // Guard against division by zero when no image was processed.
  img_num = std::max(1, img_num);
  LOG(INFO) << "preproce_time(ms): " << det_time[0] / img_num
            << ", inference_time(ms): " << det_time[1] / img_num
            << ", postprocess_time(ms): " << det_time[2] / img_num;
}
// Print a benchmark summary for the keypoint detector: batch size, model
// path, and per-person average pre/inference/post times.
//
// det_time: accumulated {preprocess, inference, postprocess} times in ms.
// img_num:  number of cropped-person images fed to the keypoint model.
void PrintKptsBenchmarkLog(std::vector<double> det_time, int img_num) {
  LOG(INFO) << "----------------------- Data info -----------------------";
  LOG(INFO) << "batch_size_keypoint: " << FLAGS_batch_size_keypoint;
  LOG(INFO) << "----------------------- Model info -----------------------";
  // Drop any trailing path separators before logging the model directory.
  FLAGS_model_dir_keypoint.erase(
      FLAGS_model_dir_keypoint.find_last_not_of(OS_PATH_SEP) + 1);
  LOG(INFO) << "keypoint_model_name: " << FLAGS_model_dir_keypoint;
  LOG(INFO) << "----------------------- Perf info ------------------------";
  const double total_ms =
      std::accumulate(det_time.begin(), det_time.end(), 0.);
  LOG(INFO) << "Total number of predicted data: " << img_num
            << " and total time spent(ms): " << total_ms;
  // Avoid dividing by zero when nothing was predicted.
  const int denom = std::max(1, img_num);
  LOG(INFO) << "Average time cost per person:";
  LOG(INFO) << "preproce_time(ms): " << det_time[0] / denom
            << ", inference_time(ms): " << det_time[1] / denom
            << ", postprocess_time(ms): " << det_time[2] / denom;
}
// Return the directory part of `filepath` (everything before the last path
// separator), or an empty string when the path has no separator.
static std::string DirName(const std::string& filepath) {
  const auto sep_pos = filepath.rfind(OS_PATH_SEP);
  if (sep_pos == std::string::npos) {
    return "";
  }
  return filepath.substr(0, sep_pos);
}
// Check whether `path` exists on disk (file or directory) via stat(2),
// using the `_stat` variant on Windows.
static bool PathExists(const std::string& path) {
#ifdef _WIN32
  struct _stat st;
  return 0 == _stat(path.c_str(), &st);
#else
  struct stat st;
  return 0 == stat(path.c_str(), &st);
#endif  // !_WIN32
}
// Create a single directory level; no-op when it already exists.
// Throws std::runtime_error when the underlying mkdir call fails.
static void MkDir(const std::string& path) {
  if (PathExists(path)) {
    return;
  }
#ifdef _WIN32
  const int ret = _mkdir(path.c_str());
#else
  const int ret = mkdir(path.c_str(), 0755);
#endif  // !_WIN32
  if (ret != 0) {
    throw std::runtime_error(path + " mkdir failed!");
  }
}
// Recursively create `path` and all missing parent directories
// (mkdir -p behaviour). Empty or existing paths are a no-op.
static void MkDirs(const std::string& path) {
  if (path.empty() || PathExists(path)) {
    return;
  }
  MkDirs(DirName(path));  // ensure parents exist first
  MkDir(path);
}
// Run object detection (and optional top-down keypoint detection) on each
// frame of a video or camera feed, draw the results, and write the annotated
// frames to <output_dir>/<video_out_name>.
//
// video_path: input video file; ignored when FLAGS_camera_id != -1.
// det:        object detector applied to every frame (required).
// keypoint:   keypoint detector; may be nullptr, in which case only
//             detection boxes are visualized.
// output_dir: directory receiving the output video.
void PredictVideo(const std::string& video_path,
                  PaddleDetection::ObjectDetector* det,
                  PaddleDetection::KeyPointDetector* keypoint,
                  const std::string& output_dir = "output") {
  // Open video
  cv::VideoCapture capture;
  std::string video_out_name = "output.mp4";
  if (FLAGS_camera_id != -1) {
    capture.open(FLAGS_camera_id);
  } else {
    capture.open(video_path.c_str());
    // Reuse the input file's basename for the output video.
    video_out_name =
        video_path.substr(video_path.find_last_of(OS_PATH_SEP) + 1);
  }
  if (!capture.isOpened()) {
    printf("can not open video : %s\n", video_path.c_str());
    return;
  }
  // Get Video info : resolution, fps, frame count
  int video_width = static_cast<int>(capture.get(CV_CAP_PROP_FRAME_WIDTH));
  int video_height = static_cast<int>(capture.get(CV_CAP_PROP_FRAME_HEIGHT));
  int video_fps = static_cast<int>(capture.get(CV_CAP_PROP_FPS));
  int video_frame_count =
      static_cast<int>(capture.get(CV_CAP_PROP_FRAME_COUNT));
  printf("fps: %d, frame_count: %d\n", video_fps, video_frame_count);
  // Create VideoWriter for output
  cv::VideoWriter video_out;
  std::string video_out_path(output_dir);
  // Append a path separator only when output_dir does not already end in one.
  if (output_dir.rfind(OS_PATH_SEP) != output_dir.size() - 1) {
    video_out_path += OS_PATH_SEP;
  }
  video_out_path += video_out_name;
  // 0x00000021 is a numeric fourcc codec tag for the mp4 output
  // (presumably H.264/avc1 — confirm against the OpenCV build in use).
  video_out.open(video_out_path.c_str(),
                 0x00000021,
                 video_fps,
                 cv::Size(video_width, video_height),
                 true);
  if (!video_out.isOpened()) {
    printf("create video writer failed!\n");
    return;
  }
  // Temporal smoother for keypoints, sized to the video resolution.
  PaddleDetection::PoseSmooth smoother =
      PaddleDetection::PoseSmooth(video_width, video_height);
  std::vector<PaddleDetection::ObjectResult> result;
  std::vector<int> bbox_num;
  std::vector<double> det_times;
  auto labels = det->GetLabelList();
  auto colormap = PaddleDetection::GenerateColorMap(labels.size());
  // Store keypoint results
  std::vector<PaddleDetection::KeyPointResult> result_kpts;
  std::vector<cv::Mat> imgs_kpts;            // cropped person images awaiting a batch
  std::vector<std::vector<float>> center_bs; // crop centers per batched image
  std::vector<std::vector<float>> scale_bs;  // crop scales per batched image
  std::vector<int> colormap_kpts = PaddleDetection::GenerateColorMap(20);
  // Capture all frames and do inference
  cv::Mat frame;
  int frame_id = 1;
  bool is_rbox = false;
  while (capture.read(frame)) {
    if (frame.empty()) {
      break;
    }
    // Detection runs one frame at a time (batch of 1).
    std::vector<cv::Mat> imgs;
    imgs.push_back(frame);
    printf("detect frame: %d\n", frame_id);
    det->Predict(imgs, FLAGS_threshold, 0, 1, &result, &bbox_num, &det_times);
    // Keep only confident, valid detections.
    std::vector<PaddleDetection::ObjectResult> out_result;
    for (const auto& item : result) {
      if (item.confidence < FLAGS_threshold || item.class_id == -1) {
        continue;
      }
      out_result.push_back(item);
      // rect with >6 entries means a rotated box (8 corner coordinates).
      if (item.rect.size() > 6) {
        is_rbox = true;
        printf("class=%d confidence=%.4f rect=[%d %d %d %d %d %d %d %d]\n",
               item.class_id,
               item.confidence,
               item.rect[0],
               item.rect[1],
               item.rect[2],
               item.rect[3],
               item.rect[4],
               item.rect[5],
               item.rect[6],
               item.rect[7]);
      } else {
        printf("class=%d confidence=%.4f rect=[%d %d %d %d]\n",
               item.class_id,
               item.confidence,
               item.rect[0],
               item.rect[1],
               item.rect[2],
               item.rect[3]);
      }
    }
    if (keypoint) {
      result_kpts.clear();
      int imsize = out_result.size();
      for (int i = 0; i < imsize; i++) {
        auto item = out_result[i];
        cv::Mat crop_img;
        std::vector<double> keypoint_times;
        std::vector<int> rect = {
            item.rect[0], item.rect[1], item.rect[2], item.rect[3]};
        std::vector<float> center;
        std::vector<float> scale;
        // Only class 0 (presumably "person" — confirm with the label list)
        // is cropped and fed to the keypoint model.
        if (item.class_id == 0) {
          PaddleDetection::CropImg(frame, crop_img, rect, center, scale);
          center_bs.emplace_back(center);
          scale_bs.emplace_back(scale);
          imgs_kpts.emplace_back(crop_img);
        }
        // Flush a keypoint batch when it is full, or on the last detection
        // if any crops remain.
        if (imgs_kpts.size() == FLAGS_batch_size_keypoint ||
            ((i == imsize - 1) && !imgs_kpts.empty())) {
          keypoint->Predict(imgs_kpts,
                            center_bs,
                            scale_bs,
                            FLAGS_threshold,
                            0,
                            1,
                            &result_kpts,
                            &keypoint_times);
          imgs_kpts.clear();
          center_bs.clear();
          scale_bs.clear();
        }
      }
      // Temporal smoothing is applied only when exactly one person was
      // detected in this frame.
      if (result_kpts.size() == 1) {
        for (int i = 0; i < result_kpts.size(); i++) {
          result_kpts[i] = smoother.smooth_process(&(result_kpts[i]));
        }
      }
      cv::Mat out_im = VisualizeKptsResult(frame, result_kpts, colormap_kpts);
      video_out.write(out_im);
    } else {
      // Visualization result
      cv::Mat out_im = PaddleDetection::VisualizeResult(
          frame, out_result, labels, colormap, is_rbox);
      video_out.write(out_im);
    }
    frame_id += 1;
  }
  capture.release();
  video_out.release();
}
// Run object detection (and optional top-down keypoint detection) over a
// list of image files in batches, save visualized outputs into `output_dir`,
// and print benchmark logs at the end.
//
// all_img_paths: paths of all input images.
// batch_size:    number of images per detector batch.
// threshold:     score threshold for keeping detections.
// run_benchmark: when true, runs warmup (10) and repeats (10) for timing.
// det:           object detector (required).
// keypoint:      keypoint detector; may be nullptr.
// output_dir:    directory for the visualized output images.
void PredictImage(const std::vector<std::string> all_img_paths,
                  const int batch_size,
                  const double threshold,
                  const bool run_benchmark,
                  PaddleDetection::ObjectDetector* det,
                  PaddleDetection::KeyPointDetector* keypoint,
                  const std::string& output_dir = "output") {
  std::vector<double> det_t = {0, 0, 0};
  int steps = ceil(static_cast<float>(all_img_paths.size()) / batch_size);
  int kpts_imgs = 0;
  std::vector<double> keypoint_t = {0, 0, 0};
  // all_img_paths.size() is a size_t: use %zu, not %d (passing a size_t
  // where printf expects an int is undefined behavior).
  printf("total images = %zu, batch_size = %d, total steps = %d\n",
         all_img_paths.size(),
         batch_size,
         steps);
  for (int idx = 0; idx < steps; idx++) {
    // The final batch may contain fewer than batch_size images.
    std::vector<cv::Mat> batch_imgs;
    int left_image_cnt = all_img_paths.size() - idx * batch_size;
    if (left_image_cnt > batch_size) {
      left_image_cnt = batch_size;
    }
    for (int bs = 0; bs < left_image_cnt; bs++) {
      std::string image_file_path = all_img_paths.at(idx * batch_size + bs);
      cv::Mat im = cv::imread(image_file_path, 1);
      batch_imgs.insert(batch_imgs.end(), im);
    }
    // Store all detected result
    std::vector<PaddleDetection::ObjectResult> result;
    std::vector<int> bbox_num;
    std::vector<double> det_times;
    // Store keypoint results
    std::vector<PaddleDetection::KeyPointResult> result_kpts;
    std::vector<cv::Mat> imgs_kpts;
    std::vector<std::vector<float>> center_bs;
    std::vector<std::vector<float>> scale_bs;
    std::vector<int> colormap_kpts = PaddleDetection::GenerateColorMap(20);
    bool is_rbox = false;
    if (run_benchmark) {
      // Benchmark mode: 10 warmup iterations, 10 timed repeats.
      det->Predict(
          batch_imgs, threshold, 10, 10, &result, &bbox_num, &det_times);
    } else {
      det->Predict(batch_imgs, threshold, 0, 1, &result, &bbox_num, &det_times);
    }
    // get labels and colormap
    auto labels = det->GetLabelList();
    auto colormap = PaddleDetection::GenerateColorMap(labels.size());
    // `result` is flat across the batch; item_start_idx walks it per image.
    int item_start_idx = 0;
    for (int i = 0; i < left_image_cnt; i++) {
      cv::Mat im = batch_imgs[i];
      std::vector<PaddleDetection::ObjectResult> im_result;
      int detect_num = 0;
      for (int j = 0; j < bbox_num[i]; j++) {
        PaddleDetection::ObjectResult item = result[item_start_idx + j];
        if (item.confidence < threshold || item.class_id == -1) {
          continue;
        }
        detect_num += 1;
        im_result.push_back(item);
        // rect with >6 entries is a rotated box (8 corner coordinates).
        if (item.rect.size() > 6) {
          is_rbox = true;
          printf("class=%d confidence=%.4f rect=[%d %d %d %d %d %d %d %d]\n",
                 item.class_id,
                 item.confidence,
                 item.rect[0],
                 item.rect[1],
                 item.rect[2],
                 item.rect[3],
                 item.rect[4],
                 item.rect[5],
                 item.rect[6],
                 item.rect[7]);
        } else {
          printf("class=%d confidence=%.4f rect=[%d %d %d %d]\n",
                 item.class_id,
                 item.confidence,
                 item.rect[0],
                 item.rect[1],
                 item.rect[2],
                 item.rect[3]);
        }
      }
      std::cout << all_img_paths.at(idx * batch_size + i)
                << " The number of detected box: " << detect_num << std::endl;
      item_start_idx = item_start_idx + bbox_num[i];
      // JPEG output with quality 95.
      std::vector<int> compression_params;
      compression_params.push_back(CV_IMWRITE_JPEG_QUALITY);
      compression_params.push_back(95);
      std::string output_path(output_dir);
      if (output_dir.rfind(OS_PATH_SEP) != output_dir.size() - 1) {
        output_path += OS_PATH_SEP;
      }
      std::string image_file_path = all_img_paths.at(idx * batch_size + i);
      if (keypoint) {
        int imsize = im_result.size();
        for (int i = 0; i < imsize; i++) {
          auto item = im_result[i];
          cv::Mat crop_img;
          std::vector<double> keypoint_times;
          std::vector<int> rect = {
              item.rect[0], item.rect[1], item.rect[2], item.rect[3]};
          std::vector<float> center;
          std::vector<float> scale;
          // Only class 0 detections are cropped for keypoint inference.
          if (item.class_id == 0) {
            PaddleDetection::CropImg(im, crop_img, rect, center, scale);
            center_bs.emplace_back(center);
            scale_bs.emplace_back(scale);
            imgs_kpts.emplace_back(crop_img);
            kpts_imgs += 1;
          }
          // Flush a keypoint batch when full, or on the last detection when
          // any crops remain.
          if (imgs_kpts.size() == FLAGS_batch_size_keypoint ||
              ((i == imsize - 1) && !imgs_kpts.empty())) {
            if (run_benchmark) {
              keypoint->Predict(imgs_kpts,
                                center_bs,
                                scale_bs,
                                0.5,
                                10,
                                10,
                                &result_kpts,
                                &keypoint_times);
            } else {
              keypoint->Predict(imgs_kpts,
                                center_bs,
                                scale_bs,
                                0.5,
                                0,
                                1,
                                &result_kpts,
                                &keypoint_times);
            }
            imgs_kpts.clear();
            center_bs.clear();
            scale_bs.clear();
            keypoint_t[0] += keypoint_times[0];
            keypoint_t[1] += keypoint_times[1];
            keypoint_t[2] += keypoint_times[2];
          }
        }
        std::string kpts_savepath =
            output_path + "keypoint_" +
            image_file_path.substr(image_file_path.find_last_of(OS_PATH_SEP) + 1);
        cv::Mat kpts_vis_img =
            VisualizeKptsResult(im, result_kpts, colormap_kpts);
        cv::imwrite(kpts_savepath, kpts_vis_img, compression_params);
        printf("Visualized output saved as %s\n", kpts_savepath.c_str());
      } else {
        // Visualization result
        cv::Mat vis_img = PaddleDetection::VisualizeResult(
            im, im_result, labels, colormap, is_rbox);
        std::string det_savepath =
            output_path +
            image_file_path.substr(image_file_path.find_last_of(OS_PATH_SEP) + 1);
        cv::imwrite(det_savepath, vis_img, compression_params);
        printf("Visualized output saved as %s\n", det_savepath.c_str());
      }
    }
    det_t[0] += det_times[0];
    det_t[1] += det_times[1];
    det_t[2] += det_times[2];
  }
  PrintBenchmarkLog(det_t, all_img_paths.size());
  if (keypoint) {
    PrintKptsBenchmarkLog(keypoint_t, kpts_imgs);
  }
}
// Entry point: parse flags, validate them, build the detector(s), and
// dispatch to video prediction (video_file/camera_id) or image prediction
// (image_file/image_dir).
int main(int argc, char** argv) {
  // Parsing command-line
  google::ParseCommandLineFlags(&argc, &argv, true);
  // A model dir and at least one input source are mandatory.
  if (FLAGS_model_dir.empty() ||
      (FLAGS_image_file.empty() && FLAGS_image_dir.empty() &&
       FLAGS_video_file.empty())) {
    std::cout << "Usage: ./main --model_dir=/PATH/TO/INFERENCE_MODEL/ "
                 "(--model_dir_keypoint=/PATH/TO/INFERENCE_MODEL/)"
              << "--image_file=/PATH/TO/INPUT/IMAGE/" << std::endl;
    return -1;
  }
  if (!(FLAGS_run_mode == "paddle" || FLAGS_run_mode == "trt_fp32" ||
        FLAGS_run_mode == "trt_fp16" || FLAGS_run_mode == "trt_int8")) {
    std::cout
        << "run_mode should be 'paddle', 'trt_fp32', 'trt_fp16' or 'trt_int8'.";
    return -1;
  }
  // Normalize --device to upper case before validating it.
  transform(FLAGS_device.begin(),
            FLAGS_device.end(),
            FLAGS_device.begin(),
            ::toupper);
  if (!(FLAGS_device == "CPU" || FLAGS_device == "GPU" ||
        FLAGS_device == "XPU")) {
    std::cout << "device should be 'CPU', 'GPU' or 'XPU'.";
    return -1;
  }
  // The deprecated --use_gpu flag is rejected outright; use --device.
  if (FLAGS_use_gpu) {
    std::cout << "Deprecated, please use `--device` to set the device you want "
                 "to run.";
    return -1;
  }
  // Load model and create a object detector
  PaddleDetection::ObjectDetector det(FLAGS_model_dir,
                                      FLAGS_device,
                                      FLAGS_use_mkldnn,
                                      FLAGS_cpu_threads,
                                      FLAGS_run_mode,
                                      FLAGS_batch_size,
                                      FLAGS_gpu_id,
                                      FLAGS_trt_min_shape,
                                      FLAGS_trt_max_shape,
                                      FLAGS_trt_opt_shape,
                                      FLAGS_trt_calib_mode);
  // The keypoint detector is optional; built only when its model dir is set.
  PaddleDetection::KeyPointDetector* keypoint = nullptr;
  if (!FLAGS_model_dir_keypoint.empty()) {
    keypoint = new PaddleDetection::KeyPointDetector(FLAGS_model_dir_keypoint,
                                                     FLAGS_device,
                                                     FLAGS_use_mkldnn,
                                                     FLAGS_cpu_threads,
                                                     FLAGS_run_mode,
                                                     FLAGS_batch_size_keypoint,
                                                     FLAGS_gpu_id,
                                                     FLAGS_trt_min_shape,
                                                     FLAGS_trt_max_shape,
                                                     FLAGS_trt_opt_shape,
                                                     FLAGS_trt_calib_mode,
                                                     FLAGS_use_dark);
  }
  // Do inference on input video or image
  if (!PathExists(FLAGS_output_dir)) {
    MkDirs(FLAGS_output_dir);
  }
  // video_file / camera_id take priority over image inputs.
  if (!FLAGS_video_file.empty() || FLAGS_camera_id != -1) {
    PredictVideo(FLAGS_video_file, &det, keypoint, FLAGS_output_dir);
  } else if (!FLAGS_image_file.empty() || !FLAGS_image_dir.empty()) {
    std::vector<std::string> all_img_paths;
    std::vector<cv::String> cv_all_img_paths;
    if (!FLAGS_image_file.empty()) {
      all_img_paths.push_back(FLAGS_image_file);
      // A single image_file only makes sense with batch_size 1.
      if (FLAGS_batch_size > 1) {
        std::cout << "batch_size should be 1, when set `image_file`."
                  << std::endl;
        return -1;
      }
    } else {
      // Collect every file found under image_dir.
      cv::glob(FLAGS_image_dir, cv_all_img_paths);
      for (const auto& img_path : cv_all_img_paths) {
        all_img_paths.push_back(img_path);
      }
    }
    PredictImage(all_img_paths,
                 FLAGS_batch_size,
                 FLAGS_threshold,
                 FLAGS_run_benchmark,
                 &det,
                 keypoint,
                 FLAGS_output_dir);
  }
  delete keypoint;
  keypoint = nullptr;
  return 0;
}
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <sstream>
// for setprecision
#include <chrono>
#include <iomanip>
#include "include/object_detector.h"
namespace PaddleDetection {
// Load Model and create model predictor
// Build the Paddle inference predictor for the detector.
//
// model_dir:  directory containing model.pdmodel / model.pdiparams.
// batch_size: batch size used to size the TensorRT engine / dynamic shapes.
// run_mode:   "paddle" (native) or a TensorRT precision mode
//             ("trt_fp32"/"trt_fp16"/"trt_int8").
void ObjectDetector::LoadModel(const std::string &model_dir,
                               const int batch_size,
                               const std::string &run_mode) {
  paddle_infer::Config config;
  std::string prog_file = model_dir + OS_PATH_SEP + "model.pdmodel";
  std::string params_file = model_dir + OS_PATH_SEP + "model.pdiparams";
  config.SetModel(prog_file, params_file);
  if (this->device_ == "GPU") {
    // 200 MB initial GPU memory pool on the configured device.
    config.EnableUseGpu(200, this->gpu_id_);
    config.SwitchIrOptim(true);
    // use tensorrt
    if (run_mode != "paddle") {
      auto precision = paddle_infer::Config::Precision::kFloat32;
      if (run_mode == "trt_fp32") {
        precision = paddle_infer::Config::Precision::kFloat32;
      } else if (run_mode == "trt_fp16") {
        precision = paddle_infer::Config::Precision::kHalf;
      } else if (run_mode == "trt_int8") {
        precision = paddle_infer::Config::Precision::kInt8;
      } else {
        printf("run_mode should be 'paddle', 'trt_fp32', 'trt_fp16' or "
               "'trt_int8'");
      }
      // set tensorrt: 1 GB workspace.
      config.EnableTensorRtEngine(1 << 30, batch_size, this->min_subgraph_size_,
                                  precision, false, this->trt_calib_mode_);
      // set use dynamic shape
      if (this->use_dynamic_shape_) {
        // set DynamicShape for image tensor: min/max/opt are square shapes
        // derived from the trt_*_shape_ settings.
        const std::vector<int> min_input_shape = {
            batch_size, 3, this->trt_min_shape_, this->trt_min_shape_};
        const std::vector<int> max_input_shape = {
            batch_size, 3, this->trt_max_shape_, this->trt_max_shape_};
        const std::vector<int> opt_input_shape = {
            batch_size, 3, this->trt_opt_shape_, this->trt_opt_shape_};
        const std::map<std::string, std::vector<int>> map_min_input_shape = {
            {"image", min_input_shape}};
        const std::map<std::string, std::vector<int>> map_max_input_shape = {
            {"image", max_input_shape}};
        const std::map<std::string, std::vector<int>> map_opt_input_shape = {
            {"image", opt_input_shape}};
        config.SetTRTDynamicShapeInfo(map_min_input_shape, map_max_input_shape,
                                      map_opt_input_shape);
        std::cout << "TensorRT dynamic shape enabled" << std::endl;
      }
    }
  } else if (this->device_ == "XPU") {
    // 10 MB L3 workspace for XPU.
    config.EnableXpu(10 * 1024 * 1024);
  } else {
    config.DisableGpu();
    if (this->use_mkldnn_) {
      config.EnableMKLDNN();
      // cache 10 different shapes for mkldnn to avoid memory leak
      config.SetMkldnnCacheCapacity(10);
    }
    config.SetCpuMathLibraryNumThreads(this->cpu_math_library_num_threads_);
  }
  config.SwitchUseFeedFetchOps(false);
  config.SwitchIrOptim(true);
  config.DisableGlogInfo();
  // Memory optimization
  config.EnableMemoryOptim();
  // CreatePredictor already returns an rvalue; wrapping it in std::move is
  // a pessimizing move and was removed.
  predictor_ = CreatePredictor(config);
}
// Visualization of MaskDetector results
// Draw detection results on a copy of `img` and return it.
//
// results: detections to draw (boxes or rotated boxes, optional masks).
// lables:  class-id -> class-name lookup (sic: name kept to match the
//          declaration elsewhere).
// colormap: flat RGB triplets indexed by class id (3 ints per class).
// is_rbox: when true, rect holds 8 corner coordinates of a rotated box.
cv::Mat
VisualizeResult(const cv::Mat &img,
                const std::vector<PaddleDetection::ObjectResult> &results,
                const std::vector<std::string> &lables,
                const std::vector<int> &colormap, const bool is_rbox = false) {
  cv::Mat vis_img = img.clone();
  int img_h = vis_img.rows;
  int img_w = vis_img.cols;
  for (int i = 0; i < results.size(); ++i) {
    // Configure color and text size
    // Label text: "<class name> <confidence to 4 decimals>".
    std::ostringstream oss;
    oss << std::setiosflags(std::ios::fixed) << std::setprecision(4);
    oss << lables[results[i].class_id] << " ";
    oss << results[i].confidence;
    std::string text = oss.str();
    int c1 = colormap[3 * results[i].class_id + 0];
    int c2 = colormap[3 * results[i].class_id + 1];
    int c3 = colormap[3 * results[i].class_id + 2];
    cv::Scalar roi_color = cv::Scalar(c1, c2, c3);
    int font_face = cv::FONT_HERSHEY_COMPLEX_SMALL;
    double font_scale = 0.5f;
    // NOTE(review): OpenCV text APIs take an int thickness; 0.5 truncates
    // to 0 here — confirm this is the intended (thinnest) stroke.
    float thickness = 0.5;
    cv::Size text_size =
        cv::getTextSize(text, font_face, font_scale, thickness, nullptr);
    cv::Point origin;
    if (is_rbox) {
      // Draw object, text, and background
      // Connect the 4 corners (8 coords) as line segments, wrapping at 8.
      for (int k = 0; k < 4; k++) {
        cv::Point pt1 = cv::Point(results[i].rect[(k * 2) % 8],
                                  results[i].rect[(k * 2 + 1) % 8]);
        cv::Point pt2 = cv::Point(results[i].rect[(k * 2 + 2) % 8],
                                  results[i].rect[(k * 2 + 3) % 8]);
        cv::line(vis_img, pt1, pt2, roi_color, 2);
      }
    } else {
      // Axis-aligned box: rect = {xmin, ymin, xmax, ymax}.
      int w = results[i].rect[2] - results[i].rect[0];
      int h = results[i].rect[3] - results[i].rect[1];
      cv::Rect roi = cv::Rect(results[i].rect[0], results[i].rect[1], w, h);
      // Draw roi object, text, and background
      cv::rectangle(vis_img, roi, roi_color, 2);
      // Draw mask
      std::vector<int> mask_v = results[i].mask;
      if (mask_v.size() > 0) {
        // Rebuild the full-image mask, find its contours, and alpha-blend
        // the class color (0.4) over the masked region.
        cv::Mat mask = cv::Mat(img_h, img_w, CV_32S);
        std::memcpy(mask.data, mask_v.data(), mask_v.size() * sizeof(int));
        cv::Mat colored_img = vis_img.clone();
        std::vector<cv::Mat> contours;
        cv::Mat hierarchy;
        mask.convertTo(mask, CV_8U);
        cv::findContours(mask, contours, hierarchy, cv::RETR_CCOMP,
                         cv::CHAIN_APPROX_SIMPLE);
        cv::drawContours(colored_img, contours, -1, roi_color, -1, cv::LINE_8,
                         hierarchy, 100);
        cv::Mat debug_roi = vis_img;
        colored_img = 0.4 * colored_img + 0.6 * vis_img;
        colored_img.copyTo(vis_img, mask);
      }
    }
    // Text anchored at the box's top-left corner.
    origin.x = results[i].rect[0];
    origin.y = results[i].rect[1];
    // Configure text background
    cv::Rect text_back =
        cv::Rect(results[i].rect[0], results[i].rect[1] - text_size.height,
                 text_size.width, text_size.height);
    // Draw text, and background
    cv::rectangle(vis_img, text_back, roi_color, -1);
    cv::putText(vis_img, text, origin, font_face, font_scale,
                cv::Scalar(255, 255, 255), thickness);
  }
  return vis_img;
}
// Convert the input image to RGB and run the configured preprocess
// pipeline, leaving the results in inputs_.
void ObjectDetector::Preprocess(const cv::Mat &ori_im) {
  // Work on a clone so the caller's original image stays available for
  // postprocessing.
  cv::Mat rgb_im = ori_im.clone();
  cv::cvtColor(rgb_im, rgb_im, cv::COLOR_BGR2RGB);
  preprocessor_.Run(&rgb_im, &inputs_);
}
// Decode the raw detector output buffer into ObjectResult entries.
//
// mats:             the batch of original images (one per bbox_num entry).
// result:           output list; cleared first, then filled.
// bbox_num:         number of boxes per image; the raw buffer is flat
//                   across the batch.
// output_data_:     flat box buffer; stride 10 per box for rotated boxes
//                   (class, score, 8 corner coords) or stride 6 for
//                   axis-aligned boxes (class, score, xmin, ymin, xmax, ymax).
// output_mask_data_: flat per-box mask data when config_.mask_ is set.
// is_rbox:          selects the 10-float rotated-box layout.
void ObjectDetector::Postprocess(
    const std::vector<cv::Mat> mats,
    std::vector<PaddleDetection::ObjectResult> *result,
    std::vector<int> bbox_num, std::vector<float> output_data_,
    std::vector<int> output_mask_data_, bool is_rbox = false) {
  result->clear();
  int start_idx = 0;
  int total_num = std::accumulate(bbox_num.begin(), bbox_num.end(), 0);
  // Per-box mask stride, derived from the flat mask buffer size.
  int out_mask_dim = -1;
  if (config_.mask_) {
    out_mask_dim = output_mask_data_.size() / total_num;
  }
  for (int im_id = 0; im_id < mats.size(); im_id++) {
    cv::Mat raw_mat = mats[im_id];
    // Coordinates are used as-is (scale factors of 1).
    int rh = 1;
    int rw = 1;
    for (int j = start_idx; j < start_idx + bbox_num[im_id]; j++) {
      if (is_rbox) {
        // Rotated box: 10 floats per box.
        // Class id
        int class_id = static_cast<int>(round(output_data_[0 + j * 10]));
        // Confidence score
        float score = output_data_[1 + j * 10];
        int x1 = (output_data_[2 + j * 10] * rw);
        int y1 = (output_data_[3 + j * 10] * rh);
        int x2 = (output_data_[4 + j * 10] * rw);
        int y2 = (output_data_[5 + j * 10] * rh);
        int x3 = (output_data_[6 + j * 10] * rw);
        int y3 = (output_data_[7 + j * 10] * rh);
        int x4 = (output_data_[8 + j * 10] * rw);
        int y4 = (output_data_[9 + j * 10] * rh);
        PaddleDetection::ObjectResult result_item;
        result_item.rect = {x1, y1, x2, y2, x3, y3, x4, y4};
        result_item.class_id = class_id;
        result_item.confidence = score;
        result->push_back(result_item);
      } else {
        // Axis-aligned box: 6 floats per box.
        // Class id
        int class_id = static_cast<int>(round(output_data_[0 + j * 6]));
        // Confidence score
        float score = output_data_[1 + j * 6];
        int xmin = (output_data_[2 + j * 6] * rw);
        int ymin = (output_data_[3 + j * 6] * rh);
        int xmax = (output_data_[4 + j * 6] * rw);
        int ymax = (output_data_[5 + j * 6] * rh);
        int wd = xmax - xmin;
        int hd = ymax - ymin;
        PaddleDetection::ObjectResult result_item;
        result_item.rect = {xmin, ymin, xmax, ymax};
        result_item.class_id = class_id;
        result_item.confidence = score;
        if (config_.mask_) {
          // Copy this box's mask slice, skipping sentinel values <= -1.
          std::vector<int> mask;
          for (int k = 0; k < out_mask_dim; ++k) {
            if (output_mask_data_[k + j * out_mask_dim] > -1) {
              mask.push_back(output_mask_data_[k + j * out_mask_dim]);
            }
          }
          result_item.mask = mask;
        }
        result->push_back(result_item);
      }
    }
    start_idx += bbox_num[im_id];
  }
}
// This function is to convert output result from SOLOv2 to class ObjectResult
// This function is to convert output result from SOLOv2 to class ObjectResult
//
// For each image, keeps instances whose score >= threshold, stores their
// full-image mask, and derives a tight bounding box from the mask by
// reducing it to row/column sums and scanning for the first/last non-zero
// row and column.
void ObjectDetector::SOLOv2Postprocess(
    const std::vector<cv::Mat> mats, std::vector<ObjectResult> *result,
    std::vector<int> *bbox_num, std::vector<int> out_bbox_num_data_,
    std::vector<int64_t> out_label_data_, std::vector<float> out_score_data_,
    std::vector<uint8_t> out_global_mask_data_, float threshold) {
  for (int im_id = 0; im_id < mats.size(); im_id++) {
    cv::Mat mat = mats[im_id];
    int valid_bbox_count = 0;
    for (int bbox_id = 0; bbox_id < out_bbox_num_data_[im_id]; ++bbox_id) {
      if (out_score_data_[bbox_id] >= threshold) {
        ObjectResult result_item;
        result_item.class_id = out_label_data_[bbox_id];
        result_item.confidence = out_score_data_[bbox_id];
        // Copy this instance's full-image binary mask (rows*cols entries).
        std::vector<int> global_mask;
        for (int k = 0; k < mat.rows * mat.cols; ++k) {
          global_mask.push_back(static_cast<int>(
              out_global_mask_data_[k + bbox_id * mat.rows * mat.cols]));
        }
        // find minimize bounding box from mask
        cv::Mat mask(mat.rows, mat.cols, CV_32SC1);
        std::memcpy(mask.data, global_mask.data(),
                    global_mask.size() * sizeof(int));
        cv::Mat mask_fp;
        cv::Mat rowSum;
        cv::Mat colSum;
        std::vector<float> sum_of_row(mat.rows);
        std::vector<float> sum_of_col(mat.cols);
        mask.convertTo(mask_fp, CV_32FC1);
        // Column sums (reduce over rows) and row sums (reduce over cols).
        cv::reduce(mask_fp, colSum, 0, CV_REDUCE_SUM, CV_32FC1);
        cv::reduce(mask_fp, rowSum, 1, CV_REDUCE_SUM, CV_32FC1);
        for (int row_id = 0; row_id < mat.rows; ++row_id) {
          sum_of_row[row_id] = rowSum.at<float>(row_id, 0);
        }
        for (int col_id = 0; col_id < mat.cols; ++col_id) {
          sum_of_col[col_id] = colSum.at<float>(0, col_id);
        }
        // NOTE(review): these predicates take `int x`, so the float sums
        // are truncated before the `> 0.5` test — effectively testing
        // "sum >= 1". Harmless for integral 0/1 mask sums, but confirm the
        // intent; `float x` would match the 0.5 threshold literally.
        auto it = std::find_if(sum_of_row.begin(), sum_of_row.end(),
                               [](int x) { return x > 0.5; });
        int y1 = std::distance(sum_of_row.begin(), it);
        auto it2 = std::find_if(sum_of_col.begin(), sum_of_col.end(),
                                [](int x) { return x > 0.5; });
        int x1 = std::distance(sum_of_col.begin(), it2);
        // Scan from the end (reverse iterators) for the last non-zero
        // row/column to close the box.
        auto rit = std::find_if(sum_of_row.rbegin(), sum_of_row.rend(),
                                [](int x) { return x > 0.5; });
        int y2 = std::distance(rit, sum_of_row.rend());
        auto rit2 = std::find_if(sum_of_col.rbegin(), sum_of_col.rend(),
                                 [](int x) { return x > 0.5; });
        int x2 = std::distance(rit2, sum_of_col.rend());
        result_item.rect = {x1, y1, x2, y2};
        result_item.mask = global_mask;
        result->push_back(result_item);
        valid_bbox_count++;
      }
    }
    // One count per image: how many instances survived the threshold.
    bbox_num->push_back(valid_bbox_count);
  }
}
// Run detection on a batch of images: preprocess, (optionally pad the
// batch to a common shape), copy inputs to the predictor, run inference
// (with optional warmup/repeats for benchmarking), and decode the outputs
// into ObjectResult entries via the architecture-specific postprocess.
//
// imgs:      input images (batch).
// threshold: score threshold (used by SOLOv2 postprocess; other paths
//            filter in the caller).
// warmup:    untimed inference iterations before timing starts.
// repeats:   timed inference iterations (timing is averaged over these).
// result:    decoded detections (flat across the batch).
// bbox_num:  per-image box counts.
// times:     appended {preprocess, inference, postprocess} times in ms.
void ObjectDetector::Predict(const std::vector<cv::Mat> imgs,
                             const double threshold, const int warmup,
                             const int repeats,
                             std::vector<PaddleDetection::ObjectResult> *result,
                             std::vector<int> *bbox_num,
                             std::vector<double> *times) {
  auto preprocess_start = std::chrono::steady_clock::now();
  int batch_size = imgs.size();
  // in_data_batch
  std::vector<float> in_data_all;
  std::vector<float> im_shape_all(batch_size * 2);
  std::vector<float> scale_factor_all(batch_size * 2);
  std::vector<const float *> output_data_list_;
  std::vector<int> out_bbox_num_data_;
  std::vector<int> out_mask_data_;
  // these parameters are for SOLOv2 output
  std::vector<float> out_score_data_;
  std::vector<uint8_t> out_global_mask_data_;
  std::vector<int64_t> out_label_data_;
  // in_net img for each batch
  std::vector<cv::Mat> in_net_img_all(batch_size);
  // Preprocess image
  for (int bs_idx = 0; bs_idx < batch_size; bs_idx++) {
    cv::Mat im = imgs.at(bs_idx);
    Preprocess(im);
    // Flatten per-image shape/scale metadata into the batch buffers.
    im_shape_all[bs_idx * 2] = inputs_.im_shape_[0];
    im_shape_all[bs_idx * 2 + 1] = inputs_.im_shape_[1];
    scale_factor_all[bs_idx * 2] = inputs_.scale_factor_[0];
    scale_factor_all[bs_idx * 2 + 1] = inputs_.scale_factor_[1];
    in_data_all.insert(in_data_all.end(), inputs_.im_data_.begin(),
                       inputs_.im_data_.end());
    // collect in_net img
    in_net_img_all[bs_idx] = inputs_.in_net_im_;
  }
  // Pad Batch if batch size > 1
  // Dynamic-shaped batches are padded to one common HxW, and the flat
  // input buffer is rebuilt channel-planar (CHW) from the padded images.
  if (batch_size > 1 && CheckDynamicInput(in_net_img_all)) {
    in_data_all.clear();
    std::vector<cv::Mat> pad_img_all = PadBatch(in_net_img_all);
    int rh = pad_img_all[0].rows;
    int rw = pad_img_all[0].cols;
    int rc = pad_img_all[0].channels();
    for (int bs_idx = 0; bs_idx < batch_size; bs_idx++) {
      cv::Mat pad_img = pad_img_all[bs_idx];
      pad_img.convertTo(pad_img, CV_32FC3);
      std::vector<float> pad_data;
      pad_data.resize(rc * rh * rw);
      float *base = pad_data.data();
      for (int i = 0; i < rc; ++i) {
        cv::extractChannel(pad_img,
                           cv::Mat(rh, rw, CV_32FC1, base + i * rh * rw), i);
      }
      in_data_all.insert(in_data_all.end(), pad_data.begin(), pad_data.end());
    }
    // update in_net_shape
    inputs_.in_net_shape_ = {static_cast<float>(rh), static_cast<float>(rw)};
  }
  auto preprocess_end = std::chrono::steady_clock::now();
  // Prepare input tensor
  auto input_names = predictor_->GetInputNames();
  for (const auto &tensor_name : input_names) {
    auto in_tensor = predictor_->GetInputHandle(tensor_name);
    if (tensor_name == "image") {
      int rh = inputs_.in_net_shape_[0];
      int rw = inputs_.in_net_shape_[1];
      in_tensor->Reshape({batch_size, 3, rh, rw});
      in_tensor->CopyFromCpu(in_data_all.data());
    } else if (tensor_name == "im_shape") {
      in_tensor->Reshape({batch_size, 2});
      in_tensor->CopyFromCpu(im_shape_all.data());
    } else if (tensor_name == "scale_factor") {
      in_tensor->Reshape({batch_size, 2});
      in_tensor->CopyFromCpu(scale_factor_all.data());
    }
  }
  // Run predictor
  std::vector<std::vector<float>> out_tensor_list;
  std::vector<std::vector<int>> output_shape_list;
  bool is_rbox = false;
  int reg_max = 7;
  int num_class = 80;
  auto inference_start = std::chrono::steady_clock::now();
  if (config_.arch_ == "SOLOv2") {
    // SOLOv2 output order: bbox_num, label, score, then (optionally) the
    // global mask when config_.mask_ is set.
    // warmup
    for (int i = 0; i < warmup; i++) {
      predictor_->Run();
      // Get output tensor
      auto output_names = predictor_->GetOutputNames();
      for (int j = 0; j < output_names.size(); j++) {
        auto output_tensor = predictor_->GetOutputHandle(output_names[j]);
        std::vector<int> output_shape = output_tensor->shape();
        int out_num = std::accumulate(output_shape.begin(), output_shape.end(),
                                      1, std::multiplies<int>());
        if (j == 0) {
          out_bbox_num_data_.resize(out_num);
          output_tensor->CopyToCpu(out_bbox_num_data_.data());
        } else if (j == 1) {
          out_label_data_.resize(out_num);
          output_tensor->CopyToCpu(out_label_data_.data());
        } else if (j == 2) {
          out_score_data_.resize(out_num);
          output_tensor->CopyToCpu(out_score_data_.data());
        } else if (config_.mask_ && (j == 3)) {
          out_global_mask_data_.resize(out_num);
          output_tensor->CopyToCpu(out_global_mask_data_.data());
        }
      }
    }
    // Timing starts after warmup; only the repeats below are measured.
    inference_start = std::chrono::steady_clock::now();
    for (int i = 0; i < repeats; i++) {
      predictor_->Run();
      // Get output tensor
      out_tensor_list.clear();
      output_shape_list.clear();
      auto output_names = predictor_->GetOutputNames();
      for (int j = 0; j < output_names.size(); j++) {
        auto output_tensor = predictor_->GetOutputHandle(output_names[j]);
        std::vector<int> output_shape = output_tensor->shape();
        int out_num = std::accumulate(output_shape.begin(), output_shape.end(),
                                      1, std::multiplies<int>());
        output_shape_list.push_back(output_shape);
        if (j == 0) {
          out_bbox_num_data_.resize(out_num);
          output_tensor->CopyToCpu(out_bbox_num_data_.data());
        } else if (j == 1) {
          out_label_data_.resize(out_num);
          output_tensor->CopyToCpu(out_label_data_.data());
        } else if (j == 2) {
          out_score_data_.resize(out_num);
          output_tensor->CopyToCpu(out_score_data_.data());
        } else if (config_.mask_ && (j == 3)) {
          out_global_mask_data_.resize(out_num);
          output_tensor->CopyToCpu(out_global_mask_data_.data());
        }
      }
    }
  } else {
    // Generic path: INT32 outputs are treated as the per-image box counts,
    // output index 2 is the mask (when enabled), everything else is
    // collected as float tensors for postprocessing.
    // warmup
    for (int i = 0; i < warmup; i++) {
      predictor_->Run();
      // Get output tensor
      auto output_names = predictor_->GetOutputNames();
      for (int j = 0; j < output_names.size(); j++) {
        auto output_tensor = predictor_->GetOutputHandle(output_names[j]);
        std::vector<int> output_shape = output_tensor->shape();
        int out_num = std::accumulate(output_shape.begin(), output_shape.end(),
                                      1, std::multiplies<int>());
        if (config_.mask_ && (j == 2)) {
          out_mask_data_.resize(out_num);
          output_tensor->CopyToCpu(out_mask_data_.data());
        } else if (output_tensor->type() == paddle_infer::DataType::INT32) {
          out_bbox_num_data_.resize(out_num);
          output_tensor->CopyToCpu(out_bbox_num_data_.data());
        } else {
          std::vector<float> out_data;
          out_data.resize(out_num);
          output_tensor->CopyToCpu(out_data.data());
          out_tensor_list.push_back(out_data);
        }
      }
    }
    // Timing starts after warmup; only the repeats below are measured.
    inference_start = std::chrono::steady_clock::now();
    for (int i = 0; i < repeats; i++) {
      predictor_->Run();
      // Get output tensor
      out_tensor_list.clear();
      output_shape_list.clear();
      auto output_names = predictor_->GetOutputNames();
      for (int j = 0; j < output_names.size(); j++) {
        auto output_tensor = predictor_->GetOutputHandle(output_names[j]);
        std::vector<int> output_shape = output_tensor->shape();
        int out_num = std::accumulate(output_shape.begin(), output_shape.end(),
                                      1, std::multiplies<int>());
        output_shape_list.push_back(output_shape);
        if (config_.mask_ && (j == 2)) {
          out_mask_data_.resize(out_num);
          output_tensor->CopyToCpu(out_mask_data_.data());
        } else if (output_tensor->type() == paddle_infer::DataType::INT32) {
          out_bbox_num_data_.resize(out_num);
          output_tensor->CopyToCpu(out_bbox_num_data_.data());
        } else {
          std::vector<float> out_data;
          out_data.resize(out_num);
          output_tensor->CopyToCpu(out_data.data());
          out_tensor_list.push_back(out_data);
        }
      }
    }
  }
  auto inference_end = std::chrono::steady_clock::now();
  auto postprocess_start = std::chrono::steady_clock::now();
  // Postprocessing result
  result->clear();
  bbox_num->clear();
  if (config_.arch_ == "PicoDet") {
    // PicoDet: first output's last dim is the class count; the output at
    // index fpn_stride_.size() carries the box-distribution width (reg_max).
    for (int i = 0; i < out_tensor_list.size(); i++) {
      if (i == 0) {
        num_class = output_shape_list[i][2];
      }
      if (i == config_.fpn_stride_.size()) {
        reg_max = output_shape_list[i][2] / 4 - 1;
      }
      float *buffer = new float[out_tensor_list[i].size()];
      memcpy(buffer, &out_tensor_list[i][0],
             out_tensor_list[i].size() * sizeof(float));
      output_data_list_.push_back(buffer);
    }
    PaddleDetection::PicoDetPostProcess(
        result, output_data_list_, config_.fpn_stride_, inputs_.im_shape_,
        inputs_.scale_factor_, config_.nms_info_["score_threshold"].as<float>(),
        config_.nms_info_["nms_threshold"].as<float>(), num_class, reg_max);
    bbox_num->push_back(result->size());
  } else if (config_.arch_ == "SOLOv2") {
    SOLOv2Postprocess(imgs, result, bbox_num, out_bbox_num_data_,
                      out_label_data_, out_score_data_, out_global_mask_data_,
                      threshold);
  } else {
    // A last output dimension divisible by 10 indicates rotated boxes.
    is_rbox = output_shape_list[0][output_shape_list[0].size() - 1] % 10 == 0;
    Postprocess(imgs, result, out_bbox_num_data_, out_tensor_list[0],
                out_mask_data_, is_rbox);
    for (int k = 0; k < out_bbox_num_data_.size(); k++) {
      int tmp = out_bbox_num_data_[k];
      bbox_num->push_back(tmp);
    }
  }
  auto postprocess_end = std::chrono::steady_clock::now();
  // Append the three timing entries (ms); inference is averaged per repeat.
  std::chrono::duration<float> preprocess_diff =
      preprocess_end - preprocess_start;
  times->push_back(static_cast<double>(preprocess_diff.count() * 1000));
  std::chrono::duration<float> inference_diff = inference_end - inference_start;
  times->push_back(
      static_cast<double>(inference_diff.count() / repeats * 1000));
  std::chrono::duration<float> postprocess_diff =
      postprocess_end - postprocess_start;
  times->push_back(static_cast<double>(postprocess_diff.count() * 1000));
}
// Build a flat [r0,g0,b0, r1,g1,b1, ...] color table with one distinct
// RGB triple per class id, derived from the bits of the class index.
std::vector<int> GenerateColorMap(int num_class) {
  std::vector<int> color_map(num_class * 3, 0);
  for (int cls = 0; cls < num_class; ++cls) {
    int bits = cls;
    // Spread successive 3-bit groups of the class id across the high bits
    // of the R, G and B channels (PASCAL-VOC style colormap).
    for (int shift = 7; bits != 0; --shift, bits >>= 3) {
      color_map[cls * 3 + 0] |= ((bits >> 0) & 1) << shift;
      color_map[cls * 3 + 1] |= ((bits >> 1) & 1) << shift;
      color_map[cls * 3 + 2] |= ((bits >> 2) & 1) << shift;
    }
  }
  return color_map;
}
} // namespace PaddleDetection
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// The code is based on:
// https://github.com/RangiLyu/nanodet/blob/main/demo_mnn/nanodet_mnn.cpp
#include "include/picodet_postprocess.h"

#include <cstring>
namespace PaddleDetection {
// Schraudolph-style fast approximation of e^x: constructs the IEEE-754 bit
// pattern of the result directly from a linear function of x.
// Accuracy is within a few percent — fine for softmax scoring, not for
// precision-sensitive math.
float fast_exp(float x) {
  // 1.4426950409 ~= 1/ln(2) maps x into the exponent field; 126.93490512f
  // biases the exponent and minimizes the mean approximation error.
  // Arithmetic kept identical to the original (double intermediate).
  uint32_t bits = static_cast<uint32_t>(
      (1 << 23) * (1.4426950409 * x + 126.93490512f));
  // memcpy is the well-defined way to reinterpret the bits as a float;
  // the original union-based type punning is undefined behavior in C++.
  float result;
  std::memcpy(&result, &bits, sizeof(result));
  return result;
}
// Numerically-stable softmax: reads `length` logits from src and writes the
// normalized probabilities to dst. Subtracting the maximum first keeps the
// argument of fast_exp small, avoiding overflow. Always returns 0.
template <typename T>
int activation_function_softmax(const T *src, T *dst, int length) {
  const T peak = *std::max_element(src, src + length);
  T total{0};
  for (int idx = 0; idx < length; ++idx) {
    dst[idx] = fast_exp(src[idx] - peak);
    total += dst[idx];
  }
  for (int idx = 0; idx < length; ++idx) {
    dst[idx] /= total;
  }
  return 0;
}
// PicoDet decode
// Decode one grid cell's DFL (distribution focal loss) prediction into an
// axis-aligned box. dfl_det points at 4 * (reg_max + 1) logits — one
// (reg_max + 1)-bin distribution per box side (left, top, right, bottom).
// (x, y) is the cell's (col, row) on the feature map; stride maps cells to
// input pixels. Returns the clamped box with its label and score.
PaddleDetection::ObjectResult
disPred2Bbox(const float *&dfl_det, int label, float score, int x, int y,
             int stride, std::vector<float> im_shape, int reg_max) {
  // Cell center in network-input coordinates.
  float ct_x = (x + 0.5) * stride;
  float ct_y = (y + 0.5) * stride;
  std::vector<float> dis_pred(4, 0.f);
  // Scratch buffer for each side's softmax, reused across the four sides;
  // replaces the per-iteration new[]/delete[] of the original code.
  std::vector<float> dis_after_sm(reg_max + 1, 0.f);
  for (int i = 0; i < 4; i++) {
    activation_function_softmax(dfl_det + i * (reg_max + 1),
                                dis_after_sm.data(), reg_max + 1);
    // Expected value of the bin distribution = predicted distance in cells.
    float dis = 0;
    for (int j = 0; j < reg_max + 1; j++) {
      dis += j * dis_after_sm[j];
    }
    dis_pred[i] = dis * stride;
  }
  // Clamp to the network input extent.
  // NOTE(review): x is clamped with im_shape[0] and y with im_shape[1],
  // while the caller treats im_shape[0] as height — harmless for square
  // inputs but should be confirmed for non-square shapes.
  int xmin = static_cast<int>((std::max)(ct_x - dis_pred[0], .0f));
  int ymin = static_cast<int>((std::max)(ct_y - dis_pred[1], .0f));
  int xmax = static_cast<int>((std::min)(ct_x + dis_pred[2],
                                         static_cast<float>(im_shape[0])));
  int ymax = static_cast<int>((std::min)(ct_y + dis_pred[3],
                                         static_cast<float>(im_shape[1])));
  PaddleDetection::ObjectResult result_item;
  result_item.rect = {xmin, ymin, xmax, ymax};
  result_item.class_id = label;
  result_item.confidence = score;
  return result_item;
}
// Decode raw PicoDet head outputs into final detections.
// outs holds fpn_stride.size() class-score tensors followed by the matching
// box-distribution tensors (same level order). Candidates above
// score_threshold are decoded per level, NMS-filtered per class, rescaled
// back to original-image coordinates via scale_factor ({h_scale, w_scale}),
// and appended to *results.
void PicoDetPostProcess(std::vector<PaddleDetection::ObjectResult> *results,
                        std::vector<const float *> outs,
                        std::vector<int> fpn_stride,
                        std::vector<float> im_shape,
                        std::vector<float> scale_factor, float score_threshold,
                        float nms_threshold, int num_class, int reg_max) {
  // One candidate list per class: NMS runs class-by-class.
  std::vector<std::vector<PaddleDetection::ObjectResult>> bbox_results;
  bbox_results.resize(num_class);
  int in_h = im_shape[0], in_w = im_shape[1];
  for (size_t i = 0; i < fpn_stride.size(); ++i) {
    // Feature-map extent for this FPN level.
    int feature_h = std::ceil((float)in_h / fpn_stride[i]);
    int feature_w = std::ceil((float)in_w / fpn_stride[i]);
    for (int idx = 0; idx < feature_h * feature_w; idx++) {
      const float *scores = outs[i] + (idx * num_class);
      int row = idx / feature_w;
      int col = idx % feature_w;
      // Keep only the best-scoring class for this cell.
      float score = 0;
      int cur_label = 0;
      for (int label = 0; label < num_class; label++) {
        if (scores[label] > score) {
          score = scores[label];
          cur_label = label;
        }
      }
      if (score > score_threshold) {
        // The level's box tensor sits fpn_stride.size() slots after its
        // score tensor in outs.
        const float *bbox_pred =
            outs[i + fpn_stride.size()] + (idx * 4 * (reg_max + 1));
        bbox_results[cur_label].push_back(
            disPred2Bbox(bbox_pred, cur_label, score, col, row, fpn_stride[i],
                         im_shape, reg_max));
      }
    }
  }
  for (size_t i = 0; i < bbox_results.size(); i++) {
    PaddleDetection::nms(bbox_results[i], nms_threshold);
    // Iterate by reference (the original copied every box per iteration),
    // undo the preprocessing resize, then emit the detection.
    for (auto &box : bbox_results[i]) {
      box.rect[0] = box.rect[0] / scale_factor[1];
      box.rect[2] = box.rect[2] / scale_factor[1];
      box.rect[1] = box.rect[1] / scale_factor[0];
      box.rect[3] = box.rect[3] / scale_factor[0];
      results->push_back(box);
    }
  }
}
} // namespace PaddleDetection
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment