"vscode:/vscode.git/clone" did not exist on "19c66f795be6925bb9ad535f571d39a18c77e03a"
Commit 546b4279 authored by limm's avatar limm
Browse files

add csrc and mmdeploy module

parent 502f4fb9
Pipeline #2810 canceled with stages
// Copyright (c) OpenMMLab. All rights reserved.
// Modified from https://github.com/WenmuZhou/PAN.pytorch
// and
// https://github.com/open-mmlab/mmcv/blob/main/mmcv/ops/csrc/pytorch/cpu/pixel_group.cpp
#include <cmath>
#include <queue>
#include <vector>
#include "mmdeploy/core/tensor.h"
#include "opencv2/imgproc/imgproc.hpp"
namespace mmdeploy::mmocr {
// Aggregate per-label statistics from a labelled score map.
//
// Returns one vector per label id in [0, label_num). Entry layout:
//   [0] mean score of the label's pixels (sum turned into a mean at the end)
//   [1] pixel count
//   [2..] interleaved x, y coordinates of every pixel carrying that label
// Label 0 is treated as background and never accumulated into.
std::vector<std::vector<float>> estimate_confidence(const int32_t* label, const float* score,
                                                    int label_num, int height, int width) {
  std::vector<std::vector<float>> stats;
  for (int i = 0; i < label_num; ++i) {
    stats.emplace_back(2, 0.f);  // [score_sum, pixel_count]
  }
  for (int row = 0; row < height; ++row) {
    const int32_t* label_row = label + row * width;
    const float* score_row = score + row * width;
    for (int col = 0; col < width; ++col) {
      const int32_t id = label_row[col];
      if (id <= 0) continue;  // background / unlabeled pixel
      auto& entry = stats[id];
      entry.push_back(static_cast<float>(col));
      entry.push_back(static_cast<float>(row));
      entry[0] += score_row[col];
      entry[1] += 1;
    }
  }
  // Convert the accumulated score sum into a mean for labels that own pixels.
  for (auto& entry : stats) {
    if (entry[1] > 0) {
      entry[0] /= entry[1];
    }
  }
  return stats;
}
// Cluster foreground pixels around their nearest text kernel (PAN-style
// post-processing).
//
// score:             (H, W) float text confidence map.
// mask:              (H, W) uint8 foreground mask; only masked pixels may join.
// embedding:         (H*W, D) per-pixel embedding vectors, one row per pixel.
// kernel_label:      (H, W) int32 connected-component labels of the shrunk
//                    kernels; 0 is background.
// kernel_contour:    (H, W) uint8 flags marking kernel contour pixels, which
//                    seed the breadth-first expansion.
// kernel_region_num: number of labels including background.
// dis_threshold:     maximum embedding distance for a pixel to join a kernel.
//
// Returns one vector per label: [mean_score, pixel_count, x0, y0, x1, y1, ...]
// (see estimate_confidence).
std::vector<std::vector<float>> pixel_group_cpu(const cv::Mat_<float>& score,
                                                const cv::Mat_<uint8_t>& mask,
                                                const cv::Mat_<float>& embedding,
                                                const cv::Mat_<int32_t>& kernel_label,
                                                const cv::Mat_<uint8_t>& kernel_contour,
                                                int kernel_region_num, float dis_threshold) {
  int height = score.rows;
  int width = score.cols;
  assert(embedding.rows == height * width);
  assert(height == mask.rows);
  assert(width == mask.cols);
  // Compare squared distances to avoid a sqrt per candidate pixel.
  auto threshold_square = dis_threshold * dis_threshold;
  auto ptr_score = score.ptr<float>();
  auto ptr_mask = mask.ptr<uint8_t>();
  auto ptr_kernel_contour = kernel_contour.ptr<uint8_t>();
  auto ptr_embedding = embedding.ptr<float>();
  auto ptr_kernel_label = kernel_label.ptr<int32_t>();
  // BFS queue of (y, x, label), seeded with kernel contour pixels.
  std::queue<std::tuple<int, int, int32_t>> contour_pixels;
  auto embedding_dim = embedding.cols;
  // kernel_vector[l] = [sum of D embedding components..., pixel_count];
  // the sums are turned into means below.
  std::vector<std::vector<float>> kernel_vector(kernel_region_num,
                                                std::vector<float>(embedding_dim + 1, 0));
  // Text labels start as the kernel labels and grow outwards during the BFS.
  cv::Mat_<int32_t> text_label = kernel_label.clone();
  auto ptr_text_label = text_label.ptr<int32_t>();
  for (int i = 0; i < height; i++) {
    auto ptr_embedding_tmp = ptr_embedding + i * width * embedding_dim;
    auto ptr_kernel_label_tmp = ptr_kernel_label + i * width;
    auto ptr_kernel_contour_tmp = ptr_kernel_contour + i * width;
    for (int j = 0, k = 0; j < width && k < width * embedding_dim; j++, k += embedding_dim) {
      int32_t label = ptr_kernel_label_tmp[j];
      if (label > 0) {
        for (int d = 0; d < embedding_dim; d++) {
          kernel_vector[label][d] += ptr_embedding_tmp[k + d];
        }
        kernel_vector[label][embedding_dim] += 1;  // kernel pixel count
        if (ptr_kernel_contour_tmp[j]) {
          contour_pixels.emplace(i, j, label);
        }
      }
    }
  }
  // Average the embedding sums. Skip empty kernels (notably background
  // label 0), which previously produced NaN via a 0/0 division.
  for (int i = 0; i < kernel_region_num; i++) {
    auto count = kernel_vector[i][embedding_dim];
    if (count > 0) {
      for (int j = 0; j < embedding_dim; j++) {
        kernel_vector[i][j] /= count;
      }
    }
  }
  // 4-connected neighborhood offsets.
  int dx[4] = {-1, 1, 0, 0};
  int dy[4] = {0, 0, -1, 1};
  while (!contour_pixels.empty()) {
    auto query_pixel = contour_pixels.front();
    contour_pixels.pop();
    int y = std::get<0>(query_pixel);
    int x = std::get<1>(query_pixel);
    int32_t l = std::get<2>(query_pixel);
    // Reference, not a copy: the original copied the kernel mean vector for
    // every dequeued pixel.
    const auto& kernel_cv = kernel_vector[l];
    for (int idx = 0; idx < 4; idx++) {
      int tmpy = y + dy[idx];
      int tmpx = x + dx[idx];
      // Bounds check first so no out-of-range row pointer is ever formed.
      if (tmpy < 0 || tmpy >= height || tmpx < 0 || tmpx >= width) continue;
      auto ptr_text_label_tmp = ptr_text_label + tmpy * width;
      if (!ptr_mask[tmpy * width + tmpx] || ptr_text_label_tmp[tmpx] > 0) continue;
      float dis = 0;
      auto ptr_embedding_tmp = ptr_embedding + tmpy * width * embedding_dim;
      for (int i = 0; i < embedding_dim; i++) {
        float diff = kernel_cv[i] - ptr_embedding_tmp[tmpx * embedding_dim + i];
        dis += diff * diff;
        // Stop early once the squared distance already exceeds the threshold.
        if (dis >= threshold_square) break;
      }
      if (dis >= threshold_square) continue;
      contour_pixels.emplace(tmpy, tmpx, l);
      ptr_text_label_tmp[tmpx] = l;
    }
  }
  return estimate_confidence(ptr_text_label, ptr_score, kernel_region_num, height, width);
}
} // namespace mmdeploy::mmocr
// Copyright (c) OpenMMLab. All rights reserved.
#include "mmdeploy/codebase/mmocr/psenet.h"
#include <algorithm>
#include "mmdeploy/codebase/mmocr/mmocr.h"
#include "mmdeploy/core/device.h"
#include "mmdeploy/core/registry.h"
#include "mmdeploy/core/serialization.h"
#include "mmdeploy/core/utils/device_utils.h"
#include "opencv2/imgproc/imgproc.hpp"
namespace mmdeploy::mmocr {
void contour_expand(const cv::Mat_<uint8_t>& kernel_masks, const cv::Mat_<int32_t>& kernel_label,
const cv::Mat_<float>& score, int min_kernel_area, int kernel_num,
std::vector<int>& text_areas, std::vector<float>& text_scores,
std::vector<std::vector<int>>& text_points);
// Post-processing head for PSENet text detection: expands shrunk text kernels
// into full instances and emits rotated bounding boxes with scores.
class PSEHead : public MMOCR {
public:
// Reads thresholds/ratios from config["params"] and instantiates the
// platform-specific PseHeadImpl (e.g. CPU/GPU) from the registry.
explicit PSEHead(const Value& config) : MMOCR(config) {
if (config.contains("params")) {
auto& params = config["params"];
min_kernel_confidence_ = params.value("min_kernel_confidence", min_kernel_confidence_);
min_text_avg_confidence_ = params.value("min_text_avg_confidence", min_text_avg_confidence_);
min_kernel_area_ = params.value("min_kernel_area", min_kernel_area_);
min_text_area_ = params.value("min_text_area", min_text_area_);
rescale_ = params.value("rescale", rescale_);
downsample_ratio_ = params.value("downsample_ratio", downsample_ratio_);
}
auto platform = Platform(device_.platform_id()).GetPlatformName();
auto creator = gRegistry<PseHeadImpl>().Get(platform);
if (!creator) {
MMDEPLOY_ERROR(
"PSEHead: implementation for platform \"{}\" not found. Available platforms: {}",
platform, gRegistry<PseHeadImpl>().List());
throw_exception(eEntryNotFound);
}
impl_ = creator->Create();
impl_->Init(stream_);
}
// Converts the network output `_pred["output"]` (expected: float tensor of
// shape (1, C, H, W)) into a list of text detections. `_data["img_metas"]`
// supplies the scale factors used to map boxes back to the input image.
Result<Value> operator()(const Value& _data, const Value& _pred) noexcept {
auto _preds = _pred["output"].get<Tensor>();
if (_preds.shape().size() != 4 || _preds.shape(0) != 1 ||
_preds.data_type() != DataType::kFLOAT) {
MMDEPLOY_ERROR("unsupported `output` tensor, shape: {}, dtype: {}", _preds.shape(),
(int)_preds.data_type());
return Status(eNotSupported);
}
// drop batch dimension
_preds.Squeeze(0);
cv::Mat_<uint8_t> masks;
cv::Mat_<int> kernel_labels;
cv::Mat_<float> score;
int region_num = 0;
OUTCOME_TRY(
impl_->Process(_preds, min_kernel_confidence_, score, masks, kernel_labels, region_num));
std::vector<int> text_areas;
std::vector<float> text_scores;
std::vector<std::vector<int>> text_points;
// Expand kernels into full text regions. Row 0 of `masks` is skipped here;
// presumably it corresponds to the un-shrunk text mask — TODO confirm
// against the PseHeadImpl output layout.
contour_expand(masks.rowRange(1, masks.rows), kernel_labels, score, min_kernel_area_,
region_num, text_areas, text_scores, text_points);
auto scale_w = _data["img_metas"]["scale_factor"][0].get<float>();
auto scale_h = _data["img_metas"]["scale_factor"][1].get<float>();
TextDetections output;
// Label 0 is background, so text instances start at index 1.
for (int text_index = 1; text_index < region_num; ++text_index) {
auto& text_point = text_points[text_index];
auto text_confidence = text_scores[text_index];
auto area = text_areas[text_index];
if (filter_instance(static_cast<float>(area), text_confidence, min_text_area_,
min_text_avg_confidence_)) {
continue;
}
// text_point holds interleaved (x, y) pairs; wrap it as an Nx2 matrix and
// fit the minimum-area rotated rectangle.
cv::Mat_<int> points(text_point.size() / 2, 2, text_point.data());
cv::RotatedRect rect = cv::minAreaRect(points);
std::vector<cv::Point2f> vertices(4);
rect.points(vertices.data());
// Undo the test-scale resize and the network's output downsampling.
if (rescale_) {
for (auto& p : vertices) {
p.x /= scale_w * downsample_ratio_;
p.y /= scale_h * downsample_ratio_;
}
}
auto& det = output.emplace_back();
for (int i = 0; i < 4; ++i) {
det.bbox[i * 2] = vertices[i].x;
det.bbox[i * 2 + 1] = vertices[i].y;
}
det.score = text_confidence;
}
return to_value(output);
}
// True when an instance is too small or too low-confidence to keep.
static bool filter_instance(float area, float confidence, float min_area, float min_confidence) {
return area < min_area || confidence < min_confidence;
}
float min_kernel_confidence_{.5f};    // binarization threshold for kernel pixels
float min_text_avg_confidence_{0.85}; // minimum mean score per text instance
int min_kernel_area_{0};              // minimum kernel area passed to contour_expand
float min_text_area_{16};             // minimum text instance area (pixels)
bool rescale_{true};                  // map boxes back to original image coords
float downsample_ratio_{.25f};        // network output stride relative to input
std::unique_ptr<PseHeadImpl> impl_;   // platform-specific heavy lifting
};
MMDEPLOY_REGISTER_CODEBASE_COMPONENT(MMOCR, PSEHead);
MMDEPLOY_DEFINE_REGISTRY(PseHeadImpl);
} // namespace mmdeploy::mmocr
// Copyright (c) OpenMMLab. All rights reserved.
#ifndef MMDEPLOY_CSRC_CODEBASE_MMOCR_PSENET_H_
#define MMDEPLOY_CSRC_CODEBASE_MMOCR_PSENET_H_
#include "mmdeploy/codebase/mmocr/mmocr.h"
#include "mmdeploy/core/device.h"
#include "mmdeploy/core/registry.h"
#include "mmdeploy/core/tensor.h"
#include "opencv2/core.hpp"
namespace mmdeploy::mmocr {
// Platform-specific backend interface for PSENet post-processing. Concrete
// implementations (registered per platform name) binarize the prediction
// tensor and compute kernel masks / connected-component labels.
class PseHeadImpl {
public:
virtual ~PseHeadImpl() = default;
// Stores the stream used for any asynchronous device work.
virtual void Init(const Stream& stream) { stream_ = stream; }
// Produces, from the (C, H, W) prediction tensor:
//   score      - per-pixel text confidence map,
//   masks      - binarized kernel masks (thresholded at min_kernel_confidence),
//   label      - connected-component labels of the smallest kernel,
//   region_num - number of labels including background.
// Exact layout is implementation-defined — see the platform backends.
virtual Result<void> Process(Tensor preds, //
float min_kernel_confidence, //
cv::Mat_<float>& score, //
cv::Mat_<uint8_t>& masks, //
cv::Mat_<int>& label, //
int& region_num) = 0;
protected:
Stream stream_;
};
MMDEPLOY_DECLARE_REGISTRY(PseHeadImpl, std::unique_ptr<PseHeadImpl>());
} // namespace mmdeploy::mmocr
#endif // MMDEPLOY_CSRC_CODEBASE_MMOCR_PSENET_H_
// Copyright (c) OpenMMLab. All rights reserved.
#include <set>
#include "mmdeploy/archive/value_archive.h"
#include "mmdeploy/core/registry.h"
#include "mmdeploy/core/tensor.h"
#include "mmdeploy/core/utils/formatter.h"
#include "mmdeploy/operation/managed.h"
#include "mmdeploy/operation/vision.h"
#include "mmdeploy/preprocess/transform/transform.h"
using namespace std;
namespace mmdeploy::mmocr {
// Rescales an image to a fixed height, adjusting the width to preserve the
// aspect ratio while honouring optional min/max width limits and a width
// divisor. C++ counterpart of mmocr's `RescaleToHeight` transform.
class RescaleToHeight : public transform::Transform {
 public:
  explicit RescaleToHeight(const Value& args) noexcept {
    height_ = args.value("height", height_);
    min_width_ = args.contains("min_width") && args["min_width"].is_number_integer()
                     ? args["min_width"].get<int>()
                     : min_width_;
    max_width_ = args.contains("max_width") && args["max_width"].is_number_integer()
                     ? args["max_width"].get<int>()
                     : max_width_;
    width_divisor_ = args.contains("width_divisor") && args["width_divisor"].is_number_integer()
                         ? args["width_divisor"].get<int>()
                         : width_divisor_;
    resize_ = operation::Managed<operation::Resize>::Create("bilinear");
  }
  ~RescaleToHeight() override = default;
  // Resizes data["img"] to (height_, new_width) and records the resulting
  // shape/scale entries back into `data`.
  Result<void> Apply(Value& data) override {
    MMDEPLOY_DEBUG("input: {}", data);
    auto dst_height = height_;
    auto dst_min_width = min_width_;
    auto dst_max_width = max_width_;
    std::vector<int> img_shape;  // NHWC
    from_value(data["img_shape"], img_shape);
    std::vector<int> ori_shape;  // NHWC
    from_value(data["ori_shape"], ori_shape);
    auto ori_height = ori_shape[1];
    auto ori_width = ori_shape[2];
    auto valid_ratio = 1.f;
    auto img = data["img"].get<Tensor>();
    Tensor img_resize;
    // Width that preserves the original aspect ratio at the target height.
    auto new_width = static_cast<int>(std::ceil(1.f * dst_height / ori_height * ori_width));
    auto width_divisor = width_divisor_;
    if (dst_min_width > 0) {
      new_width = std::max(dst_min_width, new_width);
    }
    if (dst_max_width > 0) {
      new_width = std::min(dst_max_width, new_width);
    }
    if (new_width % width_divisor != 0) {
      new_width = std::round(1.f * new_width / width_divisor) * width_divisor;
      // Rounding can collapse very narrow inputs to width 0; keep at least one
      // divisor-sized column so the resize below stays valid.
      new_width = std::max(new_width, width_divisor);
    }
    OUTCOME_TRY(resize_.Apply(img, img_resize, dst_height, new_width));
    data["img"] = img_resize;
    data["resize_shape"] = to_value(img_resize.desc().shape);
    data["pad_shape"] = data["resize_shape"];
    // NOTE(review): the original re-assigned data["ori_shape"] to itself;
    // that no-op has been removed.
    data["scale"] = to_value(std::vector<int>({new_width, dst_height}));
    data["valid_ratio"] = valid_ratio;
    MMDEPLOY_DEBUG("output: {}", data);
    return success();
  }

 protected:
  operation::Managed<operation::Resize> resize_;
  int height_{-1};        // target height; expected from config
  int min_width_{-1};     // lower width bound (<= 0 disables)
  int max_width_{-1};     // upper width bound (<= 0 disables)
  bool keep_aspect_ratio_{true};  // currently unused by this transform
  int width_divisor_{1};  // output width is rounded to a multiple of this
};
MMDEPLOY_REGISTER_TRANSFORM(RescaleToHeight);
} // namespace mmdeploy::mmocr
// Copyright (c) OpenMMLab. All rights reserved.
#include "mmdeploy/archive/value_archive.h"
#include "mmdeploy/core/registry.h"
#include "mmdeploy/core/tensor.h"
#include "mmdeploy/core/utils/formatter.h"
#include "mmdeploy/operation/managed.h"
#include "mmdeploy/operation/vision.h"
#include "mmdeploy/preprocess/transform/transform.h"
using namespace std;
namespace mmdeploy::mmocr {
// Resizes a text-recognition input to a fixed height. With keep_aspect_ratio,
// the width follows the aspect ratio, is snapped to a multiple of
// 1/width_downsample_ratio, clamped to [min_width, max_width], and padded up
// to max_width with img_pad_value. Counterpart of mmocr's ResizeOCR.
class ResizeOCR : public transform::Transform {
public:
explicit ResizeOCR(const Value& args) noexcept {
height_ = args.value("height", height_);
min_width_ = args.contains("min_width") && args["min_width"].is_number_integer()
? args["min_width"].get<int>()
: min_width_;
max_width_ = args.contains("max_width") && args["max_width"].is_number_integer()
? args["max_width"].get<int>()
: max_width_;
keep_aspect_ratio_ = args.value("keep_aspect_ratio", keep_aspect_ratio_);
backend_ = args.contains("backend") && args["backend"].is_string()
? args["backend"].get<string>()
: backend_;
img_pad_value_ = args.value("img_pad_value", img_pad_value_);
width_downsample_ratio_ = args.value("width_downsample_ratio", width_downsample_ratio_);
resize_ = operation::Managed<operation::Resize>::Create("bilinear");
pad_ = operation::Managed<operation::Pad>::Create("constant", img_pad_value_);
}
~ResizeOCR() override = default;
// Resizes (and possibly right-pads) data["img"]; writes resize_shape,
// pad_shape and valid_ratio back into `data`.
Result<void> Apply(Value& data) override {
MMDEPLOY_DEBUG("input: {}", data);
auto dst_height = height_;
auto dst_min_width = min_width_;
auto dst_max_width = max_width_;
std::vector<int> img_shape; // NHWC
from_value(data["img_shape"], img_shape);
std::vector<int> ori_shape; // NHWC
from_value(data["ori_shape"], ori_shape);
auto ori_height = ori_shape[1];
auto ori_width = ori_shape[2];
auto valid_ratio = 1.f;
auto img = data["img"].get<Tensor>();
Tensor img_resize;
if (keep_aspect_ratio_) {
// Width that preserves the aspect ratio at the target height.
auto new_width = static_cast<int>(std::ceil(1.f * dst_height / ori_height * ori_width));
// Snap the width to the network's horizontal stride so the feature map
// width stays integral (width_downsample_ratio is e.g. 1/16).
auto width_divisor = static_cast<int>(1 / width_downsample_ratio_);
if (new_width % width_divisor != 0) {
new_width = std::round(1.f * new_width / width_divisor) * width_divisor;
}
if (dst_min_width > 0) {
new_width = std::max(dst_min_width, new_width);
}
if (dst_max_width > 0) {
// Fraction of the fixed-width canvas covered by real image content;
// consumed by decoders that mask padded columns.
valid_ratio = std::min(1., 1. * new_width / dst_max_width);
auto resize_width = std::min(dst_max_width, new_width);
OUTCOME_TRY(resize_.Apply(img, img_resize, dst_height, resize_width));
// Right-pad narrow images up to the fixed canvas width.
if (new_width < dst_max_width) {
auto pad_w = std::max(0, dst_max_width - resize_width);
OUTCOME_TRY(pad_.Apply(img_resize, img_resize, 0, 0, 0, pad_w));
}
} else {
OUTCOME_TRY(resize_.Apply(img, img_resize, dst_height, new_width));
}
} else {
// NOTE(review): this path resizes to max_width directly and assumes the
// config sets it; with the default -1 it would request a negative width —
// confirm configs always provide max_width when keep_aspect_ratio=false.
OUTCOME_TRY(resize_.Apply(img, img_resize, dst_height, dst_max_width));
}
data["img"] = img_resize;
data["resize_shape"] = to_value(img_resize.desc().shape);
data["pad_shape"] = data["resize_shape"];
data["valid_ratio"] = valid_ratio;
MMDEPLOY_DEBUG("output: {}", data);
return success();
}
protected:
operation::Managed<operation::Resize> resize_;
operation::Managed<operation::Pad> pad_;
int height_{-1};            // target height; expected from config
int min_width_{-1};         // lower width bound (<= 0 disables)
int max_width_{-1};         // fixed canvas width when > 0
bool keep_aspect_ratio_{true};
float img_pad_value_{0};    // constant used for right padding
float width_downsample_ratio_{1. / 16};  // horizontal stride of the network
std::string backend_;       // kept for config compatibility; not used here
};
MMDEPLOY_REGISTER_TRANSFORM(ResizeOCR);
} // namespace mmdeploy::mmocr
// Copyright (c) OpenMMLab. All rights reserved.
#include <set>
#include "mmdeploy/archive/value_archive.h"
#include "mmdeploy/core/registry.h"
#include "mmdeploy/core/tensor.h"
#include "mmdeploy/core/utils/formatter.h"
#include "mmdeploy/operation/managed.h"
#include "mmdeploy/operation/vision.h"
#include "mmdeploy/preprocess/transform/transform.h"
using namespace std;
namespace mmdeploy::mmocr {
// Scales the image so its short side equals `short_size`, then rounds both
// sides up to a multiple of `scale_divisor`. The ratio/aspect jitter ranges
// of the training-time transform must be fixed at 1.0 for deployment; any
// other value is rejected at Apply time.
class ShortScaleAspectJitter : public transform::Transform {
 public:
  explicit ShortScaleAspectJitter(const Value& args) noexcept {
    short_size_ = args.contains("short_size") && args["short_size"].is_number_integer()
                      ? args["short_size"].get<int>()
                      : short_size_;
    // Guard with contains() so a missing key reaches the clear error below
    // instead of whatever operator[] does on an absent member.
    if (args.contains("ratio_range") && args["ratio_range"].is_array() &&
        args["ratio_range"].size() == 2) {
      ratio_range_[0] = args["ratio_range"][0].get<float>();
      ratio_range_[1] = args["ratio_range"][1].get<float>();
    } else {
      MMDEPLOY_ERROR("'ratio_range' should be a float array of size 2");
      throw_exception(eInvalidArgument);
    }
    if (args.contains("aspect_ratio_range") && args["aspect_ratio_range"].is_array() &&
        args["aspect_ratio_range"].size() == 2) {
      aspect_ratio_range_[0] = args["aspect_ratio_range"][0].get<float>();
      aspect_ratio_range_[1] = args["aspect_ratio_range"][1].get<float>();
    } else {
      MMDEPLOY_ERROR("'aspect_ratio_range' should be a float array of size 2");
      throw_exception(eInvalidArgument);
    }
    scale_divisor_ = args.contains("scale_divisor") && args["scale_divisor"].is_number_integer()
                         ? args["scale_divisor"].get<int>()
                         : scale_divisor_;
    resize_ = operation::Managed<operation::Resize>::Create("bilinear");
  }
  ~ShortScaleAspectJitter() override = default;
  // Resizes data["img"] and writes resize_shape, scale and scale_factor back
  // into `data`.
  Result<void> Apply(Value& data) override {
    MMDEPLOY_DEBUG("input: {}", data);
    auto short_size = short_size_;
    auto ratio_range = ratio_range_;
    auto aspect_ratio_range = aspect_ratio_range_;
    auto scale_divisor = scale_divisor_;
    // Random jitter is a training-only feature; deployment requires the
    // deterministic configuration (all ranges pinned to 1.0).
    if (ratio_range[0] != 1.0 || ratio_range[1] != 1.0 || aspect_ratio_range[0] != 1.0 ||
        aspect_ratio_range[1] != 1.0) {
      MMDEPLOY_ERROR("unsupported `ratio_range` and `aspect_ratio_range`");
      return Status(eNotSupported);
    }
    std::vector<int> img_shape;  // NHWC
    from_value(data["img_shape"], img_shape);
    // NOTE(review): the original also parsed data["ori_shape"] into locals
    // that were never used; that dead code has been removed.
    auto img = data["img"].get<Tensor>();
    Tensor img_resize;
    // Scale so the short side hits short_size, then round both dimensions up
    // to the divisor the detector requires.
    auto scale = static_cast<float>(1.0 * short_size / std::min(img_shape[1], img_shape[2]));
    auto dst_height = static_cast<int>(std::round(scale * img_shape[1]));
    auto dst_width = static_cast<int>(std::round(scale * img_shape[2]));
    dst_height = static_cast<int>(std::ceil(1.0 * dst_height / scale_divisor) * scale_divisor);
    dst_width = static_cast<int>(std::ceil(1.0 * dst_width / scale_divisor) * scale_divisor);
    std::vector<float> scale_factor = {(float)1.0 * dst_width / img_shape[2],
                                       (float)1.0 * dst_height / img_shape[1]};
    OUTCOME_TRY(resize_.Apply(img, img_resize, dst_height, dst_width));
    data["img"] = img_resize;
    data["resize_shape"] = to_value(img_resize.desc().shape);
    data["scale"] = to_value(std::vector<int>({dst_width, dst_height}));
    data["scale_factor"] = to_value(scale_factor);
    MMDEPLOY_DEBUG("output: {}", data);
    return success();
  }

 protected:
  operation::Managed<operation::Resize> resize_;
  int short_size_{736};                         // target length of the short side
  std::vector<float> ratio_range_{0.7, 1.3};    // must be {1.0, 1.0} at deploy time
  std::vector<float> aspect_ratio_range_{0.9, 1.1};  // must be {1.0, 1.0} too
  int scale_divisor_{1};                        // both sides rounded up to this
};
MMDEPLOY_REGISTER_TRANSFORM(ShortScaleAspectJitter);
} // namespace mmdeploy::mmocr
// Copyright (c) OpenMMLab. All rights reserved.
#include <numeric>
#include "mmdeploy/archive/value_archive.h"
#include "mmdeploy/core/module.h"
#include "mmdeploy/core/registry.h"
#include "mmdeploy/core/utils/formatter.h"
#include "mmdeploy/core/value.h"
#include "mmdeploy/experimental/module_adapter.h"
#include "opencv2/imgproc/imgproc.hpp"
#include "opencv_utils.h"
namespace mmdeploy::mmocr {
// Warp rotated rect
class WarpBbox {
public:
Result<Value> operator()(const Value& img, const Value& det) {
auto ori_img = img["ori_img"].get<framework::Mat>();
if (det.is_object() && det.contains("bbox")) {
auto bbox = from_value<std::vector<cv::Point>>(det["bbox"]);
auto patch = warp(mmdeploy::cpu::Mat2CVMat(ori_img), bbox);
return Value{{"ori_img", cpu::CVMat2Mat(patch, ori_img.pixel_format())}};
} else { // whole image as a bbox
return Value{{"ori_img", ori_img}};
}
}
// assuming rect
static cv::Mat warp(const cv::Mat& img, const std::vector<cv::Point>& _pts) {
auto pts = sort_vertex(_pts);
std::vector<cv::Point2f> src(begin(pts), end(pts));
auto e0 = norm(pts[0] - pts[1]);
auto e1 = norm(pts[1] - pts[2]);
auto w = static_cast<float>(std::max(e0, e1));
auto h = static_cast<float>(std::min(e0, e1));
std::vector<cv::Point2f> dst{{0, 0}, {w, 0}, {w, h}, {0, h}};
auto m = cv::getAffineTransform(src.data(), dst.data());
cv::Mat warped;
cv::warpAffine(img, warped, m, {static_cast<int>(w), static_cast<int>(h)});
return warped;
}
static std::vector<cv::Point> sort_vertex(std::vector<cv::Point> ps) {
auto pivot = *min_element(begin(ps), end(ps), [](auto r, auto p) {
return (r.y != p.y) ? (r.y < p.y) : (r.x < p.x);
});
// TODO: resolve tie with distance
sort(begin(ps), end(ps), [&](auto a, auto b) {
if (a == pivot) return b != pivot;
return (a - pivot).cross(b - pivot) > 0;
});
auto tl = accumulate(begin(ps) + 1, end(ps), ps[0], [](auto r, auto p) {
return cv::Point{std::min(r.x, p.x), std::min(r.y, p.y)};
});
auto cmp = [&](auto r, auto p) {
cv::Point2f u{r - tl}, v{p - tl};
return u.dot(u) < v.dot(v);
};
auto tl_idx = min_element(begin(ps), end(ps), cmp) - begin(ps);
rotate(begin(ps), begin(ps) + tl_idx, end(ps));
return ps;
}
};
MMDEPLOY_REGISTER_FACTORY_FUNC(Module, (WarpBbox, 0),
[](const Value&) { return CreateTask(WarpBbox{}); });
} // namespace mmdeploy::mmocr
# Copyright (c) OpenMMLab. All rights reserved.
project(mmdeploy_mmpose)
# Collect every source in this directory plus the pose_tracker subdirectory.
aux_source_directory(${CMAKE_CURRENT_SOURCE_DIR} MMPOSE_SRCS)
aux_source_directory(${CMAKE_CURRENT_SOURCE_DIR}/pose_tracker POSE_TRACKER_SRCS)
mmdeploy_add_module(${PROJECT_NAME} ${MMPOSE_SRCS} ${POSE_TRACKER_SRCS})
target_link_libraries(${PROJECT_NAME} PRIVATE
mmdeploy::transform
mmdeploy_operation
mmdeploy_opencv_utils)
# pose_tracker sources include headers from the C API directory.
target_include_directories(${PROJECT_NAME} PRIVATE
${CMAKE_CURRENT_SOURCE_DIR}
${CMAKE_CURRENT_SOURCE_DIR}/../../apis/c)
add_library(mmdeploy::mmpose ALIAS ${PROJECT_NAME})
# Register the tasks this codebase module contributes to the SDK build.
set(MMDEPLOY_TASKS ${MMDEPLOY_TASKS} pose_detector pose_tracker CACHE INTERNAL "")
// Copyright (c) OpenMMLab. All rights reserved.
#include <cctype>
#include <opencv2/imgproc.hpp>
#include "mmdeploy/core/device.h"
#include "mmdeploy/core/registry.h"
#include "mmdeploy/core/serialization.h"
#include "mmdeploy/core/tensor.h"
#include "mmdeploy/core/utils/device_utils.h"
#include "mmdeploy/core/utils/formatter.h"
#include "mmdeploy/core/value.h"
#include "mmdeploy/experimental/module_adapter.h"
#include "mmpose.h"
namespace mmdeploy::mmpose {
using std::string;
using std::vector;
// Return a copy of `s` with every character lower-cased (ASCII-wise, via
// std::tolower); used for case-insensitive target_type comparisons.
std::string to_lower(const std::string& s) {
  std::string result;
  result.reserve(s.size());
  for (unsigned char c : s) {
    result.push_back(static_cast<char>(std::tolower(c)));
  }
  return result;
}
class TopdownHeatmapBaseHeadDecode : public MMPose {
public:
explicit TopdownHeatmapBaseHeadDecode(const Value& config) : MMPose(config) {
if (config.contains("params")) {
auto& params = config["params"];
flip_test_ = params.value("flip_test", flip_test_);
use_udp_ = params.value("use_udp", use_udp_);
target_type_ = params.value("target_type", target_type_);
valid_radius_factor_ = params.value("valid_radius_factor", valid_radius_factor_);
unbiased_decoding_ = params.value("unbiased_decoding", unbiased_decoding_);
post_process_ = params.value("post_process", post_process_);
shift_heatmap_ = params.value("shift_heatmap", shift_heatmap_);
modulate_kernel_ = params.value("modulate_kernel", modulate_kernel_);
}
}
Result<Value> operator()(const Value& _data, const Value& _prob) {
MMDEPLOY_DEBUG("preprocess_result: {}", _data);
MMDEPLOY_DEBUG("inference_result: {}", _prob);
Device cpu_device{"cpu"};
OUTCOME_TRY(auto heatmap,
MakeAvailableOnDevice(_prob["output"].get<Tensor>(), cpu_device, stream()));
OUTCOME_TRY(stream().Wait());
if (!(heatmap.shape().size() == 4 && heatmap.data_type() == DataType::kFLOAT)) {
MMDEPLOY_ERROR("unsupported `output` tensor, shape: {}, dtype: {}", heatmap.shape(),
(int)heatmap.data_type());
return Status(eNotSupported);
}
auto& img_metas = _data["img_metas"];
vector<float> center;
vector<float> scale;
from_value(img_metas["center"], center);
from_value(img_metas["scale"], scale);
Tensor pred =
keypoints_from_heatmap(heatmap, center, scale, unbiased_decoding_, post_process_,
modulate_kernel_, valid_radius_factor_, use_udp_, target_type_);
return GetOutput(pred);
}
Value GetOutput(Tensor& pred) {
PoseDetectorOutput output;
int K = pred.shape(1);
float* data = pred.data<float>();
for (int i = 0; i < K; i++) {
float x = *(data + 0);
float y = *(data + 1);
float s = *(data + 2);
output.key_points.push_back({{x, y}, s});
data += 3;
}
return to_value(std::move(output));
}
Tensor keypoints_from_heatmap(Tensor& heatmap, const vector<float>& center,
const vector<float>& scale, bool unbiased_decoding,
const string& post_process, int modulate_kernel,
float valid_radius_factor, bool use_udp,
const string& target_type) {
int K = heatmap.shape(1);
int H = heatmap.shape(2);
int W = heatmap.shape(3);
if (post_process == "megvii") {
heatmap = gaussian_blur(heatmap, modulate_kernel);
}
Tensor pred;
if (use_udp) {
if (to_lower(target_type) == to_lower(string("GaussianHeatMap"))) {
pred = get_max_pred(heatmap);
post_dark_udp(pred, heatmap, modulate_kernel);
} else if (to_lower(target_type) == to_lower(string("CombinedTarget"))) {
// output channel = 3 * channel_cfg['num_output_channels']
assert(K % 3 == 0);
for (int i = 0; i < K; i++) {
int kt = (i % 3 == 0) ? 2 * modulate_kernel + 1 : modulate_kernel;
float* data = heatmap.data<float>() + i * H * W;
cv::Mat work = cv::Mat(H, W, CV_32FC(1), data);
cv::GaussianBlur(work, work, {kt, kt}, 0); // inplace
}
float valid_radius = valid_radius_factor_ * H;
TensorDesc desc = {Device{"cpu"}, DataType::kFLOAT, {1, K / 3, H, W}};
Tensor offset_x(desc);
Tensor offset_y(desc);
Tensor heatmap_(desc);
{
// split heatmap
float* src = heatmap.data<float>();
float* dst0 = heatmap_.data<float>();
float* dst1 = offset_x.data<float>();
float* dst2 = offset_y.data<float>();
for (int i = 0; i < K / 3; i++) {
std::copy_n(src, H * W, dst0);
std::transform(src + H * W, src + 2 * H * W, dst1,
[=](float& x) { return x * valid_radius; });
std::transform(src + 2 * H * W, src + 3 * H * W, dst2,
[=](float& x) { return x * valid_radius; });
src += 3 * H * W;
dst0 += H * W;
dst1 += H * W;
dst2 += H * W;
}
}
pred = get_max_pred(heatmap_);
for (int i = 0; i < K / 3; i++) {
float* data = pred.data<float>() + i * 3;
int index = *(data + 0) + *(data + 1) * W + H * W * i;
float* offx = offset_x.data<float>() + index;
float* offy = offset_y.data<float>() + index;
*(data + 0) += *offx;
*(data + 1) += *offy;
}
}
} else {
pred = get_max_pred(heatmap);
if (post_process == "unbiased") {
heatmap = gaussian_blur(heatmap, modulate_kernel);
float* data = heatmap.data<float>();
std::for_each(data, data + K * H * W, [](float& v) {
double _v = std::max((double)v, 1e-10);
v = std::log(_v);
});
for (int i = 0; i < K; i++) {
taylor(heatmap, pred, i);
}
} else if (post_process != "null") {
for (int i = 0; i < K; i++) {
float* data = heatmap.data<float>() + i * W * H;
auto _data = [&](int y, int x) { return *(data + y * W + x); };
int px = *(pred.data<float>() + i * 3 + 0);
int py = *(pred.data<float>() + i * 3 + 1);
if (1 < px && px < W - 1 && 1 < py && py < H - 1) {
float v1 = _data(py, px + 1) - _data(py, px - 1);
float v2 = _data(py + 1, px) - _data(py - 1, px);
*(pred.data<float>() + i * 3 + 0) += (v1 > 0) ? 0.25 : ((v1 < 0) ? -0.25 : 0);
*(pred.data<float>() + i * 3 + 1) += (v2 > 0) ? 0.25 : ((v2 < 0) ? -0.25 : 0);
if (post_process_ == "megvii") {
*(pred.data<float>() + i * 3 + 0) += 0.5;
*(pred.data<float>() + i * 3 + 1) += 0.5;
}
}
}
}
}
K = pred.shape(1); // changed if target_type is CombinedTarget
// Transform back to the image
for (int i = 0; i < K; i++) {
transform_pred(pred, i, center, scale, {W, H}, use_udp);
}
if (post_process_ == "megvii") {
for (int i = 0; i < K; i++) {
float* data = pred.data<float>() + i * 3 + 2;
*data = *data / 255.0 + 0.5;
}
}
return pred;
}
void post_dark_udp(Tensor& pred, Tensor& heatmap, int kernel) {
int K = heatmap.shape(1);
int H = heatmap.shape(2);
int W = heatmap.shape(3);
for (int i = 0; i < K; i++) {
float* data = heatmap.data<float>() + i * H * W;
cv::Mat work = cv::Mat(H, W, CV_32FC(1), data);
cv::GaussianBlur(work, work, {kernel, kernel}, 0); // inplace
}
std::for_each(heatmap.data<float>(), heatmap.data<float>() + K * H * W, [](float& x) {
x = std::max(0.001f, std::min(50.f, x));
x = std::log(x);
});
auto _heatmap_data = [&](int index, int c) -> float {
int y = index / (W + 2);
int x = index % (W + 2);
y = std::max(0, y - 1);
x = std::max(0, x - 1);
return *(heatmap.data<float>() + c * H * W + y * W + x);
};
for (int i = 0; i < K; i++) {
float* data = pred.data<float>() + i * 3;
int index = *(data + 0) + 1 + (*(data + 1) + 1) * (W + 2);
float i_ = _heatmap_data(index, i);
float ix1 = _heatmap_data(index + 1, i);
float iy1 = _heatmap_data(index + W + 2, i);
float ix1y1 = _heatmap_data(index + W + 3, i);
float ix1_y1_ = _heatmap_data(index - W - 3, i);
float ix1_ = _heatmap_data(index - 1, i);
float iy1_ = _heatmap_data(index - 2 - W, i);
float dx = 0.5 * (ix1 - ix1_);
float dy = 0.5 * (iy1 - iy1_);
float dxx = ix1 - 2 * i_ + ix1_;
float dyy = iy1 - 2 * i_ + iy1_;
float dxy = 0.5 * (ix1y1 - ix1 - iy1 + i_ + i_ - ix1_ - iy1_ + ix1_y1_);
vector<float> _data0 = {dx, dy};
vector<float> _data1 = {dxx, dxy, dxy, dyy};
cv::Mat derivative = cv::Mat(2, 1, CV_32FC1, _data0.data());
cv::Mat hessian = cv::Mat(2, 2, CV_32FC1, _data1.data());
cv::Mat hessianinv = hessian.inv();
cv::Mat offset = -hessianinv * derivative;
*(data + 0) += offset.at<float>(0, 0);
*(data + 1) += offset.at<float>(1, 0);
}
}
void transform_pred(Tensor& pred, int k, const vector<float>& center, const vector<float>& _scale,
const vector<int>& output_size, bool use_udp = false) {
auto scale = _scale;
scale[0] *= 200;
scale[1] *= 200;
float scale_x, scale_y;
if (use_udp) {
scale_x = scale[0] / (output_size[0] - 1.0);
scale_y = scale[1] / (output_size[1] - 1.0);
} else {
scale_x = scale[0] / output_size[0];
scale_y = scale[1] / output_size[1];
}
float* data = pred.data<float>() + k * 3;
*(data + 0) = *(data + 0) * scale_x + center[0] - scale[0] * 0.5;
*(data + 1) = *(data + 1) * scale_y + center[1] - scale[1] * 0.5;
}
void taylor(const Tensor& heatmap, Tensor& pred, int k) {
int K = heatmap.shape(1);
int H = heatmap.shape(2);
int W = heatmap.shape(3);
int px = *(pred.data<float>() + k * 3 + 0);
int py = *(pred.data<float>() + k * 3 + 1);
if (1 < px && px < W - 2 && 1 < py && py < H - 2) {
float* data = const_cast<float*>(heatmap.data<float>() + k * H * W);
auto get_data = [&](int r, int c) { return *(data + r * W + c); };
float dx = 0.5 * (get_data(py, px + 1) - get_data(py, px - 1));
float dy = 0.5 * (get_data(py + 1, px) - get_data(py - 1, px));
float dxx = 0.25 * (get_data(py, px + 2) - 2 * get_data(py, px) + get_data(py, px - 2));
float dxy = 0.25 * (get_data(py + 1, px + 1) - get_data(py - 1, px + 1) -
get_data(py + 1, px - 1) + get_data(py - 1, px - 1));
float dyy = 0.25 * (get_data(py + 2, px) - 2 * get_data(py, px) + get_data(py - 2, px));
vector<float> _data0 = {dx, dy};
vector<float> _data1 = {dxx, dxy, dxy, dyy};
cv::Mat derivative = cv::Mat(2, 1, CV_32FC1, _data0.data());
cv::Mat hessian = cv::Mat(2, 2, CV_32FC1, _data1.data());
if (std::fabs(dxx * dyy - dxy * dxy) > 1e-6) {
cv::Mat hessianinv = hessian.inv();
cv::Mat offset = -hessianinv * derivative;
*(pred.data<float>() + k * 3 + 0) += offset.at<float>(0, 0);
*(pred.data<float>() + k * 3 + 1) += offset.at<float>(1, 0);
}
}
}
Tensor gaussian_blur(const Tensor& _heatmap, int kernel) {
assert(kernel % 2 == 1);
auto desc = _heatmap.desc();
Tensor heatmap(desc);
int K = _heatmap.shape(1);
int H = _heatmap.shape(2);
int W = _heatmap.shape(3);
int num_points = H * W;
int border = (kernel - 1) / 2;
for (int i = 0; i < K; i++) {
int offset = i * H * W;
float* data = const_cast<float*>(_heatmap.data<float>()) + offset;
float origin_max = *std::max_element(data, data + num_points);
cv::Mat work = cv::Mat(H + 2 * border, W + 2 * border, CV_32FC1, cv::Scalar{});
cv::Mat curr = cv::Mat(H, W, CV_32FC1, data);
cv::Rect roi = {border, border, W, H};
curr.copyTo(work(roi));
cv::GaussianBlur(work, work, {kernel, kernel}, 0);
cv::Mat valid = work(roi).clone();
float cur_max = *std::max_element((float*)valid.data, (float*)valid.data + num_points);
float* dst = heatmap.data<float>() + offset;
std::transform((float*)valid.data, (float*)valid.data + num_points, dst,
[&](float v) { return v * origin_max / cur_max; });
}
return heatmap;
}
// Locate the per-channel maximum of a [1, K, H, W] heatmap.
// Returns a [1, K, 3] tensor of (x, y, score) per channel; coordinates are
// left at -1 when the best score is not positive.
Tensor get_max_pred(const Tensor& heatmap) {
  const int K = heatmap.shape(1);
  const int H = heatmap.shape(2);
  const int W = heatmap.shape(3);
  TensorDesc pred_desc = {Device{"cpu"}, DataType::kFLOAT, {1, K, 3}};
  Tensor pred(pred_desc);
  for (int c = 0; c < K; c++) {
    // cv::Mat needs a mutable pointer; the channel is only read.
    float* channel = const_cast<float*>(heatmap.data<float>()) + c * H * W;
    cv::Mat view(H, W, CV_32FC1, channel);
    double lo, hi;
    cv::Point lo_pt, hi_pt;
    cv::minMaxLoc(view, &lo, &hi, &lo_pt, &hi_pt);
    float* out = pred.data<float>() + c * 3;
    const bool positive = hi > 0.0;
    out[0] = positive ? static_cast<float>(hi_pt.x) : -1.f;
    out[1] = positive ? static_cast<float>(hi_pt.y) : -1.f;
    out[2] = static_cast<float>(hi);
  }
  return pred;
}
 private:
  // Decoding options, overridable via config (defaults below).
  // NOTE(review): the code that consumes these flags lies above this view;
  // the semantics suggested by the names should be confirmed there.
  bool flip_test_{true};
  bool shift_heatmap_{true};
  string post_process_ = {"default"};
  int modulate_kernel_{11};
  bool unbiased_decoding_{false};
  float valid_radius_factor_{0.0546875f};
  bool use_udp_{false};
  string target_type_{"GaussianHeatmap"};
};
MMDEPLOY_REGISTER_CODEBASE_COMPONENT(MMPose, TopdownHeatmapBaseHeadDecode);
// The decode procedure is identical for these head types, so each is an
// alias of TopdownHeatmapBaseHeadDecode registered under its own name.
using TopdownHeatmapSimpleHeadDecode = TopdownHeatmapBaseHeadDecode;
MMDEPLOY_REGISTER_CODEBASE_COMPONENT(MMPose, TopdownHeatmapSimpleHeadDecode);
using TopdownHeatmapMultiStageHeadDecode = TopdownHeatmapBaseHeadDecode;
MMDEPLOY_REGISTER_CODEBASE_COMPONENT(MMPose, TopdownHeatmapMultiStageHeadDecode);
using ViPNASHeatmapSimpleHeadDecode = TopdownHeatmapBaseHeadDecode;
MMDEPLOY_REGISTER_CODEBASE_COMPONENT(MMPose, ViPNASHeatmapSimpleHeadDecode);
using TopdownHeatmapMSMUHeadDecode = TopdownHeatmapBaseHeadDecode;
MMDEPLOY_REGISTER_CODEBASE_COMPONENT(MMPose, TopdownHeatmapMSMUHeadDecode);
}  // namespace mmdeploy::mmpose
// Copyright (c) OpenMMLab. All rights reserved.
#include <opencv2/imgproc.hpp>
#include "mmdeploy/core/device.h"
#include "mmdeploy/core/registry.h"
#include "mmdeploy/core/serialization.h"
#include "mmdeploy/core/tensor.h"
#include "mmdeploy/core/utils/device_utils.h"
#include "mmdeploy/core/utils/formatter.h"
#include "mmdeploy/core/value.h"
#include "mmdeploy/experimental/module_adapter.h"
#include "mmpose.h"
namespace mmdeploy::mmpose {
using std::string;
using std::vector;
// Decodes the output of a DeepPose-style regression head: the model emits
// normalized (x, y) key-point coordinates, which are scaled to model-input
// pixels and mapped back to the original image via center/scale metadata.
class DeepposeRegressionHeadDecode : public MMPose {
 public:
  explicit DeepposeRegressionHeadDecode(const Value& config) : MMPose(config) {}

  // _data: preprocess result (supplies img_metas); _prob: inference result
  // holding a rank-3 float tensor named "output".
  Result<Value> operator()(const Value& _data, const Value& _prob) {
    MMDEPLOY_DEBUG("preprocess_result: {}", _data);
    MMDEPLOY_DEBUG("inference_result: {}", _prob);
    Device cpu_device{"cpu"};
    OUTCOME_TRY(auto output,
                MakeAvailableOnDevice(_prob["output"].get<Tensor>(), cpu_device, stream()));
    OUTCOME_TRY(stream().Wait());
    if (!(output.shape().size() == 3 && output.data_type() == DataType::kFLOAT)) {
      MMDEPLOY_ERROR("unsupported `output` tensor, shape: {}, dtype: {}", output.shape(),
                     (int)output.data_type());
      return Status(eNotSupported);
    }
    auto& img_metas = _data["img_metas"];
    vector<float> center;
    vector<float> scale;
    from_value(img_metas["center"], center);
    from_value(img_metas["scale"], scale);
    // img_shape is indexed [2] then [1] -> (width, height)
    vector<int> img_size = {img_metas["img_shape"][2].get<int>(),
                            img_metas["img_shape"][1].get<int>()};
    Tensor pred = keypoints_from_regression(output, center, scale, img_size);
    return GetOutput(pred);
  }

  // Convert a [1, K, 3] (x, y, score) tensor into the serializable output.
  Value GetOutput(Tensor& pred) {
    PoseDetectorOutput output;
    int K = pred.shape(1);
    float* data = pred.data<float>();
    for (int i = 0; i < K; i++) {
      float x = *(data + 0);
      float y = *(data + 1);
      float s = *(data + 2);
      output.key_points.push_back({{x, y}, s});
      data += 3;
    }
    return to_value(std::move(output));
  }

  // Scale the normalized [1, K, 2] regression output to model-input pixels
  // and map it back into the image frame. Returns a [1, K, 3] tensor with a
  // constant score of 1 (regression heads produce no confidence).
  Tensor keypoints_from_regression(const Tensor& output, const vector<float>& center,
                                   const vector<float>& scale, const vector<int>& img_size) {
    int K = output.shape(1);
    TensorDesc pred_desc = {Device{"cpu"}, DataType::kFLOAT, {1, K, 3}};
    Tensor pred(pred_desc);
    // read-only source: no const_cast needed
    const float* src = output.data<float>();
    float* dst = pred.data<float>();
    for (int i = 0; i < K; i++) {
      *(dst + 0) = *(src + 0) * img_size[0];
      *(dst + 1) = *(src + 1) * img_size[1];
      *(dst + 2) = 1.f;
      src += 2;
      dst += 3;
    }
    // Transform back to the image
    for (int i = 0; i < K; i++) {
      transform_pred(pred, i, center, scale, img_size, false);
    }
    return pred;
  }

  // Map key-point k from model-input coordinates back to image coordinates.
  // `_scale` presumably follows mmpose's convention of being expressed in
  // units of 200 pixels (hence the x200) — confirm against preprocessing.
  void transform_pred(Tensor& pred, int k, const vector<float>& center, const vector<float>& _scale,
                      const vector<int>& output_size, bool use_udp = false) {
    auto scale = _scale;
    scale[0] *= 200;
    scale[1] *= 200;
    float scale_x, scale_y;
    if (use_udp) {
      // UDP uses (size - 1) spacing to align pixel centers
      scale_x = scale[0] / (output_size[0] - 1.0);
      scale_y = scale[1] / (output_size[1] - 1.0);
    } else {
      scale_x = scale[0] / output_size[0];
      scale_y = scale[1] / output_size[1];
    }
    float* data = pred.data<float>() + k * 3;
    *(data + 0) = *(data + 0) * scale_x + center[0] - scale[0] * 0.5;
    *(data + 1) = *(data + 1) * scale_y + center[1] - scale[1] * 0.5;
  }
};
MMDEPLOY_REGISTER_CODEBASE_COMPONENT(MMPose, DeepposeRegressionHeadDecode);
} // namespace mmdeploy::mmpose
// Copyright (c) OpenMMLab. All rights reserved.
#include "mmdeploy/codebase/mmpose/mmpose.h"
namespace mmdeploy::mmpose {
MMDEPLOY_REGISTER_CODEBASE(MMPose);
} // namespace mmdeploy::mmpose
// Copyright (c) OpenMMLab. All rights reserved.
#ifndef MMDEPLOY_MMPOSE_H
#define MMDEPLOY_MMPOSE_H
#include <array>
#include "mmdeploy/codebase/common.h"
#include "mmdeploy/core/device.h"
#include "mmdeploy/core/module.h"
namespace mmdeploy::mmpose {
// Serializable pose result: one entry per key-point of one person.
struct PoseDetectorOutput {
  struct KeyPoint {
    // NOTE(review): despite the name, this holds a single (x, y) point,
    // not a box — consumers index it as [0]=x, [1]=y.
    std::array<float, 2> bbox;  // x, y
    float score;                // key-point confidence
    MMDEPLOY_ARCHIVE_MEMBERS(bbox, score);
  };
  std::vector<KeyPoint> key_points;
  MMDEPLOY_ARCHIVE_MEMBERS(key_points);
};
MMDEPLOY_DECLARE_CODEBASE(MMPose, mmpose);
} // namespace mmdeploy::mmpose
#endif // MMDEPLOY_MMPOSE_H
// Copyright (c) OpenMMLab. All rights reserved.
#ifndef MMDEPLOY_CODEBASE_MMPOSE_POSE_TRACKER_COMMON_H
#define MMDEPLOY_CODEBASE_MMPOSE_POSE_TRACKER_COMMON_H
#include <array>
#include <vector>
#include "mmdeploy/core/mpl/type_traits.h"
#include "mmdeploy/pose_tracker.h"
namespace mmdeploy::mmpose::_pose_tracker {
// Per-frame tracking output handed to the C API layer; parallel vectors,
// one entry per visible track.
struct TrackerResult {
  std::vector<std::vector<mmdeploy_point_t>> keypoints;  // smoothed key-points
  std::vector<std::vector<float>> scores;                // key-point scores
  std::vector<mmdeploy_rect_t> bboxes;                   // smoothed bboxes
  std::vector<uint32_t> track_ids;
  // debug info
  std::vector<std::array<float, 4>> pose_input_bboxes;   // inputs to the pose model
  std::vector<std::array<float, 4>> pose_output_bboxes;  // bboxes derived from poses
};
// Fill a pose-tracker parameter struct with the library defaults.
// A value of -1 means "auto/unlimited" for det_min_bbox_size,
// pose_max_num_bboxes and pose_min_keypoints (interpreted at the use sites).
inline void SetDefaultParams(mmdeploy_pose_tracker_param_t& p) {
  p.det_interval = 1;
  p.det_label = 0;
  p.det_min_bbox_size = -1;
  p.det_thr = .5f;
  p.det_nms_thr = .7f;
  p.pose_max_num_bboxes = -1;
  p.pose_min_keypoints = -1;
  p.pose_min_bbox_size = 0;
  p.pose_kpt_thr = .5f;
  p.pose_nms_thr = .5f;
  p.keypoint_sigmas = nullptr;
  p.keypoint_sigmas_size = 0;
  p.track_iou_thr = .4f;
  p.pose_bbox_scale = 1.25f;
  p.track_max_missing = 10;
  p.track_history_size = 1;
  p.std_weight_position = 1 / 20.f;
  p.std_weight_velocity = 1 / 160.f;
  // one-euro smoothing parameters (beta, fc_min, fc_v); assigned element-wise
  // instead of the previous C-style reinterpreting reference cast.
  p.smooth_params[0] = 0.007f;
  p.smooth_params[1] = 1.f;
  p.smooth_params[2] = 1.f;
}
} // namespace mmdeploy::mmpose::_pose_tracker
namespace mmdeploy {
MMDEPLOY_REGISTER_TYPE_ID(mmdeploy_pose_tracker_param_t*, 0x32bc6919d5287035);
MMDEPLOY_REGISTER_TYPE_ID(mmpose::_pose_tracker::TrackerResult, 0xacb6ddb7dc1ffbca);
} // namespace mmdeploy
#endif  // MMDEPLOY_CODEBASE_MMPOSE_POSE_TRACKER_COMMON_H
// Copyright (c) OpenMMLab. All rights reserved.
#include "mmdeploy/archive/value_archive.h"
#include "mmdeploy/core/mat.h"
#include "mmdeploy/core/module.h"
#include "mmdeploy/experimental/module_adapter.h"
#include "pose_tracker/common.h"
#include "pose_tracker/pose_tracker.h"
namespace mmdeploy {
MMDEPLOY_REGISTER_TYPE_ID(mmpose::_pose_tracker::Tracker, 0xcfe87980aa895d3a);
}
namespace mmdeploy::mmpose::_pose_tracker {
#define REGISTER_SIMPLE_MODULE(name, fn) \
MMDEPLOY_REGISTER_FACTORY_FUNC(Module, (name, 0), [](const Value&) { return CreateTask(fn); });
// Decide whether the detector should run on the current frame.
Value Prepare(const Value& data, const Value& use_det, Value state) {
  auto& tracker = state.get_ref<Tracker&>();
  // Lazily record the frame size when the very first frame arrives.
  if (tracker.frame_id() == 0) {
    auto& frame = data["ori_img"].get_ref<const framework::Mat&>();
    tracker.SetFrameSize(frame.height(), frame.width());
  }
  // An explicit (non -1) request overrides the automatic schedule.
  const auto requested = use_det.get<int>();
  if (requested != -1) {
    return use_det;
  }
  // Auto mode: run detection every det_interval-th frame.
  const auto interval = tracker.params().det_interval;
  return interval > 0 && tracker.frame_id() % interval == 0;
}
REGISTER_SIMPLE_MODULE(pose_tracker::Prepare, Prepare);
// Module stage: merge detector output (if any) with the tracker's predicted
// bboxes, then emit one {img, bbox} entry per pose-estimation target along
// with the matching previous track ids.
std::tuple<Value, Value> ProcessBboxes(const Value& det_val, const Value& data,
                                       Value state) noexcept {
  auto& tracker = state.get_ref<Tracker&>();
  std::optional<Tracker::Detections> dets;
  if (det_val.is_array()) {  // has detections
    auto& [bboxes, scores, labels] = dets.emplace();
    for (const auto& det : det_val.array()) {
      bboxes.push_back(from_value<Bbox>(det["bbox"]));
      scores.push_back(det["score"].get<float>());
      labels.push_back(det["label_id"].get<int>());
    }
  }
  auto [bboxes, ids] = tracker.ProcessBboxes(dets);
  Value::Array bbox_array;
  Value track_ids_array;
  // attach bboxes to image data
  for (auto& bbox : bboxes) {
    // round to an integer pixel rect (x, y, w, h) for the pose model's crop
    cv::Rect rect(cv::Rect2f(cv::Point2f(bbox[0], bbox[1]), cv::Point2f(bbox[2], bbox[3])));
    bbox_array.push_back({
        {"img", data["img"]},                                 // img
        {"bbox", {rect.x, rect.y, rect.width, rect.height}},  // bbox
    });
  }
  track_ids_array = to_value(ids);
  return {std::move(bbox_array), std::move(track_ids_array)};
}
REGISTER_SIMPLE_MODULE(pose_tracker::ProcessBboxes, ProcessBboxes);
// Module stage: convert pose outputs into key-point/score lists, advance the
// tracker one frame, then package the currently visible tracks as a
// TrackerResult. (An unused local `avg` that accumulated scores but was
// never read has been removed.)
Value TrackStep(const Value& poses, const Value& track_indices, Value state) noexcept {
  assert(poses.is_array());
  vector<Points> keypoints;
  vector<Scores> scores;
  for (auto& output : poses.array()) {
    auto& k = keypoints.emplace_back();
    auto& s = scores.emplace_back();
    for (auto& kpt : output["key_points"].array()) {
      // "bbox" here actually carries a single (x, y) point
      k.emplace_back(kpt["bbox"][0].get<float>(), kpt["bbox"][1].get<float>());
      s.push_back(kpt["score"].get<float>());
    }
  }
  vector<int64_t> track_ids;
  from_value(track_indices, track_ids);
  auto& tracker = state.get_ref<Tracker&>();
  tracker.TrackStep(keypoints, scores, track_ids);
  // export visible tracks only
  TrackerResult result;
  for (const auto& track : tracker.tracks()) {
    if (track->missing()) {
      continue;
    }
    vector<mmdeploy_point_t> kpts;
    kpts.reserve(track->smoothed_kpts().size());
    for (const auto& kpt : track->smoothed_kpts()) {
      kpts.push_back({kpt.x, kpt.y});
    }
    result.keypoints.push_back(std::move(kpts));
    result.scores.push_back(track->scores());
    auto& bbox = track->smoothed_bbox();
    result.bboxes.push_back({bbox[0], bbox[1], bbox[2], bbox[3]});
    result.track_ids.push_back(track->track_id());
  }
  result.pose_input_bboxes = tracker.pose_input_bboxes();
  result.pose_output_bboxes = tracker.pose_output_bboxes();
  return result;
}
REGISTER_SIMPLE_MODULE(pose_tracker::TrackStep, TrackStep);
// MSVC toolset v143 keeps ICEing when using a lambda here
// Module entry: wrap a Tracker constructed from `param` in a pointer Value,
// which becomes the pipeline's per-stream mutable state.
static Value CreateTracker(mmdeploy_pose_tracker_param_t* param) {
  return make_pointer(Tracker{*param});
}
} // namespace mmdeploy::mmpose::_pose_tracker
// Copyright (c) OpenMMLab. All rights reserved.
#include "pose_tracker/pose_tracker.h"
#include <array>
#include <cmath>
#include <numeric>
#include "mmdeploy/core/utils/formatter.h"
#include "pose_tracker/utils.h"
namespace mmdeploy::mmpose::_pose_tracker {
// Construct from caller-supplied parameters; the key-point sigmas are copied
// into the tracker so `params_` no longer references external memory.
Tracker::Tracker(const mmdeploy_pose_tracker_param_t& _params) : params_(_params) {
  if (params_.keypoint_sigmas && params_.keypoint_sigmas_size) {
    key_point_sigmas_.assign(params_.keypoint_sigmas,
                             params_.keypoint_sigmas + params_.keypoint_sigmas_size);
    params_.keypoint_sigmas = key_point_sigmas_.data();
  }
}
void Tracker::SuppressOverlappingBoxes(const vector<Bbox>& bboxes,
vector<std::pair<int, float>>& ranks,
vector<int>& is_valid_bbox) const {
vector<float> iou(ranks.size() * ranks.size());
for (int i = 0; i < bboxes.size(); ++i) {
for (int j = 0; j < i; ++j) {
iou[i * bboxes.size() + j] = iou[j * bboxes.size() + i] =
intersection_over_union(bboxes[i], bboxes[j]);
}
}
suppress_non_maximum(ranks, iou, is_valid_bbox, params_.det_nms_thr);
}
// Pose-level NMS: suppress poses whose pairwise similarity (OKS or IoU, see
// GetPosePoseSimilarity) exceeds oks_thr. Poses that belong to currently
// visible tracks are ranked above all others so that a pose from a missing
// track can never suppress a visible one.
void Tracker::SuppressOverlappingPoses(const vector<Points>& keypoints,
                                       const vector<Scores>& scores, const vector<Bbox>& bboxes,
                                       const vector<int64_t>& track_ids, vector<int>& is_valid,
                                       float oks_thr) {
  assert(keypoints.size() == is_valid.size());
  assert(scores.size() == is_valid.size());
  assert(bboxes.size() == is_valid.size());
  const auto size = is_valid.size();
  // symmetric similarity matrix; entries involving invalid poses stay 0
  vector<float> similarity(size * size);
  for (int i = 0; i < size; ++i) {
    if (is_valid[i]) {
      for (int j = 0; j < i; ++j) {
        if (is_valid[j]) {
          similarity[i * size + j] = similarity[j * size + i] =
              GetPosePoseSimilarity(bboxes[i], keypoints[i], bboxes[j], keypoints[j]);
        }
      }
    }
  }
  // rank = (is_visible, summed key-point score); visibility dominates
  vector<std::pair<bool, float>> ranks;
  ranks.reserve(size);
  for (int i = 0; i < size; ++i) {
    bool is_visible = false;
    for (const auto& track : tracks_) {
      if (track->track_id() == track_ids[i]) {
        is_visible = track->missing() == 0;
        break;
      }
    }
    auto score = std::accumulate(scores[i].begin(), scores[i].end(), 0.f);
    // prevents bboxes from missing tracks to suppress visible tracks
    ranks.emplace_back(is_visible, score);
  }
  suppress_non_maximum(ranks, similarity, is_valid, oks_thr);
}
// Merge fresh detections with bboxes predicted from existing tracks, run
// bbox-level NMS, then keep at most pose_max_num_bboxes highest-ranked boxes
// as inputs to the pose model. Returns the surviving bboxes paired with
// their previous track ids (-1 for new detections).
std::tuple<vector<Bbox>, vector<int64_t>> Tracker::ProcessBboxes(
    const std::optional<Detections>& dets) {
  vector<Bbox> bboxes;
  vector<int64_t> prev_ids;
  // 2 - visible tracks
  // 1 - detection
  // 0 - missing tracks
  vector<int> types;
  GetDetectedObjects(dets, bboxes, prev_ids, types);
  GetTrackedObjects(bboxes, prev_ids, types);
  vector<int> is_valid_bboxes(bboxes.size(), 1);
  // counts valid bboxes per type; used for debug logging only
  auto count = [&] {
    std::array<int, 3> acc{};
    for (size_t i = 0; i < is_valid_bboxes.size(); ++i) {
      if (is_valid_bboxes[i]) {
        ++acc[types[i]];
      }
    }
    return acc;
  };
  POSE_TRACKER_DEBUG("frame {}, bboxes {}", frame_id_, count());
  // rank = (type, area): visible tracks > detections > missing tracks,
  // larger boxes first within a type
  vector<std::pair<int, float>> ranks;
  ranks.reserve(bboxes.size());
  for (int i = 0; i < bboxes.size(); ++i) {
    ranks.emplace_back(types[i], get_area(bboxes[i]));
  }
  SuppressOverlappingBoxes(bboxes, ranks, is_valid_bboxes);
  POSE_TRACKER_DEBUG("frame {}, bboxes after nms: {}", frame_id_, count());
  // surviving indices, sorted by rank (stable keeps ties in insertion order)
  vector<int> idxs;
  idxs.reserve(bboxes.size());
  for (int i = 0; i < bboxes.size(); ++i) {
    if (is_valid_bboxes[i]) {
      idxs.push_back(i);
    }
  }
  std::stable_sort(idxs.begin(), idxs.end(), [&](int i, int j) { return ranks[i] > ranks[j]; });
  std::fill(is_valid_bboxes.begin(), is_valid_bboxes.end(), 0);
  {
    vector<Bbox> tmp_bboxes;
    vector<int64_t> tmp_track_ids;
    for (const auto& i : idxs) {
      // NOTE(review): pose_max_num_bboxes defaults to -1 ("unlimited", see
      // SetDefaultParams); this size_t-vs-signed comparison presumably
      // promotes -1 to a huge unsigned value so the break never fires in
      // that case — confirm the field's type.
      if (tmp_bboxes.size() >= params_.pose_max_num_bboxes) {
        break;
      }
      tmp_bboxes.push_back(bboxes[i]);
      tmp_track_ids.push_back(prev_ids[i]);
      is_valid_bboxes[i] = 1;
    }
    bboxes = std::move(tmp_bboxes);
    prev_ids = std::move(tmp_track_ids);
  }
  pose_input_bboxes_ = bboxes;
  POSE_TRACKER_DEBUG("frame {}, bboxes after sort: {}", frame_id_, count());
  return {bboxes, prev_ids};
}
// Advance the tracker by one frame given the pose-model outputs for the
// bboxes previously returned by ProcessBboxes; prev_ids pairs each pose with
// its originating track (-1 for fresh detections). Association runs in two
// passes: visible tracks by gated key-point distance, then missing tracks
// by IoU.
void Tracker::TrackStep(vector<Points>& keypoints, vector<Scores>& scores,
                        const vector<int64_t>& prev_ids) noexcept {
  vector<Bbox> bboxes(keypoints.size());
  vector<int> is_unused_bbox(keypoints.size(), 1);
  // key-points to bboxes
  for (size_t i = 0; i < keypoints.size(); ++i) {
    if (auto bbox = KeypointsToBbox(keypoints[i], scores[i])) {
      bboxes[i] = *bbox;
    } else {
      is_unused_bbox[i] = false;
    }
  }
  pose_output_bboxes_ = bboxes;
  // pose-level NMS before association
  SuppressOverlappingPoses(keypoints, scores, bboxes, prev_ids, is_unused_bbox,
                           params_.pose_nms_thr);
  assert(is_unused_bbox.size() == bboxes.size());
  vector<float> similarity0;    // average mahalanobis dist - proportion of tracked key-points
  vector<float> similarity1;    // iou
  vector<vector<bool>> gating;  // key-point gating result
  // NOTE(review): gating is read below as gating[i * tracks_.size() + j];
  // verify that GetSimilarityMatrices fills it at exactly those indices.
  GetSimilarityMatrices(bboxes, keypoints, prev_ids, similarity0, similarity1, gating);
  vector<int> is_unused_track(tracks_.size(), 1);
  // disable missing tracks in the #1 assignment
  for (int i = 0; i < tracks_.size(); ++i) {
    if (tracks_[i]->missing()) {
      is_unused_track[i] = 0;
    }
  }
  const auto assignment_visible =
      greedy_assignment(similarity0, is_unused_bbox, is_unused_track, -kInf / 10);
  POSE_TRACKER_DEBUG("frame {}, assignment for visible {}", frame_id_, assignment_visible);
  // enable missing tracks in the #2 assignment
  for (int i = 0; i < tracks_.size(); ++i) {
    if (tracks_[i]->missing()) {
      is_unused_track[i] = 1;
    }
  }
  const auto assignment_missing =
      greedy_assignment(similarity1, is_unused_bbox, is_unused_track, params_.track_iou_thr);
  POSE_TRACKER_DEBUG("frame {}, assignment for missing {}", frame_id_, assignment_missing);
  // update assigned tracks
  for (auto [i, j, _] : assignment_visible) {
    tracks_[j]->UpdateVisible(bboxes[i], keypoints[i], scores[i], gating[i * tracks_.size() + j]);
  }
  // update recovered tracks
  for (auto [i, j, _] : assignment_missing) {
    tracks_[j]->UpdateRecovered(bboxes[i], keypoints[i], scores[i]);
  }
  // generating new tracks from detected bboxes
  for (size_t i = 0; i < is_unused_bbox.size(); ++i) {
    if (is_unused_bbox[i] && prev_ids[i] == -1) {
      CreateTrack(bboxes[i], keypoints[i], scores[i]);
    }
  }
  // update missing tracks
  for (size_t i = 0; i < is_unused_track.size(); ++i) {
    if (is_unused_track[i]) {
      tracks_[i]->UpdateMissing();
    }
  }
  // diagnostic for missing tracks
  DiagnosticMissingTracks(is_unused_track, is_unused_bbox, similarity0, similarity1);
  RemoveMissingTracks();
  for (auto& track : tracks_) {
    track->Predict();
  }
  ++frame_id_;
  // print track summary
  // SummaryTracks();
}
// Predict a bbox for every existing track and append those that still
// intersect the frame and are large enough. Visible tracks get type 2,
// missing tracks type 0 (cf. the type legend in ProcessBboxes).
void Tracker::GetTrackedObjects(vector<Bbox>& bboxes, vector<int64_t>& track_ids,
                                vector<int>& types) const {
  for (auto& track : tracks_) {
    std::optional<Bbox> bbox;
    if (track->missing()) {
      // no recent key-points: fall back to the filter's bbox prediction
      bbox = track->predicted_bbox();
    } else {
      bbox = keypoints_to_bbox(track->predicted_kpts(), track->scores(), frame_h_, frame_w_,
                               params_.pose_bbox_scale, params_.pose_kpt_thr,
                               params_.pose_min_keypoints);
    }
    if (bbox) {
      auto& b = *bbox;
      // keep only boxes that overlap the frame and exceed the minimum size
      cv::Rect_<float> img_rect(0, 0, frame_w_, frame_h_);
      cv::Rect_<float> box_rect(b[0], b[1], b[2] - b[0], b[3] - b[1]);
      auto roi = img_rect & box_rect;
      if (roi.area() > 0 && get_area(b) > params_.pose_min_bbox_size * params_.pose_min_bbox_size) {
        bboxes.push_back(*bbox);
        track_ids.push_back(track->track_id());
        types.push_back(track->missing() ? 0 : 2);
      }
    }
  }
}
// Append detector outputs that pass the label / score / size filters.
// Detections receive track id -1 (no owner yet) and type 1.
void Tracker::GetDetectedObjects(const std::optional<Detections>& dets, vector<Bbox>& _bboxes,
                                 vector<int64_t>& track_ids, vector<int>& types) const {
  if (!dets) {
    return;
  }
  const auto& [boxes, confs, labels] = *dets;
  const auto min_area = params_.det_min_bbox_size * params_.det_min_bbox_size;
  for (size_t i = 0; i < boxes.size(); ++i) {
    const bool keep = labels[i] == params_.det_label && confs[i] > params_.det_thr &&
                      get_area(boxes[i]) >= min_area;
    if (keep) {
      _bboxes.push_back(boxes[i]);
      track_ids.push_back(-1);
      types.push_back(1);
    }
  }
}
// Compare one pose against one track. Returns:
//   dist   - mean gated key-point distance minus the tracked fraction
//            (kInf when too few key-points pass the gate)
//   iou    - IoU between the pose bbox and the track's predicted bbox
//   gating - per-key-point flag: true when that key-point passed the gate
// Fix: the original ended with two identical `return {dist, iou, gating};`
// statements guarded by key_point_sigmas_.empty(); the redundant branch is
// removed.
std::tuple<float, float, vector<bool>> Tracker::GetTrackPoseSimilarity(Track& track,
                                                                       const Bbox& bbox,
                                                                       const Points& kpts) const {
  // chi-square 95% quantiles indexed by degrees of freedom
  static constexpr const std::array chi2inv95{0.f, 3.8415f, 5.9915f, 7.8147f, 9.4877f,
                                              11.070f, 12.592f, 14.067f, 15.507f, 16.919f};
  auto dists = track.KeyPointDistance(kpts);
  vector<bool> gating;
  gating.reserve(dists.size());
  float dist = 0.f;
  int count = 0;
  for (const auto& d : dists) {
    // gate each key-point at the 2-dof chi-square threshold
    if (d < chi2inv95[2]) {
      dist += d;
      ++count;
      gating.push_back(true);
    } else {
      gating.push_back(false);
    }
  }
  auto count_thr =
      params_.pose_min_keypoints >= 0 ? params_.pose_min_keypoints : (dists.size() + 1) / 2;
  if (count >= count_thr) {
    auto fcount = static_cast<float>(count);
    // mean gated distance, discounted by the fraction of tracked key-points
    dist = dist / fcount - fcount / static_cast<float>(dists.size());
  } else {
    dist = kInf;  // too few gated key-points: treat as no match
  }
  auto iou = intersection_over_union(track.predicted_bbox(), bbox);
  return {dist, iou, gating};
}
// Build bbox-vs-track similarity matrices (row = bbox, col = track):
//   similarity0 - negated gated key-point distance (higher is better)
//   similarity1 - IoU with the track's predicted bbox
//   gating      - per-key-point gate flags for each (bbox, track) pair
// Pairs whose bbox carries a previous track id that differs from the track
// are left at -kInf / empty.
// BUG FIX: `gating` is pre-sized to n_rows * n_cols and later indexed as
// gating[i * tracks_.size() + j] in TrackStep, but the original appended via
// push_back, growing the vector past its pre-sized extent and leaving every
// indexed slot an empty vector. Assign by index instead.
void Tracker::GetSimilarityMatrices(const vector<Bbox>& bboxes, const vector<Points>& keypoints,
                                    const vector<int64_t>& prev_ids, vector<float>& similarity0,
                                    vector<float>& similarity1, vector<vector<bool>>& gating) {
  const auto n_rows = static_cast<int>(bboxes.size());
  const auto n_cols = static_cast<int>(tracks_.size());
  // generate similarity matrix
  similarity0 = vector<float>(n_rows * n_cols, -kInf);
  similarity1 = vector<float>(n_rows * n_cols, -kInf);
  gating = vector<vector<bool>>(n_rows * n_cols);
  for (int i = 0; i < n_rows; ++i) {
    const auto& bbox = bboxes[i];
    const auto& kpts = keypoints[i];
    for (int j = 0; j < n_cols; ++j) {
      auto& track = *tracks_[j];
      if (prev_ids[i] != -1 && prev_ids[i] != track.track_id()) {
        continue;
      }
      const auto index = i * n_cols + j;
      auto&& [dist, iou, gate] = GetTrackPoseSimilarity(track, bbox, kpts);
      similarity0[index] = -dist;
      similarity1[index] = iou;
      gating[index] = std::move(gate);
    }
  }
}
// Pairwise pose similarity, symmetric in its arguments: OKS when key-point
// sigmas are configured, otherwise plain bbox IoU.
float Tracker::GetPosePoseSimilarity(const Bbox& bbox0, const Points& kpts0, const Bbox& bbox1,
                                     const Points& kpts1) {
  return key_point_sigmas_.empty()
             ? intersection_over_union(bbox0, bbox1)
             : object_keypoint_similarity(kpts0, bbox0, kpts1, bbox1, key_point_sigmas_);
}
// Start a new track from an unmatched pose; ids are assigned monotonically.
// (The previous code dereferenced emplace_back's return value and discarded
// the result — a no-op; the pointless `*` is removed.)
void Tracker::CreateTrack(const Bbox& bbox, const Points& kpts, const Scores& scores) {
  tracks_.emplace_back(std::make_unique<Track>(&params_, bbox, kpts, scores, next_id_++));
}
// Convenience wrapper: derive a bbox from key-points using the configured
// scale/thresholds; returns nullopt on failure (see keypoints_to_bbox).
std::optional<Bbox> Tracker::KeypointsToBbox(const Points& kpts, const Scores& scores) const {
  return keypoints_to_bbox(kpts, scores, frame_h_, frame_w_, params_.pose_bbox_scale,
                           params_.pose_kpt_thr, params_.pose_min_keypoints);
}
void Tracker::RemoveMissingTracks() {
size_t count{};
for (auto& track : tracks_) {
if (track->missing() <= params_.track_max_missing) {
tracks_[count++] = std::move(track);
}
}
tracks_.resize(count);
}
// Debug helper: for every track left unmatched this frame, log the best
// similarity it achieved against the unmatched bboxes; when anything went
// missing, also dump both full similarity matrices (rows = bboxes,
// cols = tracks).
void Tracker::DiagnosticMissingTracks(const vector<int>& is_unused_track,
                                      const vector<int>& is_unused_bbox,
                                      const vector<float>& similarity0,
                                      const vector<float>& similarity1) {
  int missing = 0;
  const auto n_cols = static_cast<int>(is_unused_track.size());
  const auto n_rows = static_cast<int>(is_unused_bbox.size());
  for (int i = 0; i < is_unused_track.size(); ++i) {
    if (is_unused_track[i]) {
      float best_s0 = 0.f;
      float best_s1 = 0.f;
      for (int j = 0; j < is_unused_bbox.size(); ++j) {
        if (is_unused_bbox[j]) {
          best_s0 = std::max(similarity0[j * n_cols + i], best_s0);
          best_s1 = std::max(similarity1[j * n_cols + i], best_s1);
        }
      }
      POSE_TRACKER_DEBUG("frame {}: track missing {}, best_s0={}, best_s1={}", frame_id_,
                         tracks_[i]->track_id(), best_s0, best_s1);
      ++missing;
    }
  }
  if (missing) {
    // cv::Mat_ gives a readable matrix dump; the const_cast is needed only
    // because cv::Mat_ takes a non-const pointer (data is not modified)
    std::stringstream ss;
    ss << cv::Mat_<float>(n_rows, n_cols, const_cast<float*>(similarity0.data()));
    POSE_TRACKER_DEBUG("frame {}, similarity#0: \n{}", frame_id_, ss.str());
    ss = std::stringstream{};
    ss << cv::Mat_<float>(n_rows, n_cols, const_cast<float*>(similarity1.data()));
    POSE_TRACKER_DEBUG("frame {}, similarity#1: \n{}", frame_id_, ss.str());
  }
}
// Debug helper: log (track_id, missing) for every track, plus per-key-point
// scores for the visible ones. The call site in TrackStep is commented out.
void Tracker::SummaryTracks() {
  vector<std::tuple<int64_t, int>> summary;
  for (const auto& track : tracks_) {
    summary.emplace_back(track->track_id(), track->missing());
  }
  POSE_TRACKER_DEBUG("frame {}, track summary {}", frame_id_, summary);
  for (const auto& track : tracks_) {
    if (!track->missing()) {
      POSE_TRACKER_DEBUG("frame {}, track {}, scores {}", frame_id_, track->track_id(),
                         track->scores());
    }
  }
}
} // namespace mmdeploy::mmpose::_pose_tracker
// Copyright (c) OpenMMLab. All rights reserved.
#ifndef MMDEPLOY_CODEBASE_MMPOSE_POSE_TRACKER_HPP
#define MMDEPLOY_CODEBASE_MMPOSE_POSE_TRACKER_HPP
#include "mmdeploy/pose_tracker.h"
#include "pose_tracker/common.h"
#include "pose_tracker/track.h"
namespace mmdeploy::mmpose::_pose_tracker {
// Multi-object pose tracker: associates per-frame pose estimates with
// persistent tracks. See pose_tracker.cpp for the implementation.
class Tracker {
 public:
  explicit Tracker(const mmdeploy_pose_tracker_param_t& _params);
  // NOTE(review): copying asserts — presumably declared only to satisfy a
  // storage requirement; never call it.
  Tracker(const Tracker&) { assert(0); }
  Tracker(Tracker&& o) noexcept = default;
  // raw detector output for one frame (parallel vectors)
  struct Detections {
    Bboxes bboxes;
    Scores scores;
    vector<int> labels;
  };
  void SetFrameSize(int height, int width) {
    frame_h_ = static_cast<float>(height);
    frame_w_ = static_cast<float>(width);
  }
  const mmdeploy_pose_tracker_param_t& params() const noexcept { return params_; }
  int64_t frame_id() const noexcept { return frame_id_; }
  const vector<std::unique_ptr<Track>>& tracks() const noexcept { return tracks_; }
  // merge detections with tracked bboxes; returns pose-model inputs plus the
  // matching previous track ids (-1 for fresh detections)
  std::tuple<vector<Bbox>, vector<int64_t>> ProcessBboxes(const std::optional<Detections>& dets);
  // consume pose results for the bboxes above and advance one frame
  void TrackStep(vector<Points>& keypoints, vector<Scores>& scores,
                 const vector<int64_t>& prev_ids) noexcept;

 private:
  void GetDetectedObjects(const std::optional<Detections>& dets, vector<Bbox>& _bboxes,
                          vector<int64_t>& track_ids, vector<int>& types) const;
  void GetTrackedObjects(vector<Bbox>& bboxes, vector<int64_t>& track_ids,
                         vector<int>& types) const;
  void SuppressOverlappingBoxes(const vector<Bbox>& bboxes, vector<std::pair<int, float>>& ranks,
                                vector<int>& is_valid_bbox) const;
  void SuppressOverlappingPoses(const vector<Points>& keypoints, const vector<Scores>& scores,
                                const vector<Bbox>& bboxes, const vector<int64_t>& track_ids,
                                vector<int>& is_valid, float oks_thr);
  std::optional<Bbox> KeypointsToBbox(const Points& kpts, const Scores& scores) const;
  float GetPosePoseSimilarity(const Bbox& bbox0, const Points& kpts0, const Bbox& bbox1,
                              const Points& kpts1);
  void GetSimilarityMatrices(const vector<Bbox>& bboxes, const vector<Points>& keypoints,
                             const vector<int64_t>& prev_ids, vector<float>& similarity0,
                             vector<float>& similarity1, vector<vector<bool>>& gating);
  std::tuple<float, float, vector<bool>> GetTrackPoseSimilarity(Track& track, const Bbox& bbox,
                                                                const Points& kpts) const;
  void CreateTrack(const Bbox& bbox, const Points& kpts, const Scores& scores);
  void RemoveMissingTracks();
  void DiagnosticMissingTracks(const vector<int>& is_unused_track,
                               const vector<int>& is_unused_bbox, const vector<float>& similarity0,
                               const vector<float>& similarity1);
  void SummaryTracks();

 private:
  static constexpr const auto kInf = 1000.f;  // sentinel "infinite" distance
  float frame_h_ = 0;
  float frame_w_ = 0;
  vector<std::unique_ptr<Track>> tracks_;
  int64_t next_id_{0};                   // id assigned to the next new track
  std::vector<float> key_point_sigmas_;  // owned copy of params' OKS sigmas
  mmdeploy_pose_tracker_param_t params_;
  vector<Bbox> pose_input_bboxes_;   // debug: last inputs to the pose model
  vector<Bbox> pose_output_bboxes_;  // debug: last bboxes derived from poses
  int64_t frame_id_ = 0;

 public:
  const vector<Bbox>& pose_input_bboxes() const noexcept { return pose_input_bboxes_; }
  const vector<Bbox>& pose_output_bboxes() const noexcept { return pose_output_bboxes_; }
};
} // namespace mmdeploy::mmpose::_pose_tracker
#endif // MMDEPLOY_CODEBASE_MMPOSE_POSE_TRACKER_HPP
// Copyright (c) OpenMMLab. All rights reserved.
#include "smoothing_filter.h"
namespace mmdeploy::mmpose::_pose_tracker {
// Initialize the smoother: positions start at the first observation,
// all velocities start at zero.
SmoothingFilter::SmoothingFilter(const Bbox& bbox, const Points& pts,
                                 const SmoothingFilter::Params& params)
    : params_(params),
      pts_v_(pts.size()),           // key-point velocities (zero)
      pts_x_(pts),                  // key-point positions
      center_v_{},                  // bbox-center velocity (zero)
      center_x_{get_center(bbox)},  // bbox-center position
      scale_v_{},                   // bbox-scale velocity (zero)
      scale_x_{get_scale(bbox)} {}  // bbox-scale position
// Feed one observation and return the smoothed (bbox, key-points). Each
// state component is low-pass filtered with an adaptive cutoff
// fc = fc_min + beta * norm(velocity) (see step()).
std::pair<Bbox, Points> SmoothingFilter::Step(const Bbox& bbox, const Points& kpts) {
  constexpr auto abs = [](const Point& p) { return std::sqrt(p.dot(p)); };
  // filter key-points
  step<Point>(pts_x_, pts_v_, kpts, params_, abs);
  // filter bbox center
  std::array c{get_center(bbox)};
  step<Point>(center_x_, center_v_, c, params_, abs);
  // filter bbox scales
  auto s = get_scale(bbox);
  step<float>(scale_x_, scale_v_, s, params_, [](auto x) { return x; });
  return {get_bbox(center_x_[0], scale_x_), pts_x_};
}
void SmoothingFilter::Reset(const Bbox& bbox, const Points& pts) {
pts_v_ = Points(pts_v_.size());
center_v_ = {};
scale_v_ = {};
pts_x_ = pts;
center_v_ = {get_center(bbox)};
scale_v_ = get_scale(bbox);
}
// Exponential smoothing coefficient for a given cutoff frequency, with the
// sample period treated as 1: alpha = omega / (omega + 1), omega = 2*pi*fc.
float SmoothingFilter::smoothing_factor(float cutoff) {
  static constexpr float kPi = 3.1415926;
  const float omega = 2.f * kPi * cutoff;
  return omega / (omega + 1.f);
}
} // namespace mmdeploy::mmpose::_pose_tracker
// Copyright (c) OpenMMLab. All rights reserved.
#ifndef MMDEPLOY_CODEBASE_MMPOSE_POSE_TRACKER_SMOOTHING_FILTER_H
#define MMDEPLOY_CODEBASE_MMPOSE_POSE_TRACKER_SMOOTHING_FILTER_H
#include "mmdeploy/core/mpl/span.h"
#include "pose_tracker/utils.h"
namespace mmdeploy::mmpose::_pose_tracker {
template <typename T>
using span = mmdeploy::Span<T>;
class SmoothingFilter {
public:
struct Params {
float beta;
float fc_min;
float fc_v;
};
explicit SmoothingFilter(const Bbox& bbox, const Points& pts, const Params& params);
std::pair<Bbox, Points> Step(const Bbox& bbox, const Points& kpts);
void Reset(const Bbox& bbox, const Points& pts);
private:
static float smoothing_factor(float cutoff);
template <typename T, typename Norm>
static void step(span<T> x, span<T> v, span<const T> x1, const Params& params, Norm norm) {
auto a_v = smoothing_factor(params.fc_v);
for (int i = 0; i < v.size(); ++i) {
v[i] = smooth(a_v, v[i], x1[i] - x[i]);
auto fc = params.fc_min + params.beta * norm(v[i]);
auto a_x = smoothing_factor(fc);
x[i] = smooth(a_x, x[i], x1[i]);
}
}
template <typename T>
static T smooth(float a, const T& x0, const T& x1) {
return (1.f - a) * x0 + a * x1;
}
private:
Params params_;
std::vector<Point> pts_v_;
std::vector<Point> pts_x_;
std::array<Point, 1> center_v_;
std::array<Point, 1> center_x_;
std::array<float, 2> scale_v_;
std::array<float, 2> scale_x_;
};
} // namespace mmdeploy::mmpose::_pose_tracker
#endif  // MMDEPLOY_CODEBASE_MMPOSE_POSE_TRACKER_SMOOTHING_FILTER_H
// Copyright (c) OpenMMLab. All rights reserved.
#include "pose_tracker/track.h"
namespace mmdeploy::mmpose::_pose_tracker {
// A track owns a tracking filter (prediction/correction) and a smoothing
// filter (reported output); the first observation seeds both and is then
// recorded via Add().
Track::Track(const mmdeploy_pose_tracker_param_t* params, const Bbox& bbox, const Points& kpts,
             const Scores& ss, int64_t id)
    : params_(params),
      filter_(CreateFilter(bbox, kpts)),
      smoother_(CreateSmoother(bbox, kpts)),
      track_id_(id) {
  POSE_TRACKER_DEBUG("new track {}", track_id_);
  Add(bbox, kpts, ss);
}
// Destructor only logs; members clean up via RAII.
Track::~Track() { POSE_TRACKER_DEBUG("track lost {}", track_id_); }
// Update with a directly observed pose: the tracking filter corrects the
// observation (per-key-point gate flags in `tracked`) before recording it.
void Track::UpdateVisible(const Bbox& bbox, const Points& kpts, const Scores& scores,
                          const vector<bool>& tracked) {
  auto [bbox_corr, kpts_corr] = filter_.Correct(bbox, kpts, tracked);
  Add(bbox_corr, kpts_corr, scores);
}
// A previously missing track matched a pose again: rebuild the tracking
// filter from scratch, reset the smoother, record the observation and clear
// the missing counter.
void Track::UpdateRecovered(const Bbox& bbox, const Points& kpts, const Scores& scores) {
  POSE_TRACKER_DEBUG("track recovered {}", track_id_);
  filter_ = CreateFilter(bbox, kpts);
  smoother_.Reset(bbox, kpts);
  Add(bbox, kpts, scores);
  missing_ = 0;
}
// No pose matched this frame: bump the missing counter and, while under the
// limit, keep the history alive with the predicted state (zero scores).
void Track::UpdateMissing() {
  missing_++;
  if (missing_ <= params_->track_max_missing) {
    // use predicted state to update the missing tracks
    Add(bbox_predict_, kpts_predict_, vector<float>(kpts_predict_.size()));
  }
}
// Advance the tracking filter one step; the predicted bbox/key-points are
// cached for the next association round.
void Track::Predict() {
  // TODO: velocity decay for missing tracks
  std::tie(bbox_predict_, kpts_predict_) = filter_.Predict();
}
// Append one observation to the bounded track history (capacity
// track_history_size) and refresh the smoothed bbox/key-points.
// The old rotate-to-back-then-pop_back sequence is replaced by the direct
// equivalent, erase(begin()), which drops the oldest entry in one call.
void Track::Add(const Bbox& bbox, const Points& kpts, const Scores& ss) {
  bboxes_.push_back(bbox);
  keypoints_.push_back(kpts);
  scores_.push_back(ss);
  if (bboxes_.size() > params_->track_history_size) {
    bboxes_.erase(bboxes_.begin());
    keypoints_.erase(keypoints_.begin());
    scores_.erase(scores_.begin());
  }
  std::tie(bbox_smooth_, kpts_smooth_) = smoother_.Step(bbox, kpts);
}
// Build the tracking filter, seeded with the first observation and the
// configured process-noise weights.
TrackingFilter Track::CreateFilter(const Bbox& bbox, const Points& pts) {
  return {bbox, pts, params_->std_weight_position, params_->std_weight_velocity};
}
// Build the output smoother; smooth_params holds (beta, fc_min, fc_v) in the
// order of SmoothingFilter::Params.
SmoothingFilter Track::CreateSmoother(const Bbox& bbox, const Points& pts) {
  return SmoothingFilter(
      bbox, pts, {params_->smooth_params[0], params_->smooth_params[1], params_->smooth_params[2]});
}
} // namespace mmdeploy::mmpose::_pose_tracker
//
// Created by zhangli on 1/9/23.
//
#ifndef MMDEPLOY_CODEBASE_MMPOSE_POSE_TRACKER_TRACK_H
#define MMDEPLOY_CODEBASE_MMPOSE_POSE_TRACKER_TRACK_H
#include "pose_tracker/common.h"
#include "pose_tracker/smoothing_filter.h"
#include "pose_tracker/tracking_filter.h"
#include "pose_tracker/utils.h"
namespace mmdeploy::mmpose::_pose_tracker {
// One tracked person: a bounded history of (bbox, key-points, scores), a
// tracking filter for prediction/correction and a smoothing filter for the
// reported output.
class Track {
 public:
  Track(const mmdeploy_pose_tracker_param_t* params, const Bbox& bbox, const Points& kpts,
        const Scores& ss, int64_t id);
  ~Track();
  // observation matched while the track was visible; `tracked` gates key-points
  void UpdateVisible(const Bbox& bbox, const Points& kpts, const Scores& scores,
                     const vector<bool>& tracked);
  // observation matched after the track had gone missing
  void UpdateRecovered(const Bbox& bbox, const Points& kpts, const Scores& scores);
  // no observation matched this frame
  void UpdateMissing();
  // advance the tracking filter one frame
  void Predict();
  float BboxDistance(const Bbox& bbox) { return filter_.BboxDistance(bbox); }
  vector<float> KeyPointDistance(const Points& kpts) { return filter_.KeyPointDistance(kpts); }
  uint32_t track_id() const noexcept { return track_id_; }
  // consecutive frames without a matched observation (0 == visible)
  uint32_t missing() const noexcept { return missing_; }
  const Bbox& predicted_bbox() const noexcept { return bbox_predict_; }
  const Bbox& smoothed_bbox() const noexcept { return bbox_smooth_; }
  const Points& predicted_kpts() const noexcept { return kpts_predict_; }
  const Points& smoothed_kpts() const noexcept { return kpts_smooth_; }
  const Scores& scores() const noexcept { return scores_.back(); }

 private:
  void Add(const Bbox& bbox, const Points& kpts, const Scores& ss);
  TrackingFilter CreateFilter(const Bbox& bbox, const Points& pts);
  SmoothingFilter CreateSmoother(const Bbox& bbox, const Points& pts);

 private:
  const mmdeploy_pose_tracker_param_t* params_;  // non-owning
  vector<Bbox> bboxes_;  // history, newest last, bounded by track_history_size
  vector<Points> keypoints_;
  vector<Scores> scores_;
  // NOTE(review): constructed from an int64_t id — narrows to uint32_t;
  // confirm ids cannot exceed the 32-bit range.
  uint32_t track_id_{};
  Bbox bbox_predict_{};
  Bbox bbox_smooth_{};
  Points kpts_predict_;
  Points kpts_smooth_;
  uint32_t missing_{0};
  TrackingFilter filter_;
  SmoothingFilter smoother_;
};
} // namespace mmdeploy::mmpose::_pose_tracker
#endif  // MMDEPLOY_CODEBASE_MMPOSE_POSE_TRACKER_TRACK_H
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment