Commit 546b4279 authored by limm's avatar limm
Browse files

add csrc and mmdeploy module

parent 502f4fb9
Pipeline #2810 canceled with stages
// Copyright (c) OpenMMLab. All rights reserved.
#ifndef MMDEPLOY_CODEBASE_MMDET_BASE_DENSE_HEAD_H_
#define MMDEPLOY_CODEBASE_MMDET_BASE_DENSE_HEAD_H_
#include "mmdeploy/codebase/mmdet/mmdet.h"
#include "mmdeploy/core/tensor.h"
namespace mmdeploy::mmdet {
// Post-processing head for single-stage dense detectors: converts raw
// (dets, scores) tensors into Detections in original-image coordinates.
// Implementation lives in the corresponding .cpp (not shown here).
class BaseDenseHead : public MMDetection {
 public:
  explicit BaseDenseHead(const Value& cfg);

  // prep_res: preprocess metadata; infer_res: network outputs.
  Result<Value> operator()(const Value& prep_res, const Value& infer_res);

  Result<Detections> GetBBoxes(const Value& prep_res, const Tensor& dets,
                               const Tensor& scores) const;

 private:
  float score_thr_{0.4f};       // minimum score kept
  int nms_pre_{1000};           // candidate cap before NMS — semantics in the .cpp (not shown)
  float iou_threshold_{0.45f};  // NMS IoU threshold
  int min_bbox_size_{0};        // boxes smaller than this are dropped
};
} // namespace mmdeploy::mmdet
#endif // MMDEPLOY_CODEBASE_MMDET_BASE_DENSE_HEAD_H_
// Copyright (c) OpenMMLab. All rights reserved.
#include "mmdeploy/core/registry.h"
#include "mmdeploy/core/utils/device_utils.h"
#include "mmdeploy/experimental/module_adapter.h"
#include "mmdeploy/operation/managed.h"
#include "mmdeploy/operation/vision.h"
#include "object_detection.h"
#include "opencv2/imgproc/imgproc.hpp"
#include "opencv_utils.h"
namespace mmdeploy::mmdet {
// Post-processing component for instance-segmentation detectors.
// Extends ResizeBBox: in addition to mapping boxes back to the original image
// frame, it warps/resizes every predicted instance mask and binarizes it.
// Handles both Mask R-CNN style per-ROI masks and RTMDet-Inst style
// full-image masks (selected via `is_rcnn_`).
class ResizeInstanceMask : public ResizeBBox {
 public:
  explicit ResizeInstanceMask(const Value& cfg) : ResizeBBox(cfg) {
    if (cfg.contains("params")) {
      mask_thr_binary_ = cfg["params"].value("mask_thr_binary", mask_thr_binary_);
      // Presence of an "rcnn" sub-config is the marker for a two-stage
      // (Mask R-CNN style) model; otherwise the RTMDet-Inst path is taken.
      is_rcnn_ = cfg["params"].contains("rcnn");
      is_resize_mask_ = cfg["params"].value("is_resize_mask", is_resize_mask_);
    }
    // Create the device-bound warp/permute operations inside an operation
    // context so they are associated with this component's device and stream.
    operation::Context ctx(device_, stream_);
    warp_affine_ = operation::Managed<operation::WarpAffine>::Create("bilinear");
    permute_ = operation::Managed<::mmdeploy::operation::Permute>::Create();
  }

  // TODO: remove duplication (shares most of its validation with ResizeBBox)
  //
  // Validates the inference outputs ("dets", "labels", "masks"), restores the
  // bounding boxes to original-image coordinates, then attaches a binary mask
  // to each surviving detection.
  Result<Value> operator()(const Value& prep_res, const Value& infer_res) {
    MMDEPLOY_DEBUG("prep_res: {}\ninfer_res: {}", prep_res, infer_res);
    try {
      DeviceGuard guard(device_);
      auto dets = infer_res["dets"].get<Tensor>();
      auto labels = infer_res["labels"].get<Tensor>();
      auto masks = infer_res["masks"].get<Tensor>();
      MMDEPLOY_DEBUG("dets.shape: {}", dets.shape());
      MMDEPLOY_DEBUG("labels.shape: {}", labels.shape());
      MMDEPLOY_DEBUG("masks.shape: {}", masks.shape());
      // `dets` is supposed to have 3 dims. They are 'batch', 'bboxes_number'
      // and 'channels' respectively
      if (!(dets.shape().size() == 3 && dets.data_type() == DataType::kFLOAT)) {
        MMDEPLOY_ERROR("unsupported `dets` tensor, shape: {}, dtype: {}", dets.shape(),
                       (int)dets.data_type());
        return Status(eNotSupported);
      }
      // `labels` is supposed to have 2 dims, which are 'batch' and
      // 'bboxes_number'
      if (labels.shape().size() != 2) {
        MMDEPLOY_ERROR("unsupported `labels`, tensor, shape: {}, dtype: {}", labels.shape(),
                       (int)labels.data_type());
        return Status(eNotSupported);
      }
      // `masks` must be a 4-D float tensor.
      if (!(masks.shape().size() == 4 && masks.data_type() == DataType::kFLOAT)) {
        MMDEPLOY_ERROR("unsupported `mask` tensor, shape: {}, dtype: {}", masks.shape(),
                       (int)masks.data_type());
        return Status(eNotSupported);
      }
      OUTCOME_TRY(auto _dets, MakeAvailableOnDevice(dets, kHost, stream()));
      OUTCOME_TRY(auto _labels, MakeAvailableOnDevice(labels, kHost, stream()));
      // Note: `masks` are kept on device to avoid data copy overhead from device to host.
      // refer to https://github.com/open-mmlab/mmdeploy/issues/1849
      // OUTCOME_TRY(auto _masks, MakeAvailableOnDevice(masks, kHost, stream()));
      // OUTCOME_TRY(stream().Wait());
      OUTCOME_TRY(auto result, DispatchGetBBoxes(prep_res["img_metas"], _dets, _labels));
      auto ori_w = prep_res["img_metas"]["ori_shape"][2].get<int>();
      auto ori_h = prep_res["img_metas"]["ori_shape"][1].get<int>();
      from_value(prep_res["img_metas"]["scale_factor"], scale_factor_);
      // NOTE(review): the Result<void> returned by ProcessMasks is discarded,
      // so mask-processing failures are silently ignored here.
      ProcessMasks(result, masks, _dets, ori_w, ori_h);
      return to_value(result);
    } catch (const std::exception& e) {
      MMDEPLOY_ERROR("{}", e.what());
      return Status(eFail);
    }
  }

 protected:
  // Converts raw mask predictions into per-detection binary masks attached to
  // `result`. `d_mask` stays on the inference device; `cpu_dets` are the
  // host-side boxes. `img_w`/`img_h` are the original image width/height.
  Result<void> ProcessMasks(Detections& result, Tensor d_mask, Tensor cpu_dets, int img_w,
                            int img_h) {
    d_mask.Squeeze(0);
    cpu_dets.Squeeze(0);
    ::mmdeploy::operation::Context ctx(device_, stream_);
    std::vector<Tensor> warped_masks;
    warped_masks.reserve(result.size());
    std::vector<Tensor> h_warped_masks;
    h_warped_masks.reserve(result.size());
    if (is_rcnn_) {  // mask r-cnn
      for (auto& det : result) {
        // Each detection indexes its own (H, W) mask slice.
        auto mask = d_mask.Slice(det.index);
        auto mask_height = (int)mask.shape(1);
        auto mask_width = (int)mask.shape(2);
        mask.Reshape({1, mask_height, mask_width, 1});
        // resize masks to origin image shape instead of input image shape
        // default is true
        if (is_resize_mask_) {
          auto& bbox = det.bbox;
          // same as mmdet with skip_empty = True
          auto x0 = std::max(std::floor(bbox[0]) - 1, 0.f);
          auto y0 = std::max(std::floor(bbox[1]) - 1, 0.f);
          auto x1 = std::min(std::ceil(bbox[2]) + 1, (float)img_w);
          auto y1 = std::min(std::ceil(bbox[3]) + 1, (float)img_h);
          auto width = static_cast<int>(x1 - x0);
          auto height = static_cast<int>(y1 - y0);
          // params align_corners = False
          float fx;
          float fy;
          float tx;
          float ty;
          // Scale from bbox space to mask space; the +/-0.5 terms sample
          // pixel centres (emulates align_corners=False).
          fx = (float)mask_width / (bbox[2] - bbox[0]);
          fy = (float)mask_height / (bbox[3] - bbox[1]);
          tx = (x0 + .5f - bbox[0]) * fx - .5f;
          ty = (y0 + .5f - bbox[1]) * fy - .5f;
          float affine_matrix[] = {fx, 0, tx, 0, fy, ty};
          // `m` wraps `affine_matrix`, so the in-place inversion below
          // rewrites the array that is handed to warp_affine_.
          // NOTE(review): relies on cv::invertAffineTransform tolerating
          // src == dst — confirm against the OpenCV version in use.
          cv::Mat_<float> m(2, 3, affine_matrix);
          cv::invertAffineTransform(m, m);
          Tensor& warped_mask = warped_masks.emplace_back();
          OUTCOME_TRY(warp_affine_.Apply(mask, warped_mask, affine_matrix, height, width));
          OUTCOME_TRY(CopyToHost(warped_mask, h_warped_masks.emplace_back()));
        } else {
          OUTCOME_TRY(CopyToHost(mask, h_warped_masks.emplace_back()));
        }
      }
    } else {  // rtmdet-inst
      auto mask_channel = (int)d_mask.shape(0);
      auto mask_height = (int)d_mask.shape(1);
      auto mask_width = (int)d_mask.shape(2);
      // (C, H, W) -> (H, W, C)
      std::vector<int> axes = {1, 2, 0};
      OUTCOME_TRY(permute_.Apply(d_mask, d_mask, axes));
      Device host{"cpu"};
      OUTCOME_TRY(auto cpu_mask, MakeAvailableOnDevice(d_mask, host, stream_));
      OUTCOME_TRY(stream().Wait());
      // Wrap the host tensor as one multi-channel cv::Mat (one channel per
      // instance) so resize/crop run once for all instances.
      cv::Mat mask_mat(mask_height, mask_width, CV_32FC(mask_channel), cpu_mask.data());
      int resize_height = int(mask_height / scale_factor_[1] + 0.5);
      int resize_width = int(mask_width / scale_factor_[0] + 0.5);
      // skip resize if scale_factor is 1.0
      if (resize_height != mask_height || resize_width != mask_width) {
        cv::resize(mask_mat, mask_mat, cv::Size(resize_width, resize_height), cv::INTER_LINEAR);
      }
      // crop masks
      // assumes the (possibly resized) mask covers at least img_h x img_w —
      // TODO confirm for aspect-ratio-changing pipelines
      mask_mat = mask_mat(cv::Range(0, img_h), cv::Range(0, img_w)).clone();
      for (int i = 0; i < (int)result.size(); i++) {
        cv::Mat mask_;
        cv::extractChannel(mask_mat, mask_, i);
        Tensor mask_t = cpu::CVMat2Tensor(mask_);
        h_warped_masks.emplace_back(mask_t);
      }
    }
    OUTCOME_TRY(stream_.Wait());
    // Binarize on host and attach each mask to its detection.
    for (size_t i = 0; i < h_warped_masks.size(); ++i) {
      result[i].mask = ThresholdMask(h_warped_masks[i]);
    }
    return success();
  }

  // Copies `src` into a newly allocated host tensor `dst`; shallow assignment
  // when `src` is already on the host. The copy is asynchronous on `stream_`,
  // so callers must wait on the stream before reading `dst`.
  Result<void> CopyToHost(const Tensor& src, Tensor& dst) {
    if (src.device() == kHost) {
      dst = src;
      return success();
    }
    dst = TensorDesc{kHost, src.data_type(), src.shape()};
    OUTCOME_TRY(stream_.Copy(src.buffer(), dst.buffer(), dst.byte_size()));
    return success();
  }

  // Thresholds a float mask into an 8-bit {0, 255} grayscale Mat.
  // The returned Mat aliases `warped_mat`'s buffer; capturing the cv::Mat in
  // the deleter keeps the buffer alive for the lifetime of the shared_ptr.
  Mat ThresholdMask(const Tensor& h_mask) const {
    cv::Mat warped_mat = cpu::Tensor2CVMat(h_mask);
    warped_mat = warped_mat > mask_thr_binary_;
    return {warped_mat.rows, warped_mat.cols, PixelFormat::kGRAYSCALE, DataType::kINT8,
            std::shared_ptr<void>(warped_mat.data, [mat = warped_mat](void*) {})};
  }

 private:
  operation::Managed<operation::WarpAffine> warp_affine_;
  ::mmdeploy::operation::Managed<::mmdeploy::operation::Permute> permute_;
  float mask_thr_binary_{.5f};  // binarization threshold for mask scores
  bool is_rcnn_{true};          // two-stage per-ROI masks vs rtmdet-inst full-image masks
  bool is_resize_mask_{true};   // warp ROI masks to the original image frame
  std::vector<float> scale_factor_{1.0, 1.0, 1.0, 1.0};  // refreshed per request from img_metas
};

MMDEPLOY_REGISTER_CODEBASE_COMPONENT(MMDetection, ResizeInstanceMask);
} // namespace mmdeploy::mmdet
// Copyright (c) OpenMMLab. All rights reserved.
#include "mmdeploy/codebase/mmdet/mmdet.h"
namespace mmdeploy::mmdet {

// Registers the MMDetection codebase so its post-processing components can be
// looked up by name through the mmdeploy registry.
MMDEPLOY_REGISTER_CODEBASE(MMDetection);

}  // namespace mmdeploy::mmdet
// Copyright (c) OpenMMLab. All rights reserved.
#ifndef MMDEPLOY_SRC_CODEBASE_MMDET_MMDET_H_
#define MMDEPLOY_SRC_CODEBASE_MMDET_MMDET_H_
#include <array>
#include "mmdeploy/codebase/common.h"
#include "mmdeploy/core/device.h"
#include "mmdeploy/core/mat.h"
#include "mmdeploy/core/module.h"
#include "mmdeploy/core/registry.h"
#include "mmdeploy/core/serialization.h"
namespace mmdeploy::mmdet {
// A single detected instance produced by the post-processing heads.
struct Detection {
  int index;     // position of this detection in the head's candidate list
                 // (raw box index for ResizeBBox, sorted rank for the YOLO/RTMDet heads)
  int label_id;  // class index
  float score;   // confidence score
  std::array<float, 4> bbox;  // left, top, right, bottom
  Mat mask;      // optional instance mask; empty for pure bbox detection
  MMDEPLOY_ARCHIVE_MEMBERS(index, label_id, score, bbox, mask);
};
using Detections = std::vector<Detection>;
MMDEPLOY_DECLARE_CODEBASE(MMDetection, mmdet);
} // namespace mmdeploy::mmdet
#endif // MMDEPLOY_SRC_CODEBASE_MMDET_MMDET_H_
// Copyright (c) OpenMMLab. All rights reserved.
#include "object_detection.h"

#include <exception>

#include "mmdeploy/core/registry.h"
#include "mmdeploy/core/utils/device_utils.h"
#include "mmdeploy/core/utils/formatter.h"
#include "mmdeploy/experimental/module_adapter.h"
#include "utils.h"
using namespace std;
namespace mmdeploy::mmdet {
// Reads the score threshold and minimum bbox size from the deploy config.
// mobilev2yolov3 exports its confidence threshold under "conf_thr"; every
// other model uses "score_thr".
ResizeBBox::ResizeBBox(const Value& cfg) : MMDetection(cfg) {
  if (!cfg.contains("params")) {
    return;
  }
  if (cfg["params"].contains("conf_thr")) {
    // for mobilev2yolov3
    score_thr_ = cfg["params"].value("conf_thr", 0.f);
  } else {
    score_thr_ = cfg["params"].value("score_thr", 0.f);
  }
  min_bbox_size_ = cfg["params"].value("min_bbox_size", 0.f);
}
// Extracts {dets, labels} tensors from the inference output.
// Two output layouts are supported:
//  * native "dets" + "labels" tensors, returned as-is;
//  * SSD-style "detection_output" of shape (1, num_det, 6) with rows
//    [label, score, x0, y0, x1, y1] in relative coordinates, which is split
//    here: labels are shifted by -1 (drops the background class) and
//    coordinates are scaled to input-image pixels.
// Returns an empty vector when neither layout is present.
std::vector<Tensor> ResizeBBox::GetDetsLabels(const Value& prep_res, const Value& infer_res) {
  std::vector<Tensor> results;
  if (infer_res.contains("dets") && infer_res.contains("labels")) {
    results.push_back(infer_res["dets"].get<Tensor>());
    results.push_back(infer_res["labels"].get<Tensor>());
    return results;
  } else if (infer_res.contains("detection_output") && (!infer_res.contains("dets")) &&
             (!infer_res.contains("labels"))) {
    int img_width = prep_res["img_metas"]["img_shape"][2].get<int>();
    int img_height = prep_res["img_metas"]["img_shape"][1].get<int>();
    auto detection_output = infer_res["detection_output"].get<Tensor>();
    auto* detection_output_ptr = detection_output.data<float>();
    // detection_output: (1, num_det, 6)
    TensorDesc labeldesc = detection_output.desc();
    int batch_size = detection_output.shape()[0];
    int num_det = detection_output.shape()[1];
    labeldesc.shape = {batch_size, num_det};
    Tensor labels(labeldesc);
    TensorDesc detdesc = detection_output.desc();
    detdesc.shape = {batch_size, num_det, 5};
    Tensor dets(detdesc);
    auto* dets_ptr = dets.data<float>();
    auto* labels_ptr = labels.data<float>();
    for (int i = 0; i < batch_size * num_det; ++i) {
      // input row [label, score, x0, y0, x1, y1] -> dets row [x0, y0, x1, y1, score]
      *labels_ptr++ = detection_output_ptr[0] - 1;
      dets_ptr[4] = detection_output_ptr[1];
      dets_ptr[0] = detection_output_ptr[2] * img_width;
      dets_ptr[1] = detection_output_ptr[3] * img_height;
      dets_ptr[2] = detection_output_ptr[4] * img_width;
      dets_ptr[3] = detection_output_ptr[5] * img_height;
      dets_ptr += 5;
      detection_output_ptr += 6;
    }
    results.push_back(dets);
    results.push_back(labels);
    return results;
  } else {
    MMDEPLOY_ERROR("No support for another key of detection results!");
    return results;
  }
}
// Validates the raw (dets, labels) tensors, moves them to host and converts
// them to Detections in original-image coordinates.
// prep_res: preprocess result (expects "img_metas"); infer_res: network output.
Result<Value> ResizeBBox::operator()(const Value& prep_res, const Value& infer_res) {
  MMDEPLOY_DEBUG("prep_res: {}\ninfer_res: {}", prep_res, infer_res);
  try {
    // GetDetsLabels returns an empty vector when it cannot find a supported
    // output layout; guard before indexing (previously `outputs[0]` on an
    // empty vector was undefined behavior).
    vector<Tensor> outputs = GetDetsLabels(prep_res, infer_res);
    if (outputs.size() < 2) {
      return Status(eNotSupported);
    }
    Tensor dets = outputs[0];
    Tensor labels = outputs[1];
    MMDEPLOY_DEBUG("dets.shape: {}", dets.shape());
    MMDEPLOY_DEBUG("labels.shape: {}", labels.shape());
    // `dets` is supposed to have 3 dims. They are 'batch', 'bboxes_number'
    // and 'channels' respectively
    if (!(dets.shape().size() == 3 && dets.data_type() == DataType::kFLOAT)) {
      MMDEPLOY_ERROR("unsupported `dets` tensor, shape: {}, dtype: {}", dets.shape(),
                     (int)dets.data_type());
      return Status(eNotSupported);
    }
    // `labels` is supposed to have 2 dims, which are 'batch' and
    // 'bboxes_number'
    if (labels.shape().size() != 2) {
      MMDEPLOY_ERROR("unsupported `labels` tensor, shape: {}, dtype: {}", labels.shape(),
                     (int)labels.data_type());
      return Status(eNotSupported);
    }
    OUTCOME_TRY(auto _dets, MakeAvailableOnDevice(dets, kHost, stream()));
    OUTCOME_TRY(auto _labels, MakeAvailableOnDevice(labels, kHost, stream()));
    OUTCOME_TRY(stream().Wait());
    OUTCOME_TRY(auto result, DispatchGetBBoxes(prep_res["img_metas"], _dets, _labels));
    return to_value(result);
  } catch (const std::exception& e) {
    // Log the failure instead of silently swallowing it (consistent with
    // ResizeInstanceMask::operator()).
    MMDEPLOY_ERROR("{}", e.what());
    return Status(eFail);
  } catch (...) {
    return Status(eFail);
  }
}
// Labels may be exported as float or as 32/64-bit integers depending on the
// inference backend; dispatch to the matching GetBBoxes instantiation.
Result<Detections> ResizeBBox::DispatchGetBBoxes(const Value& prep_res, const Tensor& dets,
                                                 const Tensor& labels) {
  const auto data_type = labels.data_type();
  if (data_type == DataType::kFLOAT) {
    return GetBBoxes<float>(prep_res, dets, labels);
  }
  if (data_type == DataType::kINT32) {
    return GetBBoxes<int32_t>(prep_res, dets, labels);
  }
  if (data_type == DataType::kINT64) {
    return GetBBoxes<int64_t>(prep_res, dets, labels);
  }
  return Status(eNotSupported);
}
// Converts raw (dets, labels) tensors into Detections in original-image
// coordinates. T is the label element type (see DispatchGetBBoxes).
// Optional img_metas entries consumed here:
//  * "scale_factor" (4 floats)    - resize scaling to undo;
//  * "pad_param" [top, left, ...] - padding to subtract;
//  * "border" [y, x, ...]         - crop offset (negated);
//  * "ori_shape" [_, H, W, ...]   - original image size used for clipping.
template <typename T>
Result<Detections> ResizeBBox::GetBBoxes(const Value& prep_res, const Tensor& dets,
                                         const Tensor& labels) {
  Detections objs;
  auto* dets_ptr = dets.data<float>();
  auto* labels_ptr = labels.data<T>();
  vector<float> scale_factor;
  if (prep_res.contains("scale_factor")) {
    from_value(prep_res["scale_factor"], scale_factor);
  } else {
    scale_factor = {1.f, 1.f, 1.f, 1.f};
  }
  int top_padding = 0;
  int left_padding = 0;
  if (prep_res.contains("pad_param")) {
    top_padding = prep_res["pad_param"][0].get<int>();
    left_padding = prep_res["pad_param"][1].get<int>();
  }
  float w_offset = 0.f;
  float h_offset = 0.f;
  if (prep_res.contains("border")) {
    w_offset = -prep_res["border"][1].get<int>();
    h_offset = -prep_res["border"][0].get<int>();
  }
  int ori_width = prep_res["ori_shape"][2].get<int>();
  int ori_height = prep_res["ori_shape"][1].get<int>();
  // `dets` has shape(1, n, 4) or shape(1, n, 5). The latter one has `score`
  auto bboxes_number = dets.shape()[1];
  auto channels = dets.shape()[2];
  for (auto i = 0; i < bboxes_number; ++i, dets_ptr += channels, ++labels_ptr) {
    float score = 0.f;
    // score filtering only applies when a score channel exists
    if (channels > 4 && dets_ptr[4] <= score_thr_) {
      continue;
    }
    score = channels > 4 ? dets_ptr[4] : score;
    auto left = dets_ptr[0];
    auto top = dets_ptr[1];
    auto right = dets_ptr[2];
    auto bottom = dets_ptr[3];
    MMDEPLOY_DEBUG("ori left {}, top {}, right {}, bottom {}, label {}", left, top, right, bottom,
                   *labels_ptr);
    auto rect = MapToOriginImage(left, top, right, bottom, scale_factor.data(), w_offset, h_offset,
                                 ori_width, ori_height, top_padding, left_padding);
    if (rect[2] - rect[0] < min_bbox_size_ || rect[3] - rect[1] < min_bbox_size_) {
      MMDEPLOY_DEBUG("ignore small bbox with width '{}' and height '{}", rect[2] - rect[0],
                     rect[3] - rect[1]);
      continue;
    }
    MMDEPLOY_DEBUG("remap left {}, top {}, right {}, bottom {}", rect[0], rect[1], rect[2],
                   rect[3]);
    Detection det{};
    det.index = i;
    det.label_id = static_cast<int>(*labels_ptr);
    det.score = score;
    det.bbox = rect;
    objs.push_back(std::move(det));
  }
  return objs;
}
MMDEPLOY_REGISTER_CODEBASE_COMPONENT(MMDetection, ResizeBBox);
} // namespace mmdeploy::mmdet
// Copyright (c) OpenMMLab. All rights reserved.
#ifndef MMDEPLOY_SRC_CODEBASE_MMDET_OBJECT_DETECTION_H_
#define MMDEPLOY_SRC_CODEBASE_MMDET_OBJECT_DETECTION_H_
#include "mmdeploy/codebase/mmdet/mmdet.h"
#include "mmdeploy/core/tensor.h"
#include "mmdeploy/core/utils/formatter.h"
namespace mmdeploy::mmdet {
// Maps raw detector output ("dets"/"labels", or SSD-style "detection_output")
// back to original-image coordinates and filters by score / bbox size.
class ResizeBBox : public MMDetection {
 public:
  explicit ResizeBBox(const Value& cfg);

  // prep_res: preprocess result (expects "img_metas"); infer_res: network output.
  Result<Value> operator()(const Value& prep_res, const Value& infer_res);

 protected:
  // Picks the GetBBoxes instantiation that matches the label tensor's dtype.
  Result<Detections> DispatchGetBBoxes(const Value& prep_res, const Tensor& dets,
                                       const Tensor& labels);
  template <typename T>
  Result<Detections> GetBBoxes(const Value& prep_res, const Tensor& dets, const Tensor& labels);
  // Returns {dets, labels}; converts "detection_output" to that pair if needed.
  std::vector<Tensor> GetDetsLabels(const Value& prep_res, const Value& infer_res);

 protected:
  constexpr static Device kHost{0, 0};
  float score_thr_{0.f};      // minimum confidence kept
  float min_bbox_size_{0.f};  // boxes narrower/shorter than this are dropped
};
} // namespace mmdeploy::mmdet
#endif // MMDEPLOY_SRC_CODEBASE_MMDET_OBJECT_DETECTION_H_
// Copyright (c) OpenMMLab. All rights reserved.
#include "rtmdet_head.h"
#include <math.h>
#include <algorithm>
#include <numeric>
#include "mmdeploy/core/model.h"
#include "mmdeploy/core/utils/device_utils.h"
#include "mmdeploy/core/utils/formatter.h"
#include "utils.h"
namespace mmdeploy::mmdet {
// Reads post-processing hyper-parameters and the anchor/point generator
// configuration (offset, per-level strides) from the deploy config.
// Throws (via .value()) if the config cannot be read.
RTMDetSepBNHead::RTMDetSepBNHead(const Value& cfg) : MMDetection(cfg) {
  auto init = [&]() -> Result<void> {
    auto model = cfg["context"]["model"].get<Model>();
    if (cfg.contains("params")) {
      nms_pre_ = cfg["params"].value("nms_pre", -1);
      score_thr_ = cfg["params"].value("score_thr", 0.02f);
      min_bbox_size_ = cfg["params"].value("min_bbox_size", 0);
      max_per_img_ = cfg["params"].value("max_per_img", 100);
      iou_threshold_ = cfg["params"].contains("nms")
                           ? cfg["params"]["nms"].value("iou_threshold", 0.45f)
                           : 0.45f;
      if (cfg["params"].contains("anchor_generator")) {
        // Use a float default: the previous int default (`0`) made `value`
        // deduce int, truncating fractional offsets (e.g. 0.5) before they
        // reached the float member `offset_`.
        offset_ = cfg["params"]["anchor_generator"].value("offset", 0.f);
        from_value(cfg["params"]["anchor_generator"]["strides"], strides_);
      }
    }
    return success();
  };
  init().value();
}
// Splits the flat list of network outputs into per-level cls_scores (first
// half) and bbox_preds (second half), moves everything to host and decodes.
// Assumes outputs are ordered [cls_0..cls_{k-1}, bbox_0..bbox_{k-1}].
Result<Value> RTMDetSepBNHead::operator()(const Value& prep_res, const Value& infer_res) {
  MMDEPLOY_DEBUG("prep_res: {}\ninfer_res: {}", prep_res, infer_res);
  try {
    std::vector<Tensor> cls_scores;
    std::vector<Tensor> bbox_preds;
    const Device kHost{0, 0};
    int i = 0;
    int divisor = infer_res.size() / 2;
    for (auto iter = infer_res.begin(); iter != infer_res.end(); iter++) {
      auto pred_map = iter->get<Tensor>();
      OUTCOME_TRY(auto _pred_map, MakeAvailableOnDevice(pred_map, kHost, stream()));
      if (i < divisor)
        cls_scores.push_back(_pred_map);
      else
        bbox_preds.push_back(_pred_map);
      i++;
    }
    OUTCOME_TRY(stream().Wait());
    OUTCOME_TRY(auto result, GetBBoxes(prep_res["img_metas"], bbox_preds, cls_scores));
    return to_value(result);
  } catch (...) {
    // NOTE(review): exceptions are swallowed without logging here.
    return Status(eFail);
  }
}
// Logistic function; computed in double precision then narrowed to float.
static float sigmoid(float x) { return 1.0 / (1.0 + expf(-x)); }
// Decodes all FPN levels into candidate boxes, sorts by score, runs NMS, and
// maps the survivors back to original-image coordinates.
// prep_res: img_metas (uses "scale_factor" and "ori_shape").
Result<Detections> RTMDetSepBNHead::GetBBoxes(const Value& prep_res,
                                              const std::vector<Tensor>& bbox_preds,
                                              const std::vector<Tensor>& cls_scores) const {
  // Guard the debug peeks below against empty inputs.
  if (bbox_preds.empty() || cls_scores.empty()) {
    return Detections{};
  }
  // Fix: these debug statements previously referenced `dets[0]` (declared
  // only further down) and a non-existent `scores`, which cannot compile when
  // MMDEPLOY_DEBUG evaluates its arguments.
  MMDEPLOY_DEBUG("bbox_pred: {}, {}", bbox_preds[0].shape(), bbox_preds[0].data_type());
  MMDEPLOY_DEBUG("cls_score: {}, {}", cls_scores[0].shape(), cls_scores[0].data_type());
  // Decode every level into flat candidate lists.
  std::vector<float> filter_boxes;
  std::vector<float> obj_probs;
  std::vector<int> class_ids;
  for (size_t i = 0; i < bbox_preds.size(); i++) {
    RTMDetFeatDeocde(bbox_preds[i], cls_scores[i], strides_[i], offset_, filter_boxes, obj_probs,
                     class_ids);
  }
  // Sort candidates by descending score; indexArray tracks each candidate's
  // row in `dets` and is consumed/marked by NMS.
  std::vector<int> indexArray;
  for (int i = 0; i < (int)obj_probs.size(); ++i) {
    indexArray.push_back(i);
  }
  Sort(obj_probs, class_ids, indexArray);
  Tensor dets(TensorDesc{Device{0, 0}, DataType::kFLOAT,
                         TensorShape{int(filter_boxes.size() / 4), 4}, "dets"});
  std::copy(filter_boxes.begin(), filter_boxes.end(), dets.data<float>());
  NMS(dets, iou_threshold_, indexArray);
  Detections objs;
  std::vector<float> scale_factor;
  if (prep_res.contains("scale_factor")) {
    from_value(prep_res["scale_factor"], scale_factor);
  } else {
    scale_factor = {1.f, 1.f, 1.f, 1.f};
  }
  int ori_width = prep_res["ori_shape"][2].get<int>();
  int ori_height = prep_res["ori_shape"][1].get<int>();
  auto det_ptr = dets.data<float>();
  for (size_t i = 0; i < indexArray.size(); ++i) {
    if (indexArray[i] == -1) {
      continue;  // suppressed by NMS
    }
    int j = indexArray[i];
    auto x1 = det_ptr[j * 4 + 0];
    auto y1 = det_ptr[j * 4 + 1];
    auto x2 = det_ptr[j * 4 + 2];
    auto y2 = det_ptr[j * 4 + 3];
    int label_id = class_ids[i];
    float score = obj_probs[i];
    MMDEPLOY_DEBUG("{}-th box: ({}, {}, {}, {}), {}, {}", i, x1, y1, x2, y2, label_id, score);
    auto rect =
        MapToOriginImage(x1, y1, x2, y2, scale_factor.data(), 0, 0, ori_width, ori_height, 0, 0);
    if (rect[2] - rect[0] < min_bbox_size_ || rect[3] - rect[1] < min_bbox_size_) {
      MMDEPLOY_DEBUG("ignore small bbox with width '{}' and height '{}", rect[2] - rect[0],
                     rect[3] - rect[1]);
      continue;
    }
    Detection det{};
    det.index = i;
    det.label_id = label_id;
    det.score = score;
    det.bbox = rect;
    objs.push_back(std::move(det));
  }
  return objs;
}
// Decodes one FPN level: for every grid cell, takes the argmax class, applies
// sigmoid + score threshold, and converts the four predicted side distances
// into corner coordinates via RTMDetdecode.
// Layouts inferred from the indexing below: cls_score (b, num_classes, h, w),
// bbox_pred (b, 4, h, w), batch size 1 — TODO confirm against the exporter.
// Appends to the three output vectors; returns the number of kept candidates.
int RTMDetSepBNHead::RTMDetFeatDeocde(const Tensor& bbox_pred, const Tensor& cls_score,
                                      const float stride, const float offset,
                                      std::vector<float>& filter_boxes,
                                      std::vector<float>& obj_probs,
                                      std::vector<int>& class_ids) const {
  int cls_param_num = cls_score.shape(1);
  int feat_h = bbox_pred.shape(2);
  int feat_w = bbox_pred.shape(3);
  int feat_size = feat_h * feat_w;
  auto bbox_ptr = bbox_pred.data<float>();
  auto score_ptr = cls_score.data<float>();  // (b, c, h, w)
  int valid_count = 0;
  for (int i = 0; i < feat_h; i++) {
    for (int j = 0; j < feat_w; j++) {
      // argmax over class logits at this cell (seeded with class 0's score)
      float max_score = score_ptr[i * feat_w + j];
      int class_id = 0;
      for (int k = 0; k < cls_param_num; k++) {
        float score = score_ptr[k * feat_size + i * feat_w + j];
        if (score > max_score) {
          max_score = score;
          class_id = k;
        }
      }
      max_score = sigmoid(max_score);
      if (max_score < score_thr_) continue;
      obj_probs.push_back(max_score);
      class_ids.push_back(class_id);
      // predicted distances from the cell point to the four box sides
      float tl_x = bbox_ptr[0 * feat_size + i * feat_w + j];
      float tl_y = bbox_ptr[1 * feat_size + i * feat_w + j];
      float br_x = bbox_ptr[2 * feat_size + i * feat_w + j];
      float br_y = bbox_ptr[3 * feat_size + i * feat_w + j];
      auto box = RTMDetdecode(tl_x, tl_y, br_x, br_y, stride, offset, j, i);
      tl_x = box[0];
      tl_y = box[1];
      br_x = box[2];
      br_y = box[3];
      filter_boxes.push_back(tl_x);
      filter_boxes.push_back(tl_y);
      filter_boxes.push_back(br_x);
      filter_boxes.push_back(br_y);
      valid_count++;
    }
  }
  return valid_count;
}
// Converts the four side distances predicted at grid cell (j, i) into
// absolute corner coordinates: the cell's (offset-shifted) point is scaled by
// the stride, then the distances are subtracted/added.
std::array<float, 4> RTMDetSepBNHead::RTMDetdecode(float tl_x, float tl_y, float br_x, float br_y,
                                                   float stride, float offset, int j, int i) const {
  const float px = (offset + j) * stride;
  const float py = (offset + i) * stride;
  return {px - tl_x, py - tl_y, px + br_x, py + br_y};
}
MMDEPLOY_REGISTER_CODEBASE_COMPONENT(MMDetection, RTMDetSepBNHead);
} // namespace mmdeploy::mmdet
// Copyright (c) OpenMMLab. All rights reserved.
#ifndef MMDEPLOY_CODEBASE_MMDET_RTMDET_HEAD_H_
#define MMDEPLOY_CODEBASE_MMDET_RTMDET_HEAD_H_
#include "mmdeploy/codebase/mmdet/mmdet.h"
#include "mmdeploy/core/tensor.h"
namespace mmdeploy::mmdet {
// Post-processing head for RTMDet: decodes per-level side-distance
// predictions, applies score thresholding + NMS, and maps the surviving boxes
// back to original-image coordinates.
class RTMDetSepBNHead : public MMDetection {
 public:
  explicit RTMDetSepBNHead(const Value& cfg);

  Result<Value> operator()(const Value& prep_res, const Value& infer_res);

  // Decodes all levels, sorts by score, runs NMS and rescales to ori_shape.
  Result<Detections> GetBBoxes(const Value& prep_res, const std::vector<Tensor>& bbox_preds,
                               const std::vector<Tensor>& cls_scores) const;
  // Decodes one feature level; returns the number of candidates kept.
  int RTMDetFeatDeocde(const Tensor& bbox_pred, const Tensor& cls_score, const float stride,
                       const float offset, std::vector<float>& filter_boxes,
                       std::vector<float>& obj_probs, std::vector<int>& class_ids) const;
  // Converts side distances at grid cell (j, i) into corner coordinates.
  std::array<float, 4> RTMDetdecode(float tl_x, float tl_y, float br_x, float br_y, float stride,
                                    float offset, int j, int i) const;

 private:
  float score_thr_{0.4f};       // minimum sigmoid class score kept
  int nms_pre_{1000};           // NOTE(review): read from config but unused in this TU
  float iou_threshold_{0.45f};  // NMS IoU threshold
  int min_bbox_size_{0};        // boxes smaller than this are dropped
  int max_per_img_{100};        // NOTE(review): read from config but unused in this TU
  float offset_{0.0f};          // point offset within a grid cell
  std::vector<float> strides_;  // per-level downsample strides
};
} // namespace mmdeploy::mmdet
#endif // MMDEPLOY_CODEBASE_MMDET_RTMDET_HEAD_H_
// Copyright (c) OpenMMLab. All rights reserved.
#include "utils.h"
#include <algorithm>
#include <numeric>
using mmdeploy::framework::Tensor;
namespace mmdeploy::mmdet {
// Maps a box from network-input coordinates back to the original image:
// removes padding, undoes per-axis resize scaling, applies the crop/border
// offset, then clips to [0, ori_width-1] x [0, ori_height-1].
std::array<float, 4> MapToOriginImage(float left, float top, float right, float bottom,
                                      const float* scale_factor, float x_offset, float y_offset,
                                      int ori_width, int ori_height, int top_padding,
                                      int left_padding) {
  const float x0 = (left - left_padding) / scale_factor[0] + x_offset;
  const float y0 = (top - top_padding) / scale_factor[1] + y_offset;
  const float x1 = (right - left_padding) / scale_factor[2] + x_offset;
  const float y1 = (bottom - top_padding) / scale_factor[3] + y_offset;
  return {std::max(x0, 0.f), std::max(y0, 0.f),
          std::min(x1, static_cast<float>(ori_width) - 1.f),
          std::min(y1, static_cast<float>(ori_height) - 1.f)};
}
// Filters per-anchor class scores: for each of the shape(1) candidate rows,
// keeps the best of the shape(2) class scores when it reaches `score_thr`,
// recording the score, the class index and the row index.
// NOTE(review): `topk` is accepted for API compatibility but is not applied
// here — candidates are only filtered by `score_thr`. Confirm whether top-k
// truncation is expected to happen in the caller.
void FilterScoresAndTopk(const Tensor& scores, float score_thr, int topk, std::vector<float>& probs,
                         std::vector<int>& label_ids, std::vector<int>& anchor_idxs) {
  auto kDets = scores.shape(1);
  auto kClasses = scores.shape(2);
  auto score_ptr = scores.data<float>();
  for (auto i = 0; i < kDets; ++i, score_ptr += kClasses) {
    // best class for this candidate (removed the previously unused copy of *iter)
    auto iter = std::max_element(score_ptr, score_ptr + kClasses);
    if (*iter < score_thr) {
      continue;
    }
    probs.push_back(*iter);
    label_ids.push_back(static_cast<int>(iter - score_ptr));
    anchor_idxs.push_back(static_cast<int>(i));
  }
}
// Intersection-over-union of two corner-format boxes.
// Returns 0 for non-overlapping boxes and for degenerate input (zero-area or
// inverted boxes), where the old code divided 0/0 and returned NaN — the NaN
// then slipped through the `iou <= 0` guard because NaN comparisons are false.
float IOU(float xmin0, float ymin0, float xmax0, float ymax0, float xmin1, float ymin1, float xmax1,
          float ymax1) {
  auto w = std::max(0.f, std::min(xmax0, xmax1) - std::max(xmin0, xmin1));
  auto h = std::max(0.f, std::min(ymax0, ymax1) - std::max(ymin0, ymin1));
  auto area = w * h;
  auto sum = (xmax0 - xmin0) * (ymax0 - ymin0) + (xmax1 - xmin1) * (ymax1 - ymin1);
  auto denom = sum - area;  // union area
  if (denom <= 0.f) {
    return 0.f;
  }
  auto iou = area / denom;
  return iou <= 0.f ? 0.f : iou;
}
// Greedy non-maximum suppression.
// `keep_idxs` holds row indices into `dets` (n, 4) in descending score order;
// suppressed entries are overwritten with -1.
// Fix: entries already set to -1 were previously still dereferenced as row
// indices, reading det_ptr[-4..-1] out of bounds and suppressing boxes
// against garbage data. Suppressed entries are now skipped in both loops.
void NMS(const Tensor& dets, float iou_threshold, std::vector<int>& keep_idxs) {
  auto det_ptr = dets.data<float>();
  for (size_t i = 0; i < keep_idxs.size(); ++i) {
    auto n = keep_idxs[i];
    if (n == -1) {
      continue;  // already suppressed
    }
    for (size_t j = i + 1; j < keep_idxs.size(); ++j) {
      auto m = keep_idxs[j];
      if (m == -1) {
        continue;  // already suppressed
      }
      // `delta_xywh_bbox_coder` decode return tl_x, tl_y, br_x, br_y
      float xmin0 = det_ptr[n * 4 + 0];
      float ymin0 = det_ptr[n * 4 + 1];
      float xmax0 = det_ptr[n * 4 + 2];
      float ymax0 = det_ptr[n * 4 + 3];
      float xmin1 = det_ptr[m * 4 + 0];
      float ymin1 = det_ptr[m * 4 + 1];
      float xmax1 = det_ptr[m * 4 + 2];
      float ymax1 = det_ptr[m * 4 + 3];
      float iou = IOU(xmin0, ymin0, xmax0, ymax0, xmin1, ymin1, xmax1, ymax1);
      if (iou > iou_threshold) {
        keep_idxs[j] = -1;
      }
    }
  }
}
// Reorders the three parallel arrays in place by descending probability.
// A permutation is computed once over `probs` and then applied to all arrays.
void Sort(std::vector<float>& probs, std::vector<int>& label_ids, std::vector<int>& anchor_idxs) {
  const size_t n = probs.size();
  std::vector<int> order(n);
  std::iota(order.begin(), order.end(), 0);
  std::sort(order.begin(), order.end(),
            [&probs](int a, int b) { return probs[a] > probs[b]; });
  std::vector<float> sorted_probs;
  std::vector<int> sorted_labels;
  std::vector<int> sorted_anchors;
  sorted_probs.reserve(n);
  sorted_labels.reserve(n);
  sorted_anchors.reserve(n);
  for (int idx : order) {
    sorted_probs.push_back(probs[idx]);
    sorted_labels.push_back(label_ids[idx]);
    sorted_anchors.push_back(anchor_idxs[idx]);
  }
  probs.swap(sorted_probs);
  label_ids.swap(sorted_labels);
  anchor_idxs.swap(sorted_anchors);
}
} // namespace mmdeploy::mmdet
// Copyright (c) OpenMMLab. All rights reserved.
#ifndef MMDEPLOY_CODEBASE_MMDET_UTILS_H_
#define MMDEPLOY_CODEBASE_MMDET_UTILS_H_
#include <array>
#include <vector>
#include "mmdeploy/core/tensor.h"
namespace mmdeploy::mmdet {
// Maps a box from network-input coordinates back to the original image:
// removes padding, undoes per-axis resize scaling, applies crop offsets and
// clips to [0, ori_width-1] x [0, ori_height-1].
std::array<float, 4> MapToOriginImage(float left, float top, float right, float bottom,
                                      const float* scale_factor, float x_offset, float y_offset,
                                      int ori_width, int ori_height, int top_padding,
                                      int left_padding);

// @brief Filter results using score threshold and topk candidates.
// scores (Tensor): The scores, shape (num_bboxes, K).
// probs: The scores after being filtered
// label_ids: The class labels
// anchor_idxs: The anchor indexes
void FilterScoresAndTopk(const mmdeploy::framework::Tensor& scores, float score_thr, int topk,
                         std::vector<float>& probs, std::vector<int>& label_ids,
                         std::vector<int>& anchor_idxs);

// Intersection-over-union of two corner-format boxes; 0 when disjoint.
float IOU(float xmin0, float ymin0, float xmax0, float ymax0, float xmin1, float ymin1, float xmax1,
          float ymax1);

// Sorts the three parallel arrays in place by descending probability.
void Sort(std::vector<float>& probs, std::vector<int>& label_ids, std::vector<int>& anchor_idxs);

// Greedy NMS over `dets` (n, 4); suppressed entries in `keep_idxs` become -1.
void NMS(const mmdeploy::framework::Tensor& dets, float iou_threshold, std::vector<int>& keep_idxs);
} // namespace mmdeploy::mmdet
#endif // MMDEPLOY_CODEBASE_MMDET_UTILS_H_
// Copyright (c) OpenMMLab. All rights reserved.
#include "yolo_head.h"
#include <math.h>
#include <algorithm>
#include <numeric>
#include "mmdeploy/core/model.h"
#include "mmdeploy/core/utils/device_utils.h"
#include "mmdeploy/core/utils/formatter.h"
#include "utils.h"
namespace mmdeploy::mmdet {
// Reads post-processing hyper-parameters and the anchor configuration
// (base_sizes / strides) from the deploy config.
// Throws (via .value()) if the config cannot be read.
YOLOHead::YOLOHead(const Value& cfg) : MMDetection(cfg) {
  auto init = [&]() -> Result<void> {
    auto model = cfg["context"]["model"].get<Model>();
    if (cfg.contains("params")) {
      nms_pre_ = cfg["params"].value("nms_pre", -1);
      score_thr_ = cfg["params"].value("score_thr", 0.02f);
      min_bbox_size_ = cfg["params"].value("min_bbox_size", 0);
      // NMS IoU threshold; falls back to 0.45 when no "nms" block is present
      iou_threshold_ = cfg["params"].contains("nms")
                           ? cfg["params"]["nms"].value("iou_threshold", 0.45f)
                           : 0.45f;
      if (cfg["params"].contains("anchor_generator")) {
        from_value(cfg["params"]["anchor_generator"]["base_sizes"], anchors_);
        from_value(cfg["params"]["anchor_generator"]["strides"], strides_);
      }
    }
    return success();
  };
  init().value();
}
// Moves every prediction map to host, orders them to match `strides_` /
// `anchors_`, then decodes them into detections.
Result<Value> YOLOHead::operator()(const Value& prep_res, const Value& infer_res) {
  MMDEPLOY_DEBUG("prep_res: {}\ninfer_res: {}", prep_res, infer_res);
  try {
    const Device kHost{0, 0};
    std::vector<Tensor> pred_maps;
    for (auto iter = infer_res.begin(); iter != infer_res.end(); iter++) {
      auto pred_map = iter->get<Tensor>();
      OUTCOME_TRY(auto _pred_map, MakeAvailableOnDevice(pred_map, kHost, stream()));
      pred_maps.push_back(_pred_map);
    }
    OUTCOME_TRY(stream().Wait());
    // reorder pred_maps according to strides and anchors, mainly for rknpu yolov3:
    // a larger stride must correspond to a smaller feature map, so reverse when
    // the stride order and the width order agree.
    if ((pred_maps.size() > 1) &&
        !((strides_[0] < strides_[1]) ^ (pred_maps[0].shape(3) < pred_maps[1].shape(3)))) {
      std::reverse(pred_maps.begin(), pred_maps.end());
    }
    OUTCOME_TRY(auto result, GetBBoxes(prep_res["img_metas"], pred_maps));
    return to_value(result);
  } catch (...) {
    // NOTE(review): exceptions are swallowed without logging here.
    return Status(eFail);
  }
}
// Clamps `val` to [min, max]; an in-range value is truncated toward zero by
// the float -> int return conversion.
inline static int clamp(float val, int min, int max) {
  if (!(val > min)) {
    return min;
  }
  return val < max ? val : max;
}

// Logistic function; arithmetic kept in double precision as before.
static float sigmoid(float x) { return 1.0 / (1.0 + expf(-x)); }

// Inverse of sigmoid (logit); used to pre-threshold raw confidences so the
// hot loop can compare logits directly.
static float unsigmoid(float y) { return -1.0 * logf((1.0 / y) - 1.0); }
// Decodes one YOLO feature level.
// `feat_map` channel layout (inferred from the indexing below):
// anchor-major blocks of [x, y, w, h, objectness, cls_0..cls_{N-1}], each
// channel a (grid_h, grid_w) plane. For every anchor/cell whose objectness
// passes `threshold`, the box is decoded via the subclass's yolo_decode and
// sigmoid(best_class) * sigmoid(objectness) is recorded with the class id.
// Returns the number of boxes kept. `height`/`width` are currently unused.
int YOLOHead::YOLOFeatDecode(const Tensor& feat_map, const std::vector<std::vector<float>>& anchor,
                             int grid_h, int grid_w, int height, int width, int stride,
                             std::vector<float>& boxes, std::vector<float>& obj_probs,
                             std::vector<int>& class_id, float threshold) const {
  auto input = const_cast<float*>(feat_map.data<float>());
  auto prop_box_size = feat_map.shape(1) / anchor.size();
  const int kClasses = prop_box_size - 5;
  int valid_count = 0;
  int grid_len = grid_h * grid_w;
  // compare raw logits against unsigmoid(threshold) instead of running
  // sigmoid on every cell's objectness
  float thres = unsigmoid(threshold);
  for (int a = 0; a < anchor.size(); a++) {
    for (int i = 0; i < grid_h; i++) {
      for (int j = 0; j < grid_w; j++) {
        float box_confidence = input[(prop_box_size * a + 4) * grid_len + i * grid_w + j];
        if (box_confidence >= thres) {
          // start of this anchor's channel block at cell (i, j)
          int offset = (prop_box_size * a) * grid_len + i * grid_w + j;
          float* in_ptr = input + offset;
          float box_x = sigmoid(*in_ptr);
          float box_y = sigmoid(in_ptr[grid_len]);
          float box_w = in_ptr[2 * grid_len];
          float box_h = in_ptr[3 * grid_len];
          auto box = yolo_decode(box_x, box_y, box_w, box_h, stride, anchor, j, i, a);
          box_x = box[0];
          box_y = box[1];
          box_w = box[2];
          box_h = box[3];
          // centre/size -> corner form
          box_x -= (box_w / 2.0);
          box_y -= (box_h / 2.0);
          boxes.push_back(box_x);
          boxes.push_back(box_y);
          boxes.push_back(box_x + box_w);
          boxes.push_back(box_y + box_h);
          // argmax over raw class logits
          float max_class_probs = in_ptr[5 * grid_len];
          int max_class_id = 0;
          for (int k = 1; k < kClasses; ++k) {
            float prob = in_ptr[(5 + k) * grid_len];
            if (prob > max_class_probs) {
              max_class_id = k;
              max_class_probs = prob;
            }
          }
          obj_probs.push_back(sigmoid(max_class_probs) * sigmoid(box_confidence));
          class_id.push_back(max_class_id);
          valid_count++;
        }
      }
    }
  }
  return valid_count;
}
// Decodes all prediction maps, applies score-sorted NMS, and maps the
// surviving boxes back to original-image coordinates (undoing resize and
// padding recorded in img_metas).
Result<Detections> YOLOHead::GetBBoxes(const Value& prep_res,
                                       const std::vector<Tensor>& pred_maps) const {
  std::vector<float> filter_boxes;
  std::vector<float> obj_probs;
  std::vector<int> class_id;
  int model_in_h = prep_res["img_shape"][1].get<int>();
  int model_in_w = prep_res["img_shape"][2].get<int>();
  for (int i = 0; i < pred_maps.size(); i++) {
    int stride = strides_[i];
    int grid_h = model_in_h / stride;
    int grid_w = model_in_w / stride;
    YOLOFeatDecode(pred_maps[i], anchors_[i], grid_h, grid_w, model_in_h, model_in_w, stride,
                   filter_boxes, obj_probs, class_id, score_thr_);
  }
  // NMS expects candidates in descending score order; Sort reorders all three
  // parallel arrays while indexArray tracks each candidate's box-row index.
  std::vector<int> indexArray;
  for (int i = 0; i < obj_probs.size(); ++i) {
    indexArray.push_back(i);
  }
  Sort(obj_probs, class_id, indexArray);
  Tensor dets(TensorDesc{Device{0, 0}, DataType::kFLOAT,
                         TensorShape{int(filter_boxes.size() / 4), 4}, "dets"});
  std::copy(filter_boxes.begin(), filter_boxes.end(), dets.data<float>());
  NMS(dets, iou_threshold_, indexArray);
  Detections objs;
  std::vector<float> scale_factor;
  if (prep_res.contains("scale_factor")) {
    from_value(prep_res["scale_factor"], scale_factor);
  } else {
    scale_factor = {1.f, 1.f, 1.f, 1.f};
  }
  int top_padding = 0;
  int left_padding = 0;
  if (prep_res.contains("pad_param")) {
    top_padding = prep_res["pad_param"][0].get<int>();
    left_padding = prep_res["pad_param"][1].get<int>();
  }
  int ori_width = prep_res["ori_shape"][2].get<int>();
  int ori_height = prep_res["ori_shape"][1].get<int>();
  auto det_ptr = dets.data<float>();
  for (int i = 0; i < indexArray.size(); ++i) {
    if (indexArray[i] == -1) {
      continue;  // suppressed by NMS
    }
    int j = indexArray[i];
    // clamp to the network input frame (clamp also truncates to int)
    auto x1 = clamp(det_ptr[j * 4 + 0], 0, model_in_w);
    auto y1 = clamp(det_ptr[j * 4 + 1], 0, model_in_h);
    auto x2 = clamp(det_ptr[j * 4 + 2], 0, model_in_w);
    auto y2 = clamp(det_ptr[j * 4 + 3], 0, model_in_h);
    int label_id = class_id[i];
    float score = obj_probs[i];
    MMDEPLOY_DEBUG("{}-th box: ({}, {}, {}, {}), {}, {}", i, x1, y1, x2, y2, label_id, score);
    auto rect = MapToOriginImage(x1, y1, x2, y2, scale_factor.data(), 0, 0, ori_width, ori_height,
                                 top_padding, left_padding);
    if (rect[2] - rect[0] < min_bbox_size_ || rect[3] - rect[1] < min_bbox_size_) {
      MMDEPLOY_DEBUG("ignore small bbox with width '{}' and height '{}", rect[2] - rect[0],
                     rect[3] - rect[1]);
      continue;
    }
    Detection det{};
    det.index = i;
    det.label_id = label_id;
    det.score = score;
    det.bbox = rect;
    objs.push_back(std::move(det));
  }
  return objs;
}
// YOLOv3 box decoding: the sigmoid-activated center offsets are shifted by the
// grid cell (j, i) and scaled to input pixels by the stride; raw width/height
// are exponentiated and scaled by the matched anchor's dimensions.
// Returns (center_x, center_y, width, height) in input-image pixels.
std::array<float, 4> YOLOV3Head::yolo_decode(float box_x, float box_y, float box_w, float box_h,
                                             float stride,
                                             const std::vector<std::vector<float>>& anchor, int j,
                                             int i, int a) const {
  const float cx = (box_x + j) * stride;
  const float cy = (box_y + i) * stride;
  const float w = expf(box_w) * anchor[a][0];
  const float h = expf(box_h) * anchor[a][1];
  return {cx, cy, w, h};
}
// YOLOv5 box decoding: center = (2 * sigmoid(t) - 0.5 + grid) * stride and
// size = (2 * sigmoid(t))^2 * anchor. box_x/box_y arrive already
// sigmoid-activated; box_w/box_h are raw logits.
// Returns (center_x, center_y, width, height) in input-image pixels.
std::array<float, 4> YOLOv5Head::yolo_decode(float box_x, float box_y, float box_w, float box_h,
                                             float stride,
                                             const std::vector<std::vector<float>>& anchor, int j,
                                             int i, int a) const {
  const float cx = (box_x * 2 - 0.5 + j) * stride;
  const float cy = (box_y * 2 - 0.5 + i) * stride;
  const float sw = sigmoid(box_w) * 2;
  const float sh = sigmoid(box_h) * 2;
  return {cx, cy, sw * sw * anchor[a][0], sh * sh * anchor[a][1]};
}
// Register both YOLO head flavours so pipelines can instantiate them by name.
MMDEPLOY_REGISTER_CODEBASE_COMPONENT(MMDetection, YOLOV3Head);
MMDEPLOY_REGISTER_CODEBASE_COMPONENT(MMDetection, YOLOv5Head);
} // namespace mmdeploy::mmdet
// Copyright (c) OpenMMLab. All rights reserved.
#ifndef MMDEPLOY_CODEBASE_MMDET_YOLO_HEAD_H_
#define MMDEPLOY_CODEBASE_MMDET_YOLO_HEAD_H_
#include "mmdeploy/codebase/mmdet/mmdet.h"
#include "mmdeploy/core/tensor.h"
namespace mmdeploy::mmdet {
// Shared post-processing for YOLO-family dense heads: decodes raw feature
// maps into candidate boxes, filters by score, sorts and applies NMS.
class YOLOHead : public MMDetection {
 public:
  explicit YOLOHead(const Value& cfg);
  // Entry point: consumes preprocessing metadata and raw network outputs.
  Result<Value> operator()(const Value& prep_res, const Value& infer_res);
  // Decodes one feature-map level; appends candidates that pass `threshold`
  // to boxes/obj_probs/class_id and returns the number of boxes kept.
  int YOLOFeatDecode(const Tensor& feat_map, const std::vector<std::vector<float>>& anchor,
                     int grid_h, int grid_w, int height, int width, int stride,
                     std::vector<float>& boxes, std::vector<float>& obj_probs,
                     std::vector<int>& class_id, float threshold) const;
  // Turns all decoded levels into final detections in original-image space.
  Result<Detections> GetBBoxes(const Value& prep_res, const std::vector<Tensor>& pred_maps) const;
  // Version-specific box parameterization (v3 vs v5). Returns
  // (center_x, center_y, width, height) in input-image pixels for grid cell
  // (j, i) and anchor index a.
  virtual std::array<float, 4> yolo_decode(float box_x, float box_y, float box_w, float box_h,
                                           float stride,
                                           const std::vector<std::vector<float>>& anchor, int j,
                                           int i, int a) const = 0;
 private:
  float score_thr_{0.4f};       // minimum confidence to keep a candidate
  int nms_pre_{1000};           // candidate cap before NMS
  float iou_threshold_{0.45f};  // IoU threshold used by NMS
  int min_bbox_size_{0};        // boxes smaller than this are dropped
  std::vector<std::vector<std::vector<float>>> anchors_;  // per level, per anchor (w, h)
  std::vector<float> strides_;  // downsample stride of each level
};
// YOLOv3-style decoding: grid-offset centers plus exp() sizes scaled by anchors.
class YOLOV3Head : public YOLOHead {
 public:
  using YOLOHead::YOLOHead;
  std::array<float, 4> yolo_decode(float box_x, float box_y, float box_w, float box_h, float stride,
                                   const std::vector<std::vector<float>>& anchor, int j, int i,
                                   int a) const override;
};
// YOLOv5-style decoding: (2*sigmoid - 0.5) centers and squared-sigmoid sizes.
class YOLOv5Head : public YOLOHead {
 public:
  using YOLOHead::YOLOHead;
  std::array<float, 4> yolo_decode(float box_x, float box_y, float box_w, float box_h, float stride,
                                   const std::vector<std::vector<float>>& anchor, int j, int i,
                                   int a) const override;
};
} // namespace mmdeploy::mmdet
#endif // MMDEPLOY_CODEBASE_MMDET_YOLO_HEAD_H_
# Copyright (c) OpenMMLab. All rights reserved.
# Build the MMEditing (image restoration) codebase module.
project(mmdeploy_mmedit)
# NOTE(review): passing the source dir itself as a glob expression is unusual;
# the conventional form is "${CMAKE_CURRENT_SOURCE_DIR}/*.cpp" -- confirm intent.
file(GLOB_RECURSE SRCS ${CMAKE_CURRENT_SOURCE_DIR} "*.cpp")
mmdeploy_add_module(${PROJECT_NAME} "${SRCS}")
target_link_libraries(${PROJECT_NAME}
        PRIVATE mmdeploy_opencv_utils)
add_library(mmdeploy::mmedit ALIAS ${PROJECT_NAME})
# Expose the "restorer" task to the top-level build.
set(MMDEPLOY_TASKS ${MMDEPLOY_TASKS} restorer CACHE INTERNAL "")
// Copyright (c) OpenMMLab. All rights reserved.
#include "mmdeploy/codebase/mmedit/mmedit.h"
#include "mmdeploy/core/registry.h"
namespace mmdeploy::mmedit {
// Register the MMEdit codebase so its components can be created from configs.
MMDEPLOY_REGISTER_CODEBASE(MMEdit);
}  // namespace mmdeploy::mmedit
// Copyright (c) OpenMMLab. All rights reserved.
#ifndef MMDEPLOY_SRC_CODEBASE_MMEDIT_MMEDIT_H_
#define MMDEPLOY_SRC_CODEBASE_MMEDIT_MMEDIT_H_
#include "mmdeploy/codebase/common.h"
#include "mmdeploy/core/device.h"
#include "mmdeploy/core/mat.h"
#include "mmdeploy/core/module.h"
#include "mmdeploy/core/serialization.h"
namespace mmdeploy::mmedit {
// A restorer's output is simply the restored image.
using RestorerOutput = Mat;
MMDEPLOY_DECLARE_CODEBASE(MMEdit, mmedit);
}  // namespace mmdeploy::mmedit
#endif // MMDEPLOY_SRC_CODEBASE_MMEDIT_MMEDIT_H_
// Copyright (c) OpenMMLab. All rights reserved.
#include <opencv2/core.hpp>
#include "mmdeploy/codebase/mmedit/mmedit.h"
#include "mmdeploy/core/tensor.h"
#include "mmdeploy/core/utils/device_utils.h"
namespace mmdeploy::mmedit {
// Converts the restorer's float CHW output tensor into an 8-bit image Mat.
class TensorToImg : public MMEdit {
 public:
  explicit TensorToImg(const Value& cfg) : MMEdit(cfg) {}
  // Expects input["output"] to be a 4-D float tensor laid out (N, C, H, W);
  // presumably N == 1 and values are in [0, 1] (scaled by 255 below) --
  // TODO confirm against the producing model.
  Result<Value> operator()(const Value& input) {
    auto upscale = input["output"].get<Tensor>();
    // Copy to host memory (no-op if already there) and wait for the copy.
    OUTCOME_TRY(auto upscale_cpu, MakeAvailableOnDevice(upscale, kHOST, stream()));
    OUTCOME_TRY(stream().Wait());
    if (upscale.shape().size() == 4 && upscale.data_type() == DataType::kFLOAT) {
      auto channels = static_cast<int>(upscale.shape(1));
      auto height = static_cast<int>(upscale.shape(2));
      auto width = static_cast<int>(upscale.shape(3));
      // TODO: handle BGR <-> RGB conversion
      OUTCOME_TRY(auto format, ChannelsToFormat(channels));
      Mat mat(height, width, format, DataType::kINT8, kHOST);
      // View the planar CHW data as a (C x H*W) matrix and transpose it to
      // (H*W x C), i.e. produce interleaved HWC pixel order.
      cv::Mat_<float> mat_chw(channels, height * width, upscale_cpu.data<float>());
      cv::Mat mat_hwc(height * width, channels, CV_32F);
      cv::transpose(mat_chw, mat_hwc);
      // Wrap the output Mat's buffer so convertTo writes straight into it.
      cv::Mat rescale_uint8(height, width, CV_8UC(channels), mat.data<uint8_t>());
      mat_hwc = mat_hwc.reshape(channels, height);
      // convert has saturate_cast inside
      mat_hwc.convertTo(rescale_uint8, CV_8UC(channels), 255.f);
      return mat;
    } else {
      MMDEPLOY_ERROR("unsupported `output` tensor, shape: {}, dtype: {}", upscale.shape(),
                     (int)upscale.data_type());
      return Status(eNotSupported);
    }
  }
 protected:
  // Maps channel count to a pixel format: 1 -> grayscale, 3 -> RGB.
  static Result<PixelFormat> ChannelsToFormat(int channels) {
    switch (channels) {
      case 1:
        return PixelFormat::kGRAYSCALE;
      case 3:
        return PixelFormat::kRGB;
      default:
        return Status(eNotSupported);
    }
  }
  static constexpr const Device kHOST{0, 0};
};
MMDEPLOY_REGISTER_CODEBASE_COMPONENT(MMEdit, TensorToImg);
} // namespace mmdeploy::mmedit
# Copyright (c) OpenMMLab. All rights reserved.
# Build the MMOCR codebase module (text detection + recognition).
project(mmdeploy_mmocr)
# Gather all OCR sources plus the bundled Clipper polygon-clipping library.
aux_source_directory(${CMAKE_CURRENT_SOURCE_DIR} OCR_SRCS)
aux_source_directory(${CMAKE_SOURCE_DIR}/third_party/clipper CLIPPER_SRCS)
set(SRCS ${OCR_SRCS} ${CLIPPER_SRCS})
mmdeploy_add_module(${PROJECT_NAME} "${SRCS}")
# Device-specific implementations.
add_subdirectory(cpu)
add_subdirectory(cuda)
target_include_directories(${PROJECT_NAME} PRIVATE
        ${CMAKE_SOURCE_DIR}/third_party/clipper)
target_link_libraries(${PROJECT_NAME} PRIVATE
        mmdeploy_opencv_utils
        mmdeploy_operation
        mmdeploy::transform
        mmdeploy::core)
add_library(mmdeploy::mmocr ALIAS ${PROJECT_NAME})
# Expose the OCR tasks to the top-level build.
set(MMDEPLOY_TASKS ${MMDEPLOY_TASKS} text_detector text_recognizer CACHE INTERNAL "")
// Copyright (c) OpenMMLab. All rights reserved.
#include <algorithm>
#include <sstream>
#include "base_convertor.h"
#include "mmdeploy/core/device.h"
#include "mmdeploy/core/model.h"
#include "mmdeploy/core/registry.h"
#include "mmdeploy/core/tensor.h"
#include "mmdeploy/core/utils/device_utils.h"
#include "mmdeploy/core/utils/formatter.h"
#include "mmdeploy/core/value.h"
#include "mmdeploy/experimental/module_adapter.h"
#include "mmocr.h"
namespace mmdeploy::mmocr {
using std::string;
using std::vector;
// Decodes attention-based text recognizer output into a string via greedy
// per-step arg-max over character classes.
class AttnConvertor : public BaseConvertor {
 public:
  explicit AttnConvertor(const Value& cfg) : BaseConvertor(cfg) {}
  // _prob["output"] must be a 3-D float tensor; dimensions 1 and 2 are used as
  // (decode steps, classes). Only the first sample's scores are read below,
  // so this presumably assumes batch size 1 -- TODO confirm.
  Result<Value> operator()(const Value& _data, const Value& _prob) {
    auto d_conf = _prob["output"].get<Tensor>();
    if (!(d_conf.shape().size() == 3 && d_conf.data_type() == DataType::kFLOAT)) {
      MMDEPLOY_ERROR("unsupported `output` tensor, shape: {}, dtype: {}", d_conf.shape(),
                     (int)d_conf.data_type());
      return Status(eNotSupported);
    }
    // Bring the scores to host memory and wait until the copy completes.
    OUTCOME_TRY(auto h_conf, MakeAvailableOnDevice(d_conf, Device{0}, stream()));
    OUTCOME_TRY(stream().Wait());
    auto data = h_conf.data<float>();
    auto shape = d_conf.shape();
    auto w = static_cast<int>(shape[1]);
    auto c = static_cast<int>(shape[2]);
    auto [indexes, scores] = Tensor2Idx(data, w, c);
    auto text = Idx2Str(indexes);
    MMDEPLOY_DEBUG("text: {}", text);
    TextRecognition output{text, scores};
    return make_pointer(to_value(output));
  }
  // Greedy decoding over w steps of c class scores: keeps the arg-max class
  // per step, stops at the end-of-sequence index and drops indexes listed in
  // ignore_indexes_. Returns (class indexes, their scores).
  std::pair<vector<int>, vector<float> > Tensor2Idx(const float* data, int w, int c) {
    auto decode_len = w;
    vector<int> indexes;
    indexes.reserve(decode_len);
    vector<float> scores;
    scores.reserve(decode_len);
    for (int t = 0; t < decode_len; ++t, data += c) {
      vector<float> prob(data, data + c);
      auto iter = max_element(begin(prob), end(prob));
      auto index = static_cast<int>(iter - begin(prob));
      if (index == end_idx_) break;
      if (std::find(ignore_indexes_.begin(), ignore_indexes_.end(), index) ==
          ignore_indexes_.end()) {
        indexes.push_back(index);
        scores.push_back(*iter);
      }
    }
    return {indexes, scores};
  }
};
MMDEPLOY_REGISTER_CODEBASE_COMPONENT(MMOCR, AttnConvertor);
} // namespace mmdeploy::mmocr
// Copyright (c) OpenMMLab. All rights reserved.
#include "base_convertor.h"
namespace mmdeploy::mmocr {
using std::string;
using std::unordered_map;
using std::unordered_set;
using std::vector;
// Builds the index <-> character dictionary from the model config.
//
// The dictionary comes from, in priority order: "dict_file" (one entry per
// line, read from the model archive), "dict_list" (explicit list), or
// "dict_type" (built-in DICT36/DICT90). Special tokens (<BOS>, <EOS>, <PAD>,
// <UKN>) are appended afterwards; finally the reverse map and the set of
// indexes to ignore during decoding are populated.
// Throws eInvalidArgument when no dictionary source is configured.
BaseConvertor::BaseConvertor(const Value& cfg) : MMOCR(cfg) {
  auto model = cfg["context"]["model"].get<Model>();
  if (!cfg.contains("params")) {
    MMDEPLOY_ERROR("'params' is required, but it's not in the config");
    throw_exception(eInvalidArgument);
  }
  // BaseConverter
  auto& _cfg = cfg["params"];
  if (_cfg.contains("dict_file")) {
    auto filename = _cfg["dict_file"].get<std::string>();
    auto content = model.ReadFile(filename).value();
    idx2char_ = SplitLines(content);
  } else if (_cfg.contains("dict_list")) {
    from_value(_cfg["dict_list"], idx2char_);
  } else if (_cfg.contains("dict_type")) {
    auto dict_type = _cfg["dict_type"].get<std::string>();
    if (dict_type == "DICT36") {
      idx2char_ = SplitChars(DICT36);
    } else if (dict_type == "DICT90") {
      idx2char_ = SplitChars(DICT90);
    } else {
      MMDEPLOY_ERROR("unknown dict_type: {}", dict_type);
      throw_exception(eInvalidArgument);
    }
  } else {
    MMDEPLOY_ERROR("either dict_file, dict_list or dict_type must be specified");
    throw_exception(eInvalidArgument);
  }
  model_ = model;
  // Update Dictionary
  bool with_start = _cfg.value("with_start", false);
  bool with_end = _cfg.value("with_end", false);
  bool same_start_end = _cfg.value("same_start_end", false);
  bool with_padding = _cfg.value("with_padding", false);
  bool with_unknown = _cfg.value("with_unknown", false);
  // Append start/end tokens; they may share one index when same_start_end.
  if (with_start && with_end && same_start_end) {
    start_idx_ = static_cast<int>(idx2char_.size());
    end_idx_ = start_idx_;
    string start_end_token = _cfg.value("start_end_token", string("<BOS/EOS>"));
    idx2char_.emplace_back(std::move(start_end_token));
  } else {
    if (with_start) {
      start_idx_ = static_cast<int>(idx2char_.size());
      string start_token = _cfg.value("start_token", string("<BOS>"));
      idx2char_.emplace_back(std::move(start_token));
    }
    if (with_end) {
      end_idx_ = static_cast<int>(idx2char_.size());
      string end_token = _cfg.value("end_token", string("<EOS>"));
      idx2char_.emplace_back(std::move(end_token));
    }
  }
  if (with_padding) {
    padding_idx_ = static_cast<int>(idx2char_.size());
    string padding_token = _cfg.value("padding_token", string("<PAD>"));
    idx2char_.emplace_back(std::move(padding_token));
  }
  // The unknown token is only added when explicitly configured and non-null.
  if (with_unknown && _cfg.contains("unknown_token") && !_cfg["unknown_token"].is_null()) {
    unknown_idx_ = static_cast<int>(idx2char_.size());
    string unknown_token = _cfg.value("unknown_token", string("<UKN>"));
    idx2char_.emplace_back(unknown_token);
  }
  // char2idx
  for (int i = 0; i < (int)idx2char_.size(); i++) {
    char2idx_[idx2char_[i]] = i;
  }
  // Characters to drop from decoded output; defaults to the padding token.
  vector<string> ignore_chars;
  if (cfg.contains("ignore_chars")) {
    for (int i = 0; i < cfg["ignore_chars"].size(); i++)
      ignore_chars.emplace_back(cfg["ignore_chars"][i].get<string>());
  } else {
    ignore_chars.emplace_back("padding");
  }
  // Resolve each ignore entry: special-token aliases first, then a literal
  // dictionary character, finally the unknown token as a fallback.
  std::map<string, int> mapping_table = {
      {"padding", padding_idx_}, {"end", end_idx_}, {"unknown", unknown_idx_}};
  for (int i = 0; i < ignore_chars.size(); i++) {
    const auto& ignore_char = ignore_chars[i];
    int ignore_idx = -1;
    if (auto it_default = mapping_table.find(ignore_char); it_default != mapping_table.end()) {
      ignore_idx = it_default->second;
    } else if (auto it_candidate = char2idx_.find(ignore_char); it_candidate != char2idx_.end()) {
      ignore_idx = it_candidate->second;
    } else if (with_unknown) {
      ignore_idx = unknown_idx_;
    }
    // -1 also covers special tokens that were never added (index stayed -1).
    if (ignore_idx == -1 || (ignore_idx == unknown_idx_ && ignore_char != "unknown")) {
      MMDEPLOY_WARN("{} does not exist in the dictionary", ignore_char);
      continue;
    }
    ignore_indexes_.insert(ignore_idx);
  }
}
// Maps each class index to its dictionary entry and concatenates the result.
//
// Fix: the original logged out-of-range indexes but then still executed
// idx2char_[idx], an out-of-bounds read (undefined behavior). Invalid
// indexes (negative or >= dictionary size) are now reported and skipped.
string BaseConvertor::Idx2Str(const vector<int>& indexes) {
  // First pass: total output length, so the string is allocated only once.
  size_t count = 0;
  for (const auto& idx : indexes) {
    if (idx < 0 || static_cast<size_t>(idx) >= idx2char_.size()) {
      MMDEPLOY_ERROR("idx exceeds array bounds {} {}", idx, idx2char_.size());
      continue;  // skip instead of reading out of bounds
    }
    count += idx2char_[idx].size();
  }
  std::string text;
  text.reserve(count);
  // Second pass: append the entry of every valid index.
  for (const auto& idx : indexes) {
    if (idx < 0 || static_cast<size_t>(idx) >= idx2char_.size()) {
      continue;
    }
    text += idx2char_[idx];
  }
  return text;
}
// Splits s at '\n' into one entry per line. Like std::getline, a trailing
// newline does not produce a final empty entry.
vector<string> BaseConvertor::SplitLines(const string& s) {
  vector<string> lines;
  size_t pos = 0;
  while (pos < s.size()) {
    auto nl = s.find('\n', pos);
    if (nl == string::npos) {
      lines.push_back(s.substr(pos));
      break;
    }
    lines.push_back(s.substr(pos, nl - pos));
    pos = nl + 1;
  }
  return lines;
}
// Splits s into one single-character string per byte.
vector<string> BaseConvertor::SplitChars(const string& s) {
  vector<string> tokens(s.size());
  for (size_t k = 0; k < s.size(); ++k) {
    tokens[k] = string(1, s[k]);
  }
  return tokens;
}
} // namespace mmdeploy::mmocr
// Copyright (c) OpenMMLab. All rights reserved.
#include <algorithm>
#include <sstream>
#include <unordered_map>
#include <unordered_set>
#include "mmdeploy/core/device.h"
#include "mmdeploy/core/model.h"
#include "mmdeploy/core/registry.h"
#include "mmdeploy/core/tensor.h"
#include "mmdeploy/core/utils/device_utils.h"
#include "mmdeploy/core/utils/formatter.h"
#include "mmdeploy/core/value.h"
#include "mmdeploy/experimental/module_adapter.h"
#include "mmocr.h"
namespace mmdeploy::mmocr {
using std::string;
using std::unordered_map;
using std::unordered_set;
using std::vector;
// Base class for OCR label convertors: owns the index <-> character
// dictionary and the decoding bookkeeping shared by concrete convertors.
class BaseConvertor : public MMOCR {
 public:
  explicit BaseConvertor(const Value& cfg);
  // Maps class indexes to their dictionary entries and concatenates them.
  string Idx2Str(const vector<int>& indexes);
  // Decodes the raw model output (_prob) for the given input (_data).
  virtual Result<Value> operator()(const Value& _data, const Value& _prob) = 0;
 protected:
  // Splits s at '\n' into one entry per line.
  static vector<string> SplitLines(const string& s);
  // Splits s into one single-character string per byte.
  static vector<string> SplitChars(const string& s);
  static constexpr const auto DICT36 = R"(0123456789abcdefghijklmnopqrstuvwxyz)";
  static constexpr const auto DICT90 = R"(0123456789abcdefghijklmnopqrstuvwxyz)"
                                       R"(ABCDEFGHIJKLMNOPQRSTUVWXYZ!"#$%&'())"
                                       R"(*+,-./:;<=>?@[\]_`~)";
  static constexpr const auto kHost = Device(0);
  Model model_;
  // Indexes of the special tokens; -1 means the token is not in use.
  int padding_idx_{-1};
  int end_idx_{-1};
  int start_idx_{-1};
  int unknown_idx_{-1};
  unordered_set<int> ignore_indexes_;    // class indexes dropped when decoding
  unordered_map<string, int> char2idx_;  // reverse lookup of idx2char_
  vector<string> idx2char_;              // class index -> character/token
};  // class BaseConvertor
} // namespace mmdeploy::mmocr
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment