Commit 546b4279 authored by limm's avatar limm
Browse files

add csrc and mmdeploy module

parent 502f4fb9
Pipeline #2810 canceled with stages
// Copyright (c) OpenMMLab. All rights reserved
// Modified from https://github.com/whai362/PSENet
// and
// https://github.com/open-mmlab/mmcv/blob/master/mmcv/ops/csrc/pytorch/contour_expand.cpp
#include <cmath>
#include <iostream>
#include <queue>
#include <vector>
#include "mmdeploy/core/tensor.h"
#include "opencv2/imgproc/imgproc.hpp"
namespace mmdeploy::mmocr {
using namespace std;
using cv::Mat_;
// Minimal integer 2-D coordinate used by the BFS expansion below.
class Point2d {
 public:
  int x{0};
  int y{0};
  Point2d() = default;
  Point2d(int px, int py) : x(px), y(py) {}
};
void kernel_dilate(const uint8_t* data, int kernel_num, int height, int width, const int* label_map,
int label_num, const float* score_map, int min_area, Mat_<int32_t>& text_labels,
vector<int>& text_areas, vector<float>& text_scores,
vector<vector<int>>& text_points) {
text_labels = Mat_<int32_t>::zeros(height, width);
text_areas = vector<int>(label_num);
text_scores = vector<float>(label_num);
text_points = vector<vector<int>>(label_num);
for (int x = 0; x < height; ++x) {
for (int y = 0; y < width; ++y) {
int label = label_map[x * width + y];
if (label == 0) continue;
text_areas[label] += 1;
text_scores[label] += score_map[x * width + y];
text_points[label].push_back(y);
text_points[label].push_back(x);
}
}
queue<Point2d> queue, next_queue;
for (int x = 0; x < height; ++x) {
auto row = text_labels[x];
for (int y = 0; y < width; ++y) {
int label = label_map[x * width + y];
if (label == 0) continue;
if (text_areas[label] < min_area) continue;
Point2d point(x, y);
queue.push(point);
row[y] = label;
}
}
const int dx[] = {-1, 1, 0, 0};
const int dy[] = {0, 0, -1, 1};
vector<int> kernel_step(kernel_num);
std::for_each(kernel_step.begin(), kernel_step.end(), [=](int& k) { return k * height * width; });
for (int kernel_id = kernel_num - 2; kernel_id >= 0; --kernel_id) {
while (!queue.empty()) {
Point2d point = queue.front();
queue.pop();
int x = point.x;
int y = point.y;
int label = text_labels[x][y];
bool is_edge = true;
for (int d = 0; d < 4; ++d) {
int tmp_x = x + dx[d];
int tmp_y = y + dy[d];
if (tmp_x < 0 || tmp_x >= height) continue;
if (tmp_y < 0 || tmp_y >= width) continue;
int kernel_value = data[kernel_step[kernel_id] + tmp_x * width + tmp_y];
if (kernel_value == 0) continue;
if (text_labels[tmp_x][tmp_y] > 0) continue;
Point2d point(tmp_x, tmp_y);
queue.push(point);
text_labels[tmp_x][tmp_y] = label;
text_areas[label] += 1;
text_scores[label] += score_map[tmp_x * width + tmp_y];
text_points[label].push_back(tmp_y);
text_points[label].push_back(tmp_x);
is_edge = false;
}
if (is_edge) {
next_queue.push(point);
}
}
swap(queue, next_queue);
}
for (int i = 1; i < label_num; ++i) {
if (text_areas[i]) {
text_scores[i] /= static_cast<float>(text_areas[i]);
}
}
}
void contour_expand(const Mat_<uint8_t>& kernel_masks, const Mat_<int32_t>& kernel_label,
const Mat_<float>& score, int min_kernel_area, int kernel_num,
vector<int>& text_areas, vector<float>& text_scores,
vector<vector<int>>& text_points) {
assert(kernel_masks.cols == kernel_label.total());
assert(score.size() == kernel_label.size());
auto ptr_data = kernel_masks.ptr<uint8_t>();
auto data_score_map = score.ptr<float>();
auto data_label_map = kernel_label.ptr<int32_t>();
vector<vector<int>> text_line;
Mat_<int32_t> text_labels;
kernel_dilate(ptr_data, kernel_masks.rows, kernel_label.rows, kernel_label.cols, data_label_map,
kernel_num, data_score_map, min_kernel_area, text_labels, text_areas, text_scores,
text_points);
}
} // namespace mmdeploy::mmocr
# Copyright (c) OpenMMLab. All rights reserved.
project(mmdeploy_mmocr_cpu_impl CXX)
# CPU post-processing implementations; only built when the "cpu" device
# target is enabled.
if ("cpu" IN_LIST MMDEPLOY_TARGET_DEVICES)
add_library(${PROJECT_NAME} OBJECT dbnet.cpp panet.cpp psenet.cpp)
# The object library is folded into shared targets, so PIC is required.
set_target_properties(${PROJECT_NAME} PROPERTIES POSITION_INDEPENDENT_CODE 1)
# Hide symbols in static non-MSVC builds to keep the exported ABI minimal.
if (NOT (MMDEPLOY_SHARED_LIBS OR MSVC))
target_compile_options(${PROJECT_NAME} PRIVATE $<$<COMPILE_LANGUAGE:CXX>:-fvisibility=hidden>)
endif ()
target_link_libraries(${PROJECT_NAME} PRIVATE
mmdeploy_opencv_utils
mmdeploy::core)
# Link the implementation objects into the main mmocr codebase target.
target_link_libraries(mmdeploy_mmocr PRIVATE ${PROJECT_NAME})
mmdeploy_export(${PROJECT_NAME})
endif ()
// Copyright (c) OpenMMLab. All rights reserved.
#include "mmdeploy/codebase/mmocr/dbnet.h"
#include "mmdeploy/core/utils/device_utils.h"
#include "opencv2/imgproc.hpp"
namespace mmdeploy::mmocr {
// DBNet post-processing on CPU: binarize the probability map, extract and
// simplify contours, and score each candidate polygon.
class DbHeadCpuImpl : public DbHeadImpl {
 public:
  void Init(const Stream& stream) override {
    DbHeadImpl::Init(stream);
    device_ = Device("cpu");
  }

  // prob:           probability map tensor, shape (1, H, W), float32
  // mask_thr:       binarization threshold for the text mask
  // max_candidates: keep at most this many contours (negative means no limit)
  // points/scores:  (out) simplified polygons and their mean-probability scores
  Result<void> Process(Tensor prob, float mask_thr, int max_candidates,
                       std::vector<std::vector<cv::Point>>& points,
                       std::vector<float>& scores) override {
    OUTCOME_TRY(auto conf, MakeAvailableOnDevice(prob, device_, stream_));
    OUTCOME_TRY(stream_.Wait());
    auto h = conf.shape(1);
    auto w = conf.shape(2);
    auto data = conf.data<float>();
    // Zero-copy view over the tensor buffer; `conf` must outlive `score_map`.
    cv::Mat score_map((int)h, (int)w, CV_32F, data);
    cv::Mat text_mask = score_map >= mask_thr;
    std::vector<std::vector<cv::Point>> contours;
    cv::findContours(text_mask, contours, cv::RETR_LIST, cv::CHAIN_APPROX_SIMPLE);
    // FIX: explicit size_t comparison instead of a signed/unsigned mismatch.
    // A negative max_candidates keeps all contours, matching the previous
    // implicit behavior (negative converted to a huge unsigned value).
    if (max_candidates >= 0 && contours.size() > static_cast<size_t>(max_candidates)) {
      contours.resize(max_candidates);
    }
    for (auto& poly : contours) {
      // Simplify with a tolerance proportional to the contour perimeter.
      auto epsilon = 0.01 * cv::arcLength(poly, true);
      std::vector<cv::Point> approx;
      cv::approxPolyDP(poly, approx, epsilon, true);
      if (approx.size() < 4) {
        continue;  // too degenerate to be a text box
      }
      auto score = box_score_fast(score_map, approx);
      points.push_back(approx);
      scores.push_back(score);
    }
    return success();
  }

  // Mean probability inside `box`, computed over its bounding rect (clipped
  // to the image) with a filled-polygon mask.
  static float box_score_fast(const cv::Mat& bitmap, const std::vector<cv::Point>& box) noexcept {
    auto rect = cv::boundingRect(box) & cv::Rect({}, bitmap.size());
    cv::Mat mask(rect.size(), CV_8U, cv::Scalar(0));
    cv::fillPoly(mask, std::vector<std::vector<cv::Point>>{box}, 1, cv::LINE_8, 0, -rect.tl());
    auto mean = cv::mean(bitmap(rect), mask)[0];
    return static_cast<float>(mean);
  }

  Device device_;
};
MMDEPLOY_REGISTER_FACTORY_FUNC(DbHeadImpl, (cpu, 0),
                               [] { return std::make_unique<DbHeadCpuImpl>(); });
} // namespace mmdeploy::mmocr
// Copyright (c) OpenMMLab. All rights reserved.
#include "mmdeploy/codebase/mmocr/panet.h"
#include "opencv2/imgproc.hpp"
namespace mmdeploy::mmocr {
// PANet post-processing on CPU: sigmoid the text/kernel predictions,
// threshold them into masks, transpose the embedding map and label the
// kernel's connected components.
class PaHeadCpuImpl : public PaHeadImpl {
public:
// text_pred/kernel_pred: (1, H, W) logits; embed_pred: (C, H*W) embeddings.
// Outputs: thresholded masks, per-pixel component labels (label), pixel
// embeddings transposed to (H*W, C), and region_num (components incl. bg).
Result<void> Process(Tensor text_pred, //
Tensor kernel_pred, //
Tensor embed_pred, //
float min_text_confidence, //
float min_kernel_confidence, //
cv::Mat_<float>& text_score, //
cv::Mat_<uint8_t>& text, //
cv::Mat_<uint8_t>& kernel, //
cv::Mat_<int>& label, //
cv::Mat_<float>& embed, //
int& region_num) override {
OUTCOME_TRY(stream_.Wait());
auto height = static_cast<int>(text_pred.shape(1));
auto width = static_cast<int>(text_pred.shape(2));
// Zero-copy view over the tensor buffer -- sigmoid() below mutates the
// tensor data in place, and text_score stays valid only while text_pred
// is alive.  NOTE(review): confirm callers do not reuse text_pred after.
text_score = cv::Mat_<float>(height, width, text_pred.data<float>());
sigmoid(text_score);
text = text_score > min_text_confidence;
cv::Mat_<float> kernel_score(height, width, kernel_pred.data<float>());
sigmoid(kernel_score);
// Precedence: `>` binds tighter than `&`, so this is
// (kernel_score > min_kernel_confidence) & text.
kernel = kernel_score > min_kernel_confidence & text;
auto n_embed_channels = static_cast<int>(embed_pred.shape(0));
embed = cv::Mat_<float>(n_embed_channels, height * width, embed_pred.data<float>());
// In-place transpose: embed becomes (H*W, C), one embedding row per pixel.
cv::transpose(embed, embed);
// 4-connectivity labeling; region_num includes the background label 0.
region_num = cv::connectedComponents(kernel, label, 4, CV_32S);
return success();
}
// In-place logistic sigmoid over a float matrix.
static void sigmoid(cv::Mat_<float>& score) {
cv::exp(-score, score);
score = 1 / (1 + score);
}
};
MMDEPLOY_REGISTER_FACTORY_FUNC(PaHeadImpl, (cpu, 0),
[] { return std::make_unique<PaHeadCpuImpl>(); });
} // namespace mmdeploy::mmocr
// Copyright (c) OpenMMLab. All rights reserved.
#include "mmdeploy/codebase/mmocr/psenet.h"
#include "mmdeploy/core/utils/device_utils.h"
#include "opencv2/imgproc.hpp"
namespace mmdeploy::mmocr {
// PSENet post-processing on CPU: sigmoid + threshold every kernel channel,
// intersect with the largest (text) kernel and label the smallest one.
class PseHeadCpuImpl : public PseHeadImpl {
public:
PseHeadCpuImpl() : device_(0) {}
// preds: (C, H, W) logits; channel 0 is the largest kernel / text region,
// channel C-1 the smallest kernel used for seeding.
// Outputs: score (sigmoid of channel 0), masks (C x H*W binary), label
// (components of the smallest kernel) and region_num (incl. background).
Result<void> Process(Tensor preds, //
float min_kernel_confidence, //
cv::Mat_<float>& score, //
cv::Mat_<uint8_t>& masks, //
cv::Mat_<int>& label, //
int& region_num) override {
OUTCOME_TRY(preds, MakeAvailableOnDevice(preds, device_, stream_));
OUTCOME_TRY(stream_.Wait());
auto channels = static_cast<int>(preds.shape(0));
auto height = static_cast<int>(preds.shape(1));
auto width = static_cast<int>(preds.shape(2));
// Zero-copy view; sigmoid() mutates the tensor data in place.
cv::Mat_<float> probs(channels, height * width, preds.data<float>());
sigmoid(probs);
// Keep a deep copy of channel 0 as the score map.
probs.row(0).reshape(1, height).copyTo(score);
masks = probs > min_kernel_confidence;
// Every kernel mask must be contained in the text-region mask (row 0).
for (int i = 1; i < channels; ++i) {
masks.row(i) &= masks.row(0);
}
cv::Mat_<uint8_t> kernel = masks.row(channels - 1).reshape(1, height);
// 4-connectivity labeling; region_num includes the background label 0.
region_num = cv::connectedComponents(kernel, label, 4, CV_32S);
return success();
}
// In-place logistic sigmoid over a float matrix.
static void sigmoid(cv::Mat_<float>& score) {
cv::exp(-score, score);
score = 1 / (1 + score);
}
private:
Device device_;
};
MMDEPLOY_REGISTER_FACTORY_FUNC(PseHeadImpl, (cpu, 0),
[] { return std::make_unique<PseHeadCpuImpl>(); });
} // namespace mmdeploy::mmocr
// Copyright (c) OpenMMLab. All rights reserved.
#include <algorithm>
#include <sstream>
#include "base_convertor.h"
#include "mmdeploy/core/device.h"
#include "mmdeploy/core/model.h"
#include "mmdeploy/core/registry.h"
#include "mmdeploy/core/tensor.h"
#include "mmdeploy/core/utils/device_utils.h"
#include "mmdeploy/core/utils/formatter.h"
#include "mmdeploy/core/value.h"
#include "mmdeploy/experimental/module_adapter.h"
#include "mmocr.h"
namespace mmdeploy::mmocr {
using std::string;
using std::vector;
// Greedy CTC decoder: converts a recognition head's per-timestep class
// probabilities into a text string plus per-character scores.
class CTCConvertor : public BaseConvertor {
 public:
  // cfg: codebase config forwarded to BaseConvertor (dictionary, ignored ids).
  explicit CTCConvertor(const Value& cfg) : BaseConvertor(cfg) {}

  // _data:  input metadata (reads img_metas.valid_ratio)
  // _prob:  inference output; "output" must be a float32 tensor of rank 3
  //         interpreted as (N, W, C)
  Result<Value> operator()(const Value& _data, const Value& _prob) {
    auto d_conf = _prob["output"].get<Tensor>();
    if (!(d_conf.shape().size() == 3 && d_conf.data_type() == DataType::kFLOAT)) {
      MMDEPLOY_ERROR("unsupported `output` tensor, shape: {}, dtype: {}", d_conf.shape(),
                     (int)d_conf.data_type());
      return Status(eNotSupported);
    }
    OUTCOME_TRY(auto h_conf, MakeAvailableOnDevice(d_conf, Device{0}, stream()));
    OUTCOME_TRY(stream().Wait());
    auto data = h_conf.data<float>();
    auto shape = d_conf.shape();
    auto w = static_cast<int>(shape[1]);
    auto c = static_cast<int>(shape[2]);
    auto valid_ratio = _data["img_metas"]["valid_ratio"].get<float>();
    auto [indexes, scores] = Tensor2Idx(data, w, c, valid_ratio);
    auto text = Idx2Str(indexes);
    MMDEPLOY_DEBUG("text: {}", text);
    TextRecognition output{text, scores};
    return make_pointer(to_value(output));
  }

  // Greedy decoding: per time step take the arg-max class, collapse repeats
  // and drop ignored indices.  Only the first ceil(w * valid_ratio) steps
  // (clamped to w) are decoded.
  std::pair<vector<int>, vector<float> > Tensor2Idx(const float* data, int w, int c,
                                                    float valid_ratio) {
    auto decode_len = std::min(w, static_cast<int>(std::ceil(w * valid_ratio)));
    vector<int> indexes;
    indexes.reserve(decode_len);
    vector<float> scores;
    scores.reserve(decode_len);
    int prev = padding_idx_;
    for (int t = 0; t < decode_len; ++t, data += c) {
      // Arg-max directly over the tensor row; the original copied each time
      // step into a temporary vector<float> first (an O(c) allocation per
      // step) just to call max_element on it.
      auto iter = std::max_element(data, data + c);
      auto index = static_cast<int>(iter - data);
      if (index != prev && ignore_indexes_.find(index) == ignore_indexes_.end()) {
        indexes.push_back(index);
        scores.push_back(*iter);
      }
      prev = index;
    }
    return {indexes, scores};
  }
};
MMDEPLOY_REGISTER_CODEBASE_COMPONENT(MMOCR, CTCConvertor);
} // namespace mmdeploy::mmocr
# Copyright (c) OpenMMLab. All rights reserved.
project(mmdeploy_mmocr_cuda_impl)
# CUDA post-processing implementations; only built when the "cuda" device
# target is enabled.
if ("cuda" IN_LIST MMDEPLOY_TARGET_DEVICES)
add_library(${PROJECT_NAME} OBJECT
connected_component.cu
utils.cu
dbnet.cpp
panet.cpp
psenet.cpp)
# The object library is folded into shared targets, so PIC is required.
set_target_properties(${PROJECT_NAME} PROPERTIES POSITION_INDEPENDENT_CODE 1)
target_include_directories(${PROJECT_NAME} PRIVATE
${CUDA_INCLUDE_DIRS})
target_link_libraries(${PROJECT_NAME} PRIVATE
mmdeploy_opencv_utils
mmdeploy::core
${CUDA_LIBRARIES}
cuda)
# Link the implementation objects into the main mmocr codebase target.
target_link_libraries(mmdeploy_mmocr PRIVATE
${PROJECT_NAME})
mmdeploy_export(${PROJECT_NAME})
endif ()
// Copyright (c) OpenMMLab. All rights reserved
// implementation based on "A new Direct Connected Component Labeling and Analysis Algorithms for
// GPUs"
// https://ieeexplore.ieee.org/document/8596835
#include <vector>
#include "connected_component.h"
#include "thrust/for_each.h"
#include "thrust/iterator/counting_iterator.h"
namespace mmdeploy {
// Length of the run of set bits in `pixels` that ends just below lane `tx`,
// i.e. the distance from lane tx back to the start of its horizontal pixel
// run (0 when bit tx-1 is clear).
// NOTE(review): for tx == 0 this shifts a 32-bit word by 32, which is
// undefined in ISO C++; the callers overwrite the tx == 0 lane with a
// carried distance, so the value is unused there -- confirm on the target
// toolchain.
__device__ int start_distance(unsigned pixels, int tx) {
unsigned v = ~(pixels << (32 - tx));
return __clz(reinterpret_cast<int&>(v));
}
// Distance from lane `tx` to the end of its horizontal pixel run: counts
// the consecutive set bits strictly above tx, plus one (__ffs is 1-based),
// so a set pixel at tx followed by k set pixels yields k + 1.
// NOTE(review): for tx == 31 this shifts a 32-bit word by 32 (undefined in
// ISO C++) -- confirm intended semantics on the target toolchain.
__device__ int end_distance(unsigned pixels, int tx) {
unsigned v = ~(pixels >> (tx + 1));
return __ffs(reinterpret_cast<int&>(v));
}
// Device-side swap (std::swap is not callable from __device__ code).
template <typename T>
__device__ void swap(T& x, T& y) {
  T old_x(x);
  x = y;
  y = old_x;
}
// Lock-free union of the disjoint-set trees containing pixels u and v.
// Roots are located by chasing parent links, then the larger root is linked
// under the smaller one via atomicMin; if another thread raced us, the loop
// retries from the value atomicMin observed.
__device__ void merge(int* label, int u, int v) {
// find root of u
while (u != v && u != label[u]) {
u = label[u];
}
// find root of v
while (u != v && v != label[v]) {
v = label[v];
}
while (u != v) {
// post-condition: u > v
if (u < v) swap(u, v);
// try to set label[u] = v
auto w = atomicMin(label + u, v);
// if u is modified by other threads, try again
u = u == w ? v : w;
}
}
// Integer ceiling division and rounding up to the next multiple of y
// (both assume non-negative x and positive y).
__host__ __device__ int div_up(int x, int y) { return (x + y - 1) / y; }
__host__ __device__ int round_up(int x, int y) { return div_up(x, y) * y; }
// Phase 1 of connected-component labeling: provisionally label horizontal
// pixel runs.  Each lane loads one pixel; __ballot_sync packs 32 pixels of
// a row into a bitmask so runs can be analyzed with bit tricks.  The first
// pixel of a run takes a linear pixel index as its label, and runs touching
// the row above are merged immediately.  distance0/distance1 carry run
// lengths across consecutive 32-pixel chunks of the same row.
// NOTE(review): the __syncthreads() below sits inside `if (x < w)`; when w
// is not a multiple of 32 the last chunk diverges within the block, which
// is undefined behavior for __syncthreads -- verify on the target arch.
template <int block_w, int block_h>
__global__ void LabelStripsKernel(const uint8_t* mask, int h, int w, int* label) {
__shared__ unsigned shared_pixels[block_h];
auto tx = static_cast<int>(threadIdx.x);
auto ty = static_cast<int>(threadIdx.y);
auto x0 = tx + static_cast<int>(blockIdx.x * blockDim.x);
auto y0 = ty + static_cast<int>(blockIdx.y * blockDim.y);
auto w_32 = round_up(w, 32);
for (auto y = y0; y < h; y += blockDim.y * gridDim.y) {
//* 0 -> current line
//* 1 -> line above
int distance0 = 0;
int distance1 = 0;
for (auto x = x0; x < w_32; x += blockDim.x * gridDim.x) {
unsigned active = __ballot_sync(0xffffffff, x < w);
if (x < w) {
auto key = y * w + x;
auto p0 = mask[y * w + x];
auto pixels0 = __ballot_sync(active, p0);
auto s_dist0 = start_distance(pixels0, tx);
// First pixel of a run: label it with the run start's linear index
// (lane 0 may belong to a run carried over from the previous chunk).
if (p0 && s_dist0 == 0) {
auto l = tx ? key : key - distance0;
label[y * w + x] = static_cast<int>(l);
}
// Publish this row's bitmask so the row below (ty+1) can see it.
if (tx == 0) {
shared_pixels[ty] = pixels0;
}
__syncthreads();
auto pixels1 = ty ? shared_pixels[ty - 1] : 0;
int p1 = (pixels1 & (1 << tx));
int s_dist1 = start_distance(pixels1, tx);
// Lane 0 uses the distances carried from the previous chunk.
if (tx == 0) {
s_dist0 = distance0;
s_dist1 = distance1;
}
// Vertically adjacent runs: merge at the first overlap.
if (p0 && p1 && (s_dist0 == 0 || s_dist1 == 0)) {
int label0 = key - s_dist0;
int label1 = key - w - s_dist1;
merge(label, label0, label1);
}
// Carry run lengths into the next 32-pixel chunk: if the entire
// chunk is foreground (distance 32) the run continues, so extend it.
auto d1 = start_distance(pixels1, 32);
distance1 = d1 == 32 ? d1 + distance1 : d1;
auto d0 = start_distance(pixels0, 32);
distance0 = d0 == 32 ? d0 + distance0 : d0;
}
}
}
}
// Phase 2: merge labels across strip boundaries by comparing each row with
// the row above it.  Only the lane at the start of an overlapping run pair
// issues the union, so each adjacency is merged once.
__global__ void MergeStripsKernel(const uint8_t* mask, int h, int w, int* label) {
auto tx = threadIdx.x;
auto ty = threadIdx.y;
auto x0 = tx + blockIdx.x * blockDim.x;
auto y0 = ty + blockIdx.y * blockDim.y;
auto w_32 = round_up(w, 32);
for (auto y = y0; y < h; y += blockDim.y * gridDim.y) {
if (y > 0) {
for (auto x = x0; x < w_32; x += blockDim.x * gridDim.x) {
unsigned active = __ballot_sync(0xffffffff, x < w);
if (x < w) {
auto key0 = y * w + x;
auto key1 = key0 - w;
auto p0 = mask[key0];
auto p1 = mask[key1];
auto pixels0 = __ballot_sync(active, p0);
auto pixels1 = __ballot_sync(active, p1);
if (p0 && p1) {
auto s_dist0 = start_distance(pixels0, tx);
auto s_dist1 = start_distance(pixels1, tx);
// Merge only at a run start to avoid redundant unions.
if (s_dist0 == 0 || s_dist1 == 0) {
merge(label, key0 - s_dist0, key1 - s_dist1);
}
}
}
}
}
}
}
// Map a dense component id (>= 0) into the range <= -2 so encoded ids are
// distinguishable from raw pixel indices (>= 0) and the -1 background fill.
// The mapping is an involution: decode(encode(x)) == x.
__device__ int encode(int label) { return -2 - label; }
__device__ int decode(int label) { return -2 - label; }
// Assign a dense component id to every root pixel (label[index] == index)
// and store it encoded (<= -2) so later passes can tell component ids from
// parent pixel indices.  n_comp ends up holding the component count.
struct _discretize_label_op {
int* label;
int* n_comp;
__device__ void operator()(int index) const {
if (label[index] == index) {
auto comp = atomicAdd(n_comp, 1);
label[index] = encode(comp);
}
}
};
// Convert encoded labels to the public format: background pixels become 0,
// foreground pixels their 1-based component id.
struct _decode_label_op {
const int* label;
int* output;
__device__ void operator()(int index) const {
auto comp = label[index];
output[index] = comp < -1 ? decode(comp) + 1 : 0;
}
};
// Phase 3: replace every foreground pixel's provisional label (a pixel
// index) with the encoded component id of its root.  Only the first lane of
// each horizontal run chases the parent chain; the result is broadcast to
// the rest of the run with __shfl_sync.
__global__ void RelabelStripsKernel(const uint8_t* mask, int h, int w, int* label) {
auto tx = threadIdx.x;
auto ty = threadIdx.y;
auto x0 = tx + blockIdx.x * blockDim.x;
auto y0 = ty + blockIdx.y * blockDim.y;
const auto stride_x = static_cast<int>(blockDim.x * gridDim.x);
const auto stride_y = static_cast<int>(blockDim.y * gridDim.y);
const auto w_32 = round_up(w, 32);
for (auto y = y0; y < h; y += stride_y) {
for (auto x = x0; x < w_32; x += stride_x) {
unsigned active = __ballot_sync(0xffffffff, x < w);
if (x < w) {
auto k = y * w + x;
auto p = mask[k];
auto pixels = __ballot_sync(active, p);
auto s_dist = start_distance(pixels, tx);
auto idx = 0;
// Run-start lane resolves the root; encoded ids are negative, so the
// walk stops once a component id is reached.
if (p && s_dist == 0) {
idx = label[k];
while (idx > 0) {
idx = label[idx];
}
}
// Broadcast the resolved id from the lane that started this run.
idx = __shfl_sync(active, idx, tx - s_dist);
if (p) {
label[k] = idx;
}
}
}
}
}
// Accumulate per-component pixel counts and score sums.  Each lane reduces
// its run segment with __shfl_down_sync; the lane at the start of a run
// commits the partial sums with atomics, indexed by the decoded component
// id stored in `label`.
__global__ void ComputeStatsKernel_v2(const uint8_t* mask, const int* label, const float* score,
int h, int w, float* comp_score, int* comp_area) {
auto tx = threadIdx.x;
auto ty = threadIdx.y;
auto x0 = tx + blockIdx.x * blockDim.x;
auto y0 = ty + blockIdx.y * blockDim.y;
const auto stride_x = static_cast<int>(blockDim.x * gridDim.x);
const auto stride_y = static_cast<int>(blockDim.y * gridDim.y);
const auto w_32 = round_up(w, 32);
for (auto y = y0; y < h; y += stride_y) {
for (auto x = x0; x < w_32; x += stride_x) {
unsigned active = __ballot_sync(0xffffffff, x < w);
if (x < w) {
auto k = y * w + x;
auto p = mask[k];
auto pixels = __ballot_sync(active, p);
auto s_dist = start_distance(pixels, tx);
auto count = end_distance(pixels, tx);
float s = p ? score[k] : 0;
// Warp-level tree reduction limited to this run's pixels.
for (int offset = 16; offset > 0; offset /= 2) {
auto v = __shfl_down_sync(active, s, offset);
// mask out past-the-end items
s += offset < count ? v : 0.f;
}
// The run-start lane owns the run's totals.
if (p && s_dist == 0) {
auto idx = decode(label[k]);
atomicAdd(comp_area + idx, count);
atomicAdd(comp_score + idx, s);
}
}
}
}
}
// Emit the boundary pixels of every component as (pixel index, component
// id) pairs.  A pixel is interior -- and therefore skipped -- when it has
// same-component neighbours on both sides of a horizontal, vertical or
// diagonal axis; `size` is the running entry count.
__global__ void GetContoursKernel(const int* label, int h, int w, int2* contour, int* size) {
const auto x0 = static_cast<int>(threadIdx.x + blockIdx.x * blockDim.x);
const auto y0 = static_cast<int>(threadIdx.y + blockIdx.y * blockDim.y);
const auto stride_x = static_cast<int>(blockDim.x * gridDim.x);
const auto stride_y = static_cast<int>(blockDim.y * gridDim.y);
for (auto y = y0; y < h; y += stride_y) {
for (auto x = x0; x < w; x += stride_x) {
const auto index = y * w + x;
// encoded label
const auto comp = label[index];
if (comp < -1) {
// non-linear filters
const auto l = x > 0 && label[index - 1] == comp;
const auto t = y > 0 && label[index - w] == comp;
const auto r = x < w - 1 && label[index + 1] == comp;
const auto b = y < h - 1 && label[index + w] == comp;
const auto tl = y > 0 && x > 0 && label[index - w - 1] == comp;
const auto tr = y > 0 && x < w - 1 && label[index - w + 1] == comp;
const auto bl = y < h - 1 && x > 0 && label[index + w - 1] == comp;
const auto br = y < h - 1 && x < w - 1 && label[index + w + 1] == comp;
if (!((l && r) || (t && b) || (tl && br) || (tr && bl))) {
const auto p = atomicAdd(size, 1);
contour[p] = {index, decode(comp)};
}
}
}
}
}
// Device-side state for connected-component labeling.  Buffers grow
// geometrically in Resize and are reused across calls.
struct ConnectedComponents::Impl {
public:
explicit Impl(cudaStream_t stream);
void Resize(int height, int width);
int GetComponents(const uint8_t* d_mask, int* h_label);
void GetContours(std::vector<std::vector<cv::Point>>& corners);
void GetStats(const uint8_t* d_mask, const float* d_score, std::vector<float>& scores,
std::vector<int>& areas);
~Impl();
int* d_label_{nullptr};  // per-pixel union-find parent / encoded label array
float* d_comp_score_{nullptr};  // per-component score sums
int* d_comp_area_{nullptr};  // per-component areas (also reused as scratch)
int* d_contour_{nullptr}; // int2
int* d_contour_size_{nullptr};  // single int: number of contour entries
int* d_n_comp_{nullptr};  // single int: number of components (device)
int n_comp_{0};  // host copy of the last component count
int height_{0};
int width_{0};
size_t size_{0};  // height_ * width_ of the current mask
size_t capacity_{0};  // allocated element capacity of the big buffers
double growth_factor_{1.1};
cudaStream_t stream_{nullptr};
bool owned_stream_{false};  // true when the stream was created here
};
// Run the full labeling pipeline on d_mask and return the component count.
// Pipeline: strip labeling -> strip merging -> root discretization ->
// relabeling.  If h_label is non-null, decoded per-pixel labels (0 =
// background, 1-based ids) are copied into it before the final sync.
// NOTE(review): CUDA API return codes are not checked anywhere in this
// file -- failures would only surface as wrong results.
int ConnectedComponents::Impl::GetComponents(const uint8_t* d_mask, int* h_label) {
{
dim3 threads(32, 4);
dim3 blocks(1, div_up(height_, (int)threads.y));
// -1 fill marks "no label yet" (background stays -1).
cudaMemsetAsync(d_label_, -1, sizeof(int) * size_, stream_);
LabelStripsKernel<32, 4><<<blocks, threads, 0, stream_>>>(d_mask, height_, width_, d_label_);
}
{
dim3 threads(32, 4);
dim3 blocks(div_up(width_, (int)threads.x), div_up(height_, (int)threads.y));
MergeStripsKernel<<<blocks, threads, 0, stream_>>>(d_mask, height_, width_, d_label_);
cudaMemsetAsync(d_n_comp_, 0, sizeof(int), stream_);
thrust::for_each_n(thrust::cuda::par.on(stream_), thrust::counting_iterator<int>(0),
height_ * width_, _discretize_label_op{d_label_, d_n_comp_});
RelabelStripsKernel<<<blocks, threads, 0, stream_>>>(d_mask, height_, width_, d_label_);
}
cudaMemcpyAsync(&n_comp_, d_n_comp_, sizeof(int), cudaMemcpyDefault, stream_);
if (h_label) {
dim3 threads(32, 4);
dim3 blocks(div_up(width_, (int)threads.x), div_up(height_, (int)threads.y));
// reuse d_comp_area_, which is also an int buffer
thrust::for_each_n(thrust::cuda::par.on(stream_), thrust::counting_iterator<int>(0),
height_ * width_, _decode_label_op{d_label_, d_comp_area_});
cudaMemcpyAsync(h_label, d_comp_area_, sizeof(int) * size_, cudaMemcpyDefault, stream_);
}
cudaStreamSynchronize(stream_);
return n_comp_;
}
// Compute per-component score sums and pixel areas for the labeling
// produced by the last GetComponents call; results are copied to the host
// vectors synchronously.
void ConnectedComponents::Impl::GetStats(const uint8_t* d_mask, const float* d_score,
std::vector<float>& scores, std::vector<int>& areas) {
cudaMemsetAsync(d_comp_score_, 0, sizeof(float) * size_, stream_);
cudaMemsetAsync(d_comp_area_, 0, sizeof(int) * size_, stream_);
dim3 threads(32, 4);
dim3 blocks(div_up(width_, (int)threads.x), div_up(height_, (int)threads.y));
ComputeStatsKernel_v2<<<blocks, threads, 0, stream_>>>(d_mask, d_label_, d_score, height_, width_,
d_comp_score_, d_comp_area_);
scores.resize(n_comp_);
areas.resize(n_comp_);
cudaMemcpyAsync(scores.data(), d_comp_score_, sizeof(float) * n_comp_, cudaMemcpyDefault,
stream_);
cudaMemcpyAsync(areas.data(), d_comp_area_, sizeof(int) * n_comp_, cudaMemcpyDefault, stream_);
cudaStreamSynchronize(stream_);
}
// Gather per-component boundary points.  The entry count is copied back
// first (with a sync) so the host knows how many (pixel index, component)
// pairs to fetch; pairs are then bucketed into `corners` by component id.
void ConnectedComponents::Impl::GetContours(std::vector<std::vector<cv::Point>>& corners) {
cudaMemsetAsync(d_contour_size_, 0, sizeof(int), stream_);
auto d_contour = reinterpret_cast<int2*>(d_contour_);
{
dim3 threads(32, 4);
dim3 blocks(div_up(width_, (int)threads.x), div_up(height_, (int)threads.y));
GetContoursKernel<<<blocks, threads, 0, stream_>>>(d_label_, height_, width_, d_contour,
d_contour_size_);
}
int contour_size{};
cudaMemcpyAsync(&contour_size, d_contour_size_, sizeof(int), cudaMemcpyDefault, stream_);
cudaStreamSynchronize(stream_);
std::vector<int2> index_comp(contour_size);
cudaMemcpyAsync(index_comp.data(), d_contour_, sizeof(int2) * contour_size, cudaMemcpyDefault,
stream_);
cudaStreamSynchronize(stream_);
corners.resize(n_comp_);
for (const auto& p : index_comp) {
auto comp = p.y;
assert(0 <= comp && comp < n_comp_);
// p.x is a linear pixel index; convert back to (x, y) coordinates.
corners[comp].emplace_back(p.x % width_, p.x / width_);
}
}
// Ensure device buffers can hold a height x width mask.  Capacity grows
// geometrically (factor 1.1) and buffers are reallocated only when the
// request exceeds the current capacity; existing contents are discarded.
// NOTE(review): `height * width` multiplies two ints before widening to
// size_t -- could overflow for very large images; cudaMalloc results are
// not checked.
void ConnectedComponents::Impl::Resize(int height, int width) {
size_t size = height * width;
if (size > capacity_) {
if (!capacity_) {
capacity_ = size;
} else {
while (capacity_ < size) {
capacity_ *= growth_factor_;
}
}
cudaFree(d_label_);
cudaFree(d_comp_score_);
cudaFree(d_comp_area_);
cudaFree(d_contour_);
cudaMalloc(&d_label_, sizeof(int) * capacity_);
cudaMalloc(&d_comp_score_, sizeof(float) * capacity_);
cudaMalloc(&d_comp_area_, sizeof(int) * capacity_);
cudaMalloc(&d_contour_, sizeof(int2) * capacity_);
}
// The two single-int buffers are allocated once, lazily.
if (!d_contour_size_) {
cudaMalloc(&d_contour_size_, sizeof(int));
}
if (!d_n_comp_) {
cudaMalloc(&d_n_comp_, sizeof(int));
}
height_ = height;
width_ = width;
size_ = size;
}
// Borrow the caller's stream, or create (and own) one when none is given.
ConnectedComponents::Impl::Impl(cudaStream_t stream) : stream_(stream) {
if (!stream_) {
cudaStreamCreate(&stream_);
owned_stream_ = true;
}
}
// Release every device buffer and, if it was created here, the stream.
ConnectedComponents::Impl::~Impl() {
cudaFree(d_label_);
cudaFree(d_comp_score_);
cudaFree(d_comp_area_);
cudaFree(d_contour_);
cudaFree(d_contour_size_);
cudaFree(d_n_comp_);
if (owned_stream_) {
cudaStreamDestroy(stream_);
}
}
// Public API: thin pimpl forwarders onto ConnectedComponents::Impl.
ConnectedComponents::ConnectedComponents(void* stream)
: impl_(std::make_unique<Impl>((cudaStream_t)stream)) {}
ConnectedComponents::~ConnectedComponents() = default;
void ConnectedComponents::Resize(int height, int width) { impl_->Resize(height, width); }
int ConnectedComponents::GetComponents(const uint8_t* d_mask, int* h_label) {
return impl_->GetComponents(d_mask, h_label);
}
void ConnectedComponents::GetContours(std::vector<std::vector<cv::Point>>& corners) {
return impl_->GetContours(corners);
}
void ConnectedComponents::GetStats(const uint8_t* d_mask, const float* d_score,
std::vector<float>& scores, std::vector<int>& areas) {
return impl_->GetStats(d_mask, d_score, scores, areas);
}
} // namespace mmdeploy
// Copyright (c) OpenMMLab. All rights reserved
#ifndef FAST_CC__CONNECTED_COMPONENT_H_
#define FAST_CC__CONNECTED_COMPONENT_H_
#include <cstdint>
#include <memory>
#include <utility>
#include <vector>
#include "opencv2/core.hpp"
namespace mmdeploy {
// GPU connected-component labeling of an 8-bit mask in device memory (see
// connected_component.cu; based on a direct CCL algorithm for GPUs).
// All work runs on the stream passed at construction.
class ConnectedComponents {
public:
// stream: a cudaStream_t passed as void* to keep CUDA types out of this
// header; pass nullptr to let the implementation create its own stream.
explicit ConnectedComponents(void* stream);
~ConnectedComponents();
// Ensure internal buffers fit a height x width mask.  Call before
// GetComponents whenever the mask size changes.
void Resize(int height, int width);
// Label components of d_mask (device memory); if h_label is non-null the
// decoded per-pixel labels are copied to it.  Returns the component count.
int GetComponents(const uint8_t* d_mask, int* h_label);
// Boundary pixels per component, for the last GetComponents call.
void GetContours(std::vector<std::vector<cv::Point>>& corners);
// Per-component score sums and areas, for the last GetComponents call.
void GetStats(const uint8_t* d_mask, const float* d_score, std::vector<float>& scores,
std::vector<int>& areas);
private:
struct Impl;
std::unique_ptr<Impl> impl_;
};
} // namespace mmdeploy
#endif // FAST_CC__CONNECTED_COMPONENT_H_
// Copyright (c) OpenMMLab. All rights reserved.
#include "mmdeploy/codebase/mmocr/dbnet.h"
#include "mmdeploy/codebase/mmocr/cuda/connected_component.h"
#include "mmdeploy/codebase/mmocr/cuda/utils.h"
#include "mmdeploy/core/utils/device_utils.h"
#include "mmdeploy/device/cuda/cuda_device.h"
#include "opencv2/imgproc.hpp"
namespace mmdeploy::mmocr {
// DBNet post-processing on GPU: threshold on device, run connected
// components, then build convex hulls and mean scores per component.
class DbHeadCudaImpl : public DbHeadImpl {
 public:
  void Init(const Stream& stream) override {
    DbHeadImpl::Init(stream);
    device_ = stream_.GetDevice();
    {
      CudaDeviceGuard device_guard(device_);
      cc_.emplace(GetNative<cudaStream_t>(stream_));
    }
  }

  ~DbHeadCudaImpl() override {
    // The labeler owns device buffers; destroy it with the device current.
    CudaDeviceGuard device_guard(device_);
    cc_.reset();
  }

  // score:          probability map tensor (1, H, W), float32, device memory
  // mask_thr:       binarization threshold
  // max_candidates: keep at most this many components (negative = no limit)
  // contours/scores:(out) convex hulls and their mean scores
  Result<void> Process(Tensor score, float mask_thr, int max_candidates,
                       std::vector<std::vector<cv::Point>>& contours,
                       std::vector<float>& scores) override {
    CudaDeviceGuard device_guard(device_);
    auto height = static_cast<int>(score.shape(1));
    auto width = static_cast<int>(score.shape(2));
    Buffer mask(device_, score.size() * sizeof(uint8_t));
    auto score_data = score.data<float>();
    auto mask_data = GetNative<uint8_t*>(mask);
    dbnet::Threshold(score_data, height * width, mask_thr, mask_data,
                     GetNative<cudaStream_t>(stream_));
    cc_->Resize(height, width);
    cc_->GetComponents(mask_data, nullptr);
    std::vector<std::vector<cv::Point>> points;
    cc_->GetContours(points);
    std::vector<float> _scores;
    std::vector<int> _areas;
    cc_->GetStats(mask_data, score_data, _scores, _areas);
    // FIX: size_t comparison and loop index replace the implicit
    // signed/unsigned mismatches; a negative max_candidates keeps all
    // components (same effective behavior as the old unsigned conversion).
    if (max_candidates >= 0 && points.size() > static_cast<size_t>(max_candidates)) {
      points.resize(max_candidates);
    }
    for (size_t i = 0; i < points.size(); ++i) {
      std::vector<cv::Point> hull;
      cv::convexHull(points[i], hull);
      if (hull.size() < 4) {
        continue;  // not enough vertices for a text box
      }
      contours.push_back(hull);
      // Components that produced contour points have a non-zero area.
      scores.push_back(_scores[i] / (float)_areas[i]);
    }
    return success();
  }

 private:
  Device device_;
  std::optional<ConnectedComponents> cc_;
};
MMDEPLOY_REGISTER_FACTORY_FUNC(DbHeadImpl, (cuda, 0),
                               [] { return std::make_unique<DbHeadCudaImpl>(); });
} // namespace mmdeploy::mmocr
// Copyright (c) OpenMMLab. All rights reserved.
#include "mmdeploy/codebase/mmocr/panet.h"
#include "mmdeploy/codebase/mmocr/cuda/connected_component.h"
#include "mmdeploy/codebase/mmocr/cuda/utils.h"
#include "mmdeploy/device/cuda/cuda_device.h"
namespace mmdeploy::mmocr {
// PANet post-processing on GPU: fused sigmoid+threshold kernels produce the
// text/kernel masks and text scores on device, the embedding map is
// transposed on device, and connected components are labeled with the
// custom GPU CCL; results are copied back into host cv::Mat buffers.
class PaHeadCudaImpl : public PaHeadImpl {
public:
void Init(const Stream& stream) override {
PaHeadImpl::Init(stream);
device_ = stream.GetDevice();
{
CudaDeviceGuard device_guard(device_);
cc_.emplace(GetNative<cudaStream_t>(stream_));
}
}
~PaHeadCudaImpl() override {
// Destroy the labeler's device buffers with the right device current.
CudaDeviceGuard device_guard(device_);
cc_.reset();
}
// text_pred/kernel_pred: (1, H, W) logits on device; embed_pred: (C, H*W).
// Outputs mirror the CPU implementation (see PaHeadCpuImpl).
Result<void> Process(Tensor text_pred, //
Tensor kernel_pred, //
Tensor embed_pred, //
float min_text_confidence, //
float min_kernel_confidence, //
cv::Mat_<float>& text_score, //
cv::Mat_<uint8_t>& text, //
cv::Mat_<uint8_t>& kernel, //
cv::Mat_<int>& label, //
cv::Mat_<float>& embed, //
int& region_num) override {
CudaDeviceGuard device_guard(device_);
auto height = static_cast<int>(text_pred.shape(1));
auto width = static_cast<int>(text_pred.shape(2));
// Scratch device buffers for the fused mask/score computation.
Buffer text_buf(device_, height * width);
Buffer text_score_buf(device_, height * width * sizeof(float));
Buffer kernel_buf(device_, height * width);
auto text_buf_data = GetNative<uint8_t*>(text_buf);
auto text_score_buf_data = GetNative<float*>(text_score_buf);
auto kernel_buf_data = GetNative<uint8_t*>(kernel_buf);
auto stream = GetNative<cudaStream_t>(stream_);
// One pass: sigmoid both heads, threshold, and AND kernel with text.
panet::ProcessMasks(text_pred.data<float>(), //
kernel_pred.data<float>(), //
min_text_confidence, //
min_kernel_confidence, //
height * width, //
text_buf_data, //
kernel_buf_data, //
text_score_buf_data, //
stream);
auto n_embed_channels = embed_pred.shape(0);
Buffer embed_buf(device_, embed_pred.byte_size());
// (C, H*W) -> (H*W, C): one embedding row per pixel.
panet::Transpose(embed_pred.data<float>(), //
n_embed_channels, //
height * width, //
GetNative<float*>(embed_buf), //
stream);
label = cv::Mat_<int>(height, width);
cc_->Resize(height, width);
// +1 so region ids match OpenCV's convention of counting background.
region_num = cc_->GetComponents(kernel_buf_data, label.ptr<int>()) + 1;
// Copy device results into freshly allocated host matrices.
text_score = cv::Mat_<float>(label.size());
OUTCOME_TRY(stream_.Copy(text_score_buf, text_score.data));
text = cv::Mat_<uint8_t>(label.size());
OUTCOME_TRY(stream_.Copy(text_buf, text.data));
kernel = cv::Mat_<uint8_t>(label.size());
OUTCOME_TRY(stream_.Copy(kernel_buf, kernel.data));
embed = cv::Mat_<float>(height * width, n_embed_channels);
OUTCOME_TRY(stream_.Copy(embed_buf, embed.data));
// Ensure all copies have landed before the host buffers are read.
OUTCOME_TRY(stream_.Wait());
return success();
}
private:
Device device_;
std::optional<ConnectedComponents> cc_;
};
MMDEPLOY_REGISTER_FACTORY_FUNC(PaHeadImpl, (cuda, 0),
[] { return std::make_unique<PaHeadCudaImpl>(); });
} // namespace mmdeploy::mmocr
// Copyright (c) OpenMMLab. All rights reserved.
#include "mmdeploy/codebase/mmocr/psenet.h"
#include "mmdeploy/codebase/mmocr/cuda/connected_component.h"
#include "mmdeploy/codebase/mmocr/cuda/utils.h"
#include "mmdeploy/core/utils/device_utils.h"
#include "mmdeploy/device/cuda/cuda_device.h"
#include "opencv2/imgproc.hpp"
namespace mmdeploy::mmocr {
// PSENet post-processing on GPU: fused sigmoid+threshold over all kernel
// channels on device, then GPU connected-component labeling of the
// smallest kernel; results are copied back into host cv::Mat buffers.
class PseHeadCudaImpl : public PseHeadImpl {
public:
void Init(const Stream& stream) override {
PseHeadImpl::Init(stream);
device_ = stream.GetDevice();
{
CudaDeviceGuard device_guard(device_);
cc_.emplace(GetNative<cudaStream_t>(stream_));
}
}
~PseHeadCudaImpl() override {
// Destroy the labeler's device buffers with the right device current.
CudaDeviceGuard device_guard(device_);
cc_.reset();
}
// preds: (C, H, W) logits; channel C-1 is the smallest kernel.
// Outputs mirror the CPU implementation (see PseHeadCpuImpl).
Result<void> Process(Tensor preds, //
float min_kernel_confidence, //
cv::Mat_<float>& score, //
cv::Mat_<uint8_t>& masks, //
cv::Mat_<int>& label, //
int& region_num) override {
CudaDeviceGuard device_guard(device_);
OUTCOME_TRY(preds, MakeAvailableOnDevice(preds, device_, stream_));
OUTCOME_TRY(stream_.Wait());
auto channels = static_cast<int>(preds.shape(0));
auto height = static_cast<int>(preds.shape(1));
auto width = static_cast<int>(preds.shape(2));
Buffer masks_buf(device_, preds.size());
Buffer score_buf(device_, height * width * sizeof(float));
auto masks_data = GetNative<uint8_t*>(masks_buf);
auto score_data = GetNative<float*>(score_buf);
// One pass: sigmoid every channel, threshold, AND with channel 0.
psenet::ProcessMasks(preds.data<float>(), channels, height * width, min_kernel_confidence,
masks_data, score_data, GetNative<cudaStream_t>(stream_));
cc_->Resize(height, width);
label = cv::Mat_<int>(height, width);
// Label the smallest kernel (last channel of the masks buffer).
auto kernel_mask_data = masks_data + height * width * (channels - 1);
// +1 so region ids match OpenCV's convention of counting background.
region_num = cc_->GetComponents(kernel_mask_data, label.ptr<int>()) + 1;
score = cv::Mat_<float>(label.size());
OUTCOME_TRY(stream_.Copy(score_buf, score.ptr<float>()));
masks = cv::Mat_<uint8_t>(channels, height * width);
OUTCOME_TRY(stream_.Copy(masks_buf, masks.ptr<uint8_t>()));
// NOTE(review): unlike PaHeadCudaImpl there is no stream_.Wait() before
// returning -- if Copy is asynchronous the host matrices may still be in
// flight when the caller reads them; confirm Copy's semantics.
return success();
}
private:
Device device_;
std::optional<ConnectedComponents> cc_;
};
MMDEPLOY_REGISTER_FACTORY_FUNC(PseHeadImpl, (cuda, 0),
[] { return std::make_unique<PseHeadCudaImpl>(); });
} // namespace mmdeploy::mmocr
// Copyright (c) OpenMMLab. All rights reserved.
#include "mmdeploy/codebase/mmocr/cuda/utils.h"
#include "thrust/iterator/counting_iterator.h"
#include "thrust/transform.h"
namespace mmdeploy {
namespace mmocr {
// Logistic sigmoid: maps a raw logit into (0, 1).
__device__ float sigmoid(float x) {
  const float neg_exp = expf(-x);
  return 1.f / (1.f + neg_exp);
}
namespace panet {

// Element-wise kernel: converts text/kernel logits to sigmoid probabilities,
// stores the text probability as the score, and writes 0/255 masks. The
// kernel mask is gated by the text mask.
struct _process_masks_op {
  const float* text_pred;
  const float* kernel_pred;
  float text_thr;
  float kernel_thr;
  uint8_t* text_mask;
  uint8_t* kernel_mask;
  float* text_score;
  __device__ void operator()(int index) const {
    const float text_prob = sigmoid(text_pred[index]);
    const float kernel_prob = sigmoid(kernel_pred[index]);
    text_score[index] = text_prob;
    const bool is_text = text_prob > text_thr;
    const bool is_kernel = is_text && kernel_prob > kernel_thr;
    text_mask[index] = is_text ? 255 : 0;
    kernel_mask[index] = is_kernel ? 255 : 0;
  }
};

// Enqueues _process_masks_op over n pixels on the given CUDA stream.
// All pointers refer to device memory.
void ProcessMasks(const float* d_text_pred, const float* d_kernel_pred, float text_thr,
                  float kernel_thr, int n, uint8_t* d_text_mask, uint8_t* d_kernel_mask,
                  float* d_text_score, cudaStream_t stream) {
  _process_masks_op op{d_text_pred, d_kernel_pred, text_thr,
                       kernel_thr,  d_text_mask,   d_kernel_mask,
                       d_text_score};
  thrust::for_each_n(thrust::cuda::par.on(stream), thrust::counting_iterator<int>(0), n, op);
}

// Element-wise kernel: scatters input[r][c] of a row-major h x w matrix
// into output[c][r].
struct _transpose_op {
  const float* input;
  float* output;
  int h;
  int w;
  __device__ void operator()(int index) const {
    const int r = index / w;
    const int c = index % w;
    output[c * h + r] = input[index];
  }
};

// Enqueues a dense h x w float-matrix transpose on the given CUDA stream.
void Transpose(const float* d_input, int h, int w, float* d_output, cudaStream_t stream) {
  _transpose_op op{d_input, d_output, h, w};
  thrust::for_each_n(thrust::cuda::par.on(stream), thrust::counting_iterator<int>(0), h * w, op);
}

}  // namespace panet
namespace dbnet {

// Element-wise predicate: true when the score reaches the binarization
// threshold.
struct _threshold_op {
  float thr;
  __device__ bool operator()(float score) const {
    return score >= thr;
  }
};

// Writes d_mask[i] = (d_score[i] >= thr) for n elements on the given CUDA
// stream. Note the resulting mask values are 0/1 (bool widened to uint8_t),
// unlike panet's 0/255 masks.
void Threshold(const float* d_score, int n, float thr, uint8_t* d_mask, cudaStream_t stream) {
  const float* first = d_score;
  const float* last = d_score + n;
  thrust::transform(thrust::cuda::par.on(stream), first, last, d_mask, _threshold_op{thr});
}

}  // namespace dbnet
namespace psenet {

// Per-pixel kernel over c stacked logit maps of n pixels each. Channel 0
// provides the score map and gates every channel: masks[i*n + index] is 255
// only when both channel 0 and channel i exceed thr.
struct _process_masks_op {
  const float* preds;
  int c;
  int n;
  float thr;
  uint8_t* masks;
  float* score;
  __device__ void operator()(int index) const {
    bool gate = false;
    for (int channel = 0; channel < c; ++channel) {
      const float prob = sigmoid(preds[channel * n + index]);
      if (channel == 0) {
        score[index] = prob;
        gate = prob > thr;
      }
      masks[channel * n + index] = (gate && prob > thr) ? 255 : 0;
    }
  }
};

// Enqueues _process_masks_op over n pixels on the given CUDA stream.
// All pointers refer to device memory.
void ProcessMasks(const float* d_preds, int c, int n, float thr, uint8_t* d_masks, float* d_score,
                  cudaStream_t stream) {
  _process_masks_op op{d_preds, c, n, thr, d_masks, d_score};
  thrust::for_each_n(thrust::cuda::par.on(stream), thrust::counting_iterator<int>(0), n, op);
}

}  // namespace psenet
} // namespace mmocr
} // namespace mmdeploy
// Copyright (c) OpenMMLab. All rights reserved.
#ifndef MMDEPLOY_CSRC_CODEBASE_MMOCR_CUDA_UTILS_H_
#define MMDEPLOY_CSRC_CODEBASE_MMOCR_CUDA_UTILS_H_
#include <cstdint>
#include "cuda_runtime.h"
namespace mmdeploy {
namespace mmocr {
namespace panet {
// Computes sigmoid text/kernel probabilities for n pixels, stores the text
// probability map, and writes 0/255 masks (kernel mask gated by text mask).
// All pointers are device memory; work is enqueued on `stream`.
void ProcessMasks(const float* d_text_pred, const float* d_kernel_pred, float text_thr,
                  float kernel_thr, int n, uint8_t* d_text_mask, uint8_t* d_kernel_mask,
                  float* d_text_score, cudaStream_t stream);
// Transposes a dense h x w float matrix (device memory) on `stream`.
void Transpose(const float* d_input, int h, int w, float* d_output, cudaStream_t stream);
}  // namespace panet
namespace dbnet {
// Writes d_mask[i] = (d_score[i] >= thr) for n elements (device memory) on `stream`.
void Threshold(const float* d_score, int n, float thr, uint8_t* d_mask, cudaStream_t stream);
}  // namespace dbnet
namespace psenet {
// For c stacked logit maps of n pixels each: channel 0 yields the score map
// and gates all channels; masks are written as 0/255. Device memory; enqueued
// on `stream`.
void ProcessMasks(const float* d_preds, int c, int n, float thr, uint8_t* d_masks, float* d_score,
                  cudaStream_t stream);
}  // namespace psenet
}  // namespace mmocr
}  // namespace mmdeploy
#endif // MMDEPLOY_CSRC_CODEBASE_MMOCR_CUDA_UTILS_H_
// Copyright (c) OpenMMLab. All rights reserved.
#include "mmdeploy/codebase/mmocr/dbnet.h"
#include <opencv2/imgproc.hpp>
#include "clipper.hpp"
#include "mmdeploy/core/device.h"
#include "mmdeploy/core/tensor.h"
#include "mmdeploy/core/utils/formatter.h"
#include "mmdeploy/experimental/module_adapter.h"
#include "mmocr.h"
namespace mmdeploy {
namespace mmocr {
using std::string;
using std::vector;
class DBHead : public MMOCR {
public:
explicit DBHead(const Value& config) : MMOCR(config) {
if (config.contains("params")) {
auto& params = config["params"];
text_repr_type_ = params.value("text_repr_type", text_repr_type_);
mask_thr_ = params.value("mask_thr", mask_thr_);
min_text_score_ = params.value("min_text_score", min_text_score_);
min_text_width_ = params.value("min_text_width", min_text_width_);
unclip_ratio_ = params.value("unclip_ratio", unclip_ratio_);
max_candidates_ = params.value("max_candidate", max_candidates_);
rescale_ = params.value("rescale", rescale_);
downsample_ratio_ = params.value("downsample_ratio", downsample_ratio_);
}
auto platform = Platform(device_.platform_id()).GetPlatformName();
auto creator = gRegistry<DbHeadImpl>().Get(platform);
if (!creator) {
MMDEPLOY_ERROR(
"DBHead: implementation for platform \"{}\" not found. Available platforms: {}", platform,
gRegistry<DbHeadImpl>().List());
throw_exception(eEntryNotFound);
}
impl_ = creator->Create();
impl_->Init(stream_);
}
Result<Value> operator()(const Value& _data, const Value& _prob) const {
auto conf = _prob["output"].get<Tensor>();
if (!(conf.shape().size() == 3 && conf.data_type() == DataType::kFLOAT)) {
MMDEPLOY_ERROR("unsupported `output` tensor, shape: {}, dtype: {}", conf.shape(),
(int)conf.data_type());
return Status(eNotSupported);
}
std::vector<std::vector<cv::Point>> contours;
std::vector<float> scores;
OUTCOME_TRY(impl_->Process(conf, mask_thr_, max_candidates_, contours, scores));
auto scale_w = 1.f;
auto scale_h = 1.f;
if (rescale_) {
scale_w /= downsample_ratio_ * _data["img_metas"]["scale_factor"][0].get<float>();
scale_h /= downsample_ratio_ * _data["img_metas"]["scale_factor"][1].get<float>();
}
TextDetections output;
for (int idx = 0; idx < contours.size(); ++idx) {
if (scores[idx] < min_text_score_) {
continue;
}
auto expanded = unclip(contours[idx], unclip_ratio_);
if (expanded.empty()) {
continue;
}
auto rect = cv::minAreaRect(expanded);
if ((int)rect.size.width <= min_text_width_) {
continue;
}
std::array<cv::Point2f, 4> box_points;
rect.points(box_points.data());
auto& det = output.emplace_back();
for (int i = 0; i < 4; ++i) {
// ! performance metrics drops without rounding here
det.bbox[i * 2] = cvRound(box_points[i].x * scale_w);
det.bbox[i * 2 + 1] = cvRound(box_points[i].y * scale_h);
}
det.score = scores[idx];
}
return to_value(output);
}
static std::vector<cv::Point> unclip(std::vector<cv::Point>& box, float unclip_ratio) {
namespace cl = ClipperLib;
auto area = cv::contourArea(box);
auto length = cv::arcLength(box, true);
auto distance = area * unclip_ratio / length;
cl::Path src;
transform(begin(box), end(box), back_inserter(src), [](auto p) {
return cl::IntPoint{p.x, p.y};
});
cl::ClipperOffset offset;
offset.AddPath(src, cl::jtRound, cl::etClosedPolygon);
std::vector<cl::Path> dst;
offset.Execute(dst, distance);
if (dst.size() != 1) {
return {};
}
std::vector<cv::Point> ret;
transform(begin(dst[0]), end(dst[0]), back_inserter(ret), [](auto p) {
return cv::Point{static_cast<int>(p.X), static_cast<int>(p.Y)};
});
return ret;
}
std::string text_repr_type_{"quad"};
float mask_thr_{.3};
float min_text_score_{.3};
int min_text_width_{5};
float unclip_ratio_{1.5};
int max_candidates_{3000};
bool rescale_{true};
float downsample_ratio_{1.};
std::unique_ptr<DbHeadImpl> impl_;
};
// Expose DBHead as an MMOCR component and define the registry that
// platform-specific DbHeadImpl backends register into.
MMDEPLOY_REGISTER_CODEBASE_COMPONENT(MMOCR, DBHead);
MMDEPLOY_DEFINE_REGISTRY(DbHeadImpl);
} // namespace mmocr
} // namespace mmdeploy
// Copyright (c) OpenMMLab. All rights reserved.
#ifndef MMDEPLOY_CSRC_CODEBASE_MMOCR_DBNET_H_
#define MMDEPLOY_CSRC_CODEBASE_MMOCR_DBNET_H_
#include "mmdeploy/codebase/mmocr/mmocr.h"
#include "mmdeploy/core/device.h"
#include "mmdeploy/core/registry.h"
#include "mmdeploy/core/tensor.h"
#include "opencv2/core.hpp"
namespace mmdeploy::mmocr {
// Platform-specific backend interface for DBNet post-processing.
class DbHeadImpl {
 public:
  virtual ~DbHeadImpl() = default;

  // Stores the stream used to schedule/synchronize backend work.
  virtual void Init(const Stream& stream) { stream_ = stream; }

  // Contract (from the DBHead caller): binarize `prob` at `mask_thr` and
  // extract up to `max_candidates` candidate contours with one score each.
  virtual Result<void> Process(Tensor prob, float mask_thr, int max_candidates,
                               std::vector<std::vector<cv::Point>>& points,
                               std::vector<float>& scores) = 0;

 protected:
  Stream stream_;  // set by Init(); available to subclasses
};

// Registry of platform-specific implementations (e.g. "cpu", "cuda").
MMDEPLOY_DECLARE_REGISTRY(DbHeadImpl, std::unique_ptr<DbHeadImpl>());
} // namespace mmdeploy::mmocr
#endif // MMDEPLOY_CSRC_CODEBASE_MMOCR_DBNET_H_
// Copyright (c) OpenMMLab. All rights reserved.
#include "mmdeploy/codebase/mmocr/mmocr.h"
#include "mmdeploy/core/registry.h"
#include "mmdeploy/core/utils/formatter.h"
namespace mmdeploy::mmocr {
MMDEPLOY_REGISTER_CODEBASE(MMOCR);
} // namespace mmdeploy::mmocr
// Copyright (c) OpenMMLab. All rights reserved.
#ifndef MMDEPLOY_MMOCR_H
#define MMDEPLOY_MMOCR_H
#include <array>
#include "mmdeploy/codebase/common.h"
#include "mmdeploy/core/device.h"
#include "mmdeploy/core/module.h"
namespace mmdeploy::mmocr {
// One detected text instance: four quadrilateral corners stored as
// (x0, y0, x1, y1, x2, y2, x3, y3) plus a confidence score.
struct TextDetection {
  std::array<float, 8> bbox;
  float score;
  MMDEPLOY_ARCHIVE_MEMBERS(bbox, score);
};
using TextDetections = std::vector<TextDetection>;

// One recognized text string with its score values (granularity — per step or
// per character — is defined by the recognizer implementation).
struct TextRecognition {
  std::string text;
  std::vector<float> score;
  MMDEPLOY_ARCHIVE_MEMBERS(text, score);
};

// Declares the MMOCR codebase ("mmocr") for component registration.
MMDEPLOY_DECLARE_CODEBASE(MMOCR, mmocr);
} // namespace mmdeploy::mmocr
#endif // MMDEPLOY_MMOCR_H
// Copyright (c) OpenMMLab. All rights reserved.
#include "mmdeploy/codebase/mmocr/panet.h"
#include <algorithm>
#include "mmdeploy/codebase/mmocr/mmocr.h"
#include "mmdeploy/core/device.h"
#include "mmdeploy/core/registry.h"
#include "mmdeploy/core/serialization.h"
#include "mmdeploy/core/utils/device_utils.h"
#include "opencv2/imgproc/imgproc.hpp"
namespace mmdeploy::mmocr {
// Defined in the CPU pixel-grouping translation unit. Groups text pixels
// around the labeled kernel components. One row is returned per region; as
// consumed by PANHead::operator(), element 0 is the region's confidence and
// the member-pixel coordinate pairs start at index 2.
std::vector<std::vector<float>> pixel_group_cpu(const cv::Mat_<float>& score,
                                                const cv::Mat_<uint8_t>& mask,
                                                const cv::Mat_<float>& embedding,
                                                const cv::Mat_<int32_t>& kernel_label,
                                                const cv::Mat_<uint8_t>& kernel_contour,
                                                int kernel_region_num, float dis_threshold);
// PANet text-detection post-processing head.
class PANHead : public MMOCR {
 public:
  /// Parses post-processing parameters from `config["params"]` and creates the
  /// platform-specific PaHeadImpl backend.
  /// @throws eEntryNotFound when no backend is registered for the platform.
  explicit PANHead(const Value& config) : MMOCR(config) {
    if (config.contains("params")) {
      auto& params = config["params"];
      min_text_avg_confidence_ = params.value("min_text_avg_confidence", min_text_avg_confidence_);
      min_kernel_confidence_ = params.value("min_kernel_confidence", min_kernel_confidence_);
      // Fixed: this line was a copy-paste duplicate of the
      // "min_text_avg_confidence" read above, which left min_text_confidence_
      // (passed to impl_->Process below) stuck at its default.
      min_text_confidence_ = params.value("min_text_confidence", min_text_confidence_);
      min_text_area_ = params.value("min_text_area", min_text_area_);
      rescale_ = params.value("rescale", rescale_);
      downsample_ratio_ = params.value("downsample_ratio", downsample_ratio_);
    }
    auto platform = Platform(device_.platform_id()).GetPlatformName();
    auto creator = gRegistry<PaHeadImpl>().Get(platform);
    if (!creator) {
      MMDEPLOY_ERROR(
          "PANHead: implementation for platform \"{}\" not found. Available platforms: {}",
          platform, gRegistry<PaHeadImpl>().List());
      throw_exception(eEntryNotFound);
    }
    impl_ = creator->Create();
    impl_->Init(stream_);
  }

  /// Converts the network prediction (`_pred["output"]`, float tensor of shape
  /// (1, C, H, W): text map, kernel map, then embedding channels) into rotated
  /// text boxes via kernel labeling + pixel grouping.
  Result<Value> operator()(const Value& _data, const Value& _pred) noexcept {
    OUTCOME_TRY(auto pred, MakeAvailableOnDevice(_pred["output"].get<Tensor>(), device_, stream_));
    OUTCOME_TRY(stream_.Wait());
    if (pred.shape().size() != 4 || pred.shape(0) != 1 || pred.data_type() != DataType::kFLOAT) {
      MMDEPLOY_ERROR("unsupported `output` tensor, shape: {}, dtype: {}", pred.shape(),
                     (int)pred.data_type());
      return Status(eNotSupported);
    }
    // drop batch dimension
    pred.Squeeze(0);

    auto text_pred = pred.Slice(0);                  // channel 0: text map
    auto kernel_pred = pred.Slice(1);                // channel 1: kernel map
    auto embed_pred = pred.Slice(2, pred.shape(0));  // remaining channels: embeddings

    cv::Mat_<float> text_score;
    cv::Mat_<uint8_t> text;
    cv::Mat_<uint8_t> kernel;
    cv::Mat_<int> labels;
    cv::Mat_<float> embed;
    int region_num = 0;
    OUTCOME_TRY(impl_->Process(text_pred, kernel_pred, embed_pred, min_text_confidence_,
                               min_kernel_confidence_, text_score, text, kernel, labels, embed,
                               region_num));

    auto text_points = pixel_group_cpu(text_score, text, embed, labels, kernel, region_num,
                                       min_text_avg_confidence_);

    auto scale_w = _data["img_metas"]["scale_factor"][0].get<float>();
    auto scale_h = _data["img_metas"]["scale_factor"][1].get<float>();

    TextDetections output;
    for (auto& text_point : text_points) {
      auto text_confidence = text_point[0];        // region layout: [conf, ..., x/y pairs]
      auto area = text_point.size() - 2;
      if (filter_instance(static_cast<float>(area), text_confidence, min_text_area_,
                          min_text_avg_confidence_)) {
        continue;
      }
      // Wrap the (x, y) pairs (starting at index 2) as an Nx2 matrix, no copy.
      cv::Mat_<float> points(text_point.size() / 2 - 1, 2, text_point.data() + 2);
      cv::RotatedRect rect = cv::minAreaRect(points);
      std::vector<cv::Point2f> vertices(4);
      rect.points(vertices.data());
      if (rescale_) {
        // Undo both the model rescale and the output downsampling.
        for (auto& p : vertices) {
          p.x /= scale_w * downsample_ratio_;
          p.y /= scale_h * downsample_ratio_;
        }
      }
      auto& det = output.emplace_back();
      for (int i = 0; i < 4; ++i) {
        det.bbox[i * 2] = vertices[i].x;
        det.bbox[i * 2 + 1] = vertices[i].y;
      }
      det.score = text_confidence;
    }

    return to_value(output);
  }

  /// True when a region should be discarded (too small or too low-confidence).
  static bool filter_instance(float area, float confidence, float min_area, float min_confidence) {
    return area < min_area || confidence < min_confidence;
  }

  float min_text_confidence_{.5f};       // text-map binarization threshold
  float min_kernel_confidence_{.5f};     // kernel-map binarization threshold
  float min_text_avg_confidence_{0.85};  // per-region mean-confidence cutoff
  float min_text_area_{16};              // per-region pixel-count cutoff
  bool rescale_{true};                   // map boxes back to original image coords
  float downsample_ratio_{.25f};         // network output / input size ratio
  std::unique_ptr<PaHeadImpl> impl_;     // platform-specific backend
};
// Expose PANHead as an MMOCR component and define the registry that
// platform-specific PaHeadImpl backends register into.
MMDEPLOY_REGISTER_CODEBASE_COMPONENT(MMOCR, PANHead);
MMDEPLOY_DEFINE_REGISTRY(PaHeadImpl);
} // namespace mmdeploy::mmocr
// Copyright (c) OpenMMLab. All rights reserved.
#ifndef MMDEPLOY_CSRC_CODEBASE_MMOCR_PANET_H_
#define MMDEPLOY_CSRC_CODEBASE_MMOCR_PANET_H_
#include "mmdeploy/codebase/mmocr/mmocr.h"
#include "mmdeploy/core/device.h"
#include "mmdeploy/core/registry.h"
#include "mmdeploy/core/tensor.h"
#include "opencv2/core.hpp"
namespace mmdeploy::mmocr {
// Platform-specific backend interface for PANet post-processing.
class PaHeadImpl {
 public:
  virtual ~PaHeadImpl() = default;

  // Stores the stream used to schedule/synchronize backend work.
  virtual void Init(const Stream& stream) { stream_ = stream; }

  // Contract (from the PANHead caller): threshold the text and kernel
  // predictions into a score map and 0/255 masks, label the kernel's
  // connected components (count returned in region_num), and lay out the
  // embedding vectors for pixel grouping.
  virtual Result<void> Process(Tensor text_pred,            //
                               Tensor kernel_pred,          //
                               Tensor embed_pred,           //
                               float min_text_confidence,   //
                               float min_kernel_confidence, //
                               cv::Mat_<float>& text_score, //
                               cv::Mat_<uint8_t>& text,     //
                               cv::Mat_<uint8_t>& kernel,   //
                               cv::Mat_<int>& label,        //
                               cv::Mat_<float>& embed,      //
                               int& region_num) = 0;

 protected:
  Stream stream_;  // set by Init(); available to subclasses
};

// Registry of platform-specific implementations (e.g. "cpu", "cuda").
MMDEPLOY_DECLARE_REGISTRY(PaHeadImpl, std::unique_ptr<PaHeadImpl>());
} // namespace mmdeploy::mmocr
#endif // MMDEPLOY_CSRC_CODEBASE_MMOCR_PANET_H_
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment