Commit 546b4279 authored by limm's avatar limm
Browse files

add csrc and mmdeploy module

parent 502f4fb9
Pipeline #2810 canceled with stages
// Copyright (c) OpenMMLab. All rights reserved.
#include "gather.h"
#include "../ncnn_ops_definer.h"
#include "assert.h"
namespace mmdeploy {
using namespace ncnn;
DEFINE_LAYER_CREATOR(Gather)
DEFINE_NCNN_OPS(Gather, Gather)
// Gather consumes two blobs (data + indices) and writes a brand-new output,
// so it is neither a one-input layer nor an in-place layer.
Gather::Gather() {
  support_inplace = false;
  one_blob_only = false;
}
// Fetch the gather axis from the param dict (param id 0, default 0).
int Gather::load_param(const ParamDict &pd) {
  const int configured_axis = pd.get(0, 0);
  axis = configured_axis;
  return 0;
}
// Gather only supports 1-dim indices: data and indices both carry an implicit
// batch dim in ncnn, so higher-rank indices would produce a shape that no
// longer matches the onnx result. With 1-dim indices, eliminating the
// implicit batch keeps the indices 1-dim, leaving a single implicit batch in
// data, which makes the output shape match onnx.
//
// Inputs:  bottom_blobs[0] = data (1/2/3 dims),
//          bottom_blobs[1] = indices (1-dim float blob; each value is
//          rounded to the nearest integer to form the index).
// Output:  top_blobs[0] = data gathered along `axis`.
// Returns: 0 on success, -100 on allocation failure.
int Gather::forward(const std::vector<Mat> &bottom_blobs, std::vector<Mat> &top_blobs,
                    const Option &opt) const {
  const Mat &bottom_blob = bottom_blobs[0];
  const Mat &indices = bottom_blobs[1];
  int dims = bottom_blob.dims;
  int indices_dims = indices.dims;
  size_t elemsize = bottom_blob.elemsize;
  int positive_axis = axis < 0 ? dims + axis : axis;
  Mat &top_blob = top_blobs[0];
  assert(indices.dims == 1);
  // indices arrive as floats; +0.5 rounding recovers the integer index
  const float *indices_ptr = indices;
  if (dims == 1 && indices_dims == 1)  // positive_axis == 0
  {
    int w = indices.w;
    top_blob.create(w, elemsize, opt.blob_allocator);
    if (top_blob.empty()) {
      return -100;
    }
    const float *ptr = bottom_blob;
    float *outptr = top_blob;
    for (int i = 0; i < w; i++) {
      float indice = indices_ptr[i];
      outptr[i] = ptr[(int)(indice + 0.5)];
    }
    return 0;
  }
  if (dims == 2 && positive_axis == 0 && indices_dims == 1) {
    // Select whole rows: h -> indices.w, w stays.
    int w = bottom_blob.w;
    top_blob.create(w, indices.w, elemsize, opt.blob_allocator);
    if (top_blob.empty()) {
      return -100;
    }
    for (int i = 0; i < indices.w; i++) {
      const int selected = (int)(indices_ptr[i] + 0.5);
      memcpy(top_blob.row(i), bottom_blob.row(selected), w * elemsize);
    }
    return 0;
  }
  if (dims == 2 && positive_axis == 1 && indices_dims == 1) {
    // Select columns within each row: w -> indices.w, h stays.
    int w = bottom_blob.w;
    int h = bottom_blob.h;
    top_blob.create(indices.w, h, elemsize, opt.blob_allocator);
    if (top_blob.empty()) {
      return -100;
    }
    const float *ptr = bottom_blob;
    float *outptr = top_blob;
    for (int j = 0; j < h; j++) {
      for (int i = 0; i < indices.w; i++) {
        int selected = (int)(indices_ptr[i] + 0.5);
        outptr[j * indices.w + i] = ptr[j * w + selected];
      }
    }
    return 0;
  }
  if (dims == 3 && positive_axis == 0 && indices_dims == 1) {
    // Select whole channels: c -> indices.w; copy each plane verbatim.
    int w = bottom_blob.w;
    int h = bottom_blob.h;
    top_blob.create(w, h, indices.w, elemsize, opt.blob_allocator);
    if (top_blob.empty()) {
      return -100;
    }
    for (int i = 0; i < indices.w; i++) {
      int selected = (int)(indices_ptr[i] + 0.5);
      const unsigned char *ptr = bottom_blob.channel(selected);
      unsigned char *outptr = top_blob.channel(i);
      memcpy(outptr, ptr, w * h * elemsize);
    }
    return 0;
  }
  if (dims == 3 && positive_axis == 1 && indices_dims == 1) {
    // Select rows inside every channel: h -> indices.w.
    int w = bottom_blob.w;
    int channels = bottom_blob.c;
    top_blob.create(w, indices.w, channels, elemsize, opt.blob_allocator);
    // Bug fix: allocation failure was previously not checked in this branch.
    if (top_blob.empty()) {
      return -100;
    }
// parallelize over channels; iterations are independent
#pragma omp parallel for num_threads(opt.num_threads)
    for (int i = 0; i < channels; i++) {
      float *outptr = top_blob.channel(i);
      const float *ptr = bottom_blob.channel(i);
      for (int j = 0; j < indices.w; j++) {
        int selected = (int)(indices_ptr[j] + 0.5);
        for (int k = 0; k < w; k++) {
          outptr[j * w + k] = ptr[selected * w + k];
        }
      }
    }
    return 0;
  }
  if (dims == 3 && positive_axis == 2 && indices_dims == 1) {
    // Select columns inside every row of every channel: w -> indices.w.
    int w = bottom_blob.w;
    int h = bottom_blob.h;
    int channels = bottom_blob.c;
    top_blob.create(indices.w, h, channels, elemsize, opt.blob_allocator);
    // Bug fix: allocation failure was previously not checked in this branch.
    if (top_blob.empty()) {
      return -100;
    }
// parallelize over channels; iterations are independent
#pragma omp parallel for num_threads(opt.num_threads)
    for (int i = 0; i < channels; i++) {
      float *outptr = top_blob.channel(i);
      const float *ptr = bottom_blob.channel(i);
      for (int j = 0; j < h; j++) {
        for (int k = 0; k < indices.w; k++) {
          int selected = (int)(indices_ptr[k] + 0.5);
          outptr[j * indices.w + k] = ptr[j * w + selected];
        }
      }
    }
    return 0;
  }
  // Unsupported dims/axis combination: leave top_blob untouched.
  return 0;
}
} // namespace mmdeploy
// Copyright (c) OpenMMLab. All rights reserved.
#ifndef LAYER_GATHER_H
#define LAYER_GATHER_H
#include "layer.h"
namespace mmdeploy {
// ncnn custom layer implementing onnx Gather along one configurable axis.
// Takes two input blobs (data, indices) and produces one gathered output.
class Gather : public ncnn::Layer {
public:
Gather();
// Reads the gather axis from param id 0 (default 0).
virtual int load_param(const ncnn::ParamDict& pd);
// bottom_blobs: [data, indices]; top_blobs: [gathered output].
virtual int forward(const std::vector<ncnn::Mat>& bottom_blobs, std::vector<ncnn::Mat>& top_blobs,
const ncnn::Option& opt) const;
public:
// Gather axis; negative values count back from the last dimension.
int axis;
};
} // namespace mmdeploy
#endif // LAYER_GATHER_H
// Copyright (c) OpenMMLab. All rights reserved.
#ifndef NCNN_OPS_DEFINER_H
#define NCNN_OPS_DEFINER_H
#include <string>
#include "layer.h"
#include "ncnn_ops_register.h"
namespace mmdeploy {
// Registers a custom ncnn layer into the global mmdeploy registries at
// static-initialization time (instantiated via the DEFINE_NCNN_OPS macro).
// The stored _ops_name keeps the c_str() key pointer alive for the lifetime
// of the static definer object.
class NCNNOpsDefiner {
 public:
  NCNNOpsDefiner(const std::string& ops_name, const ncnn::layer_creator_func& creator_func = 0,
                 const ncnn::layer_destroyer_func& destroyer_func = 0)
      : _ops_name(ops_name) {
    get_mmdeploy_layer_creator()[_ops_name.c_str()] = creator_func;
    // Bug fix: destroyer_func was previously accepted but never registered,
    // so register_mmdeploy_custom_layers could never find a destroyer.
    if (destroyer_func) {
      get_mmdeploy_layer_destroyer()[_ops_name.c_str()] = destroyer_func;
    }
  }

 private:
  const std::string _ops_name;
};
#define DEFINE_NCNN_OPS(ops_name, OpsLayer) \
static mmdeploy::NCNNOpsDefiner NCNNOpsDefiner##ops_name{#ops_name, OpsLayer##_layer_creator};
} // namespace mmdeploy
#endif
// Copyright (c) OpenMMLab. All rights reserved.
#include "ncnn_ops_register.h"
#include <iostream>
// Lazily-constructed global registry mapping layer names to creator funcs.
// NOTE(review): the key is a const char* compared by pointer value, not by
// string content — lookups only match the exact pointer used at registration.
std::map<const char *, ncnn::layer_creator_func> &get_mmdeploy_layer_creator() {
  static std::map<const char *, ncnn::layer_creator_func> creator_registry;
  return creator_registry;
}
// Lazily-constructed global registry mapping layer names to destroyer funcs.
// NOTE(review): keyed by const char* (pointer identity, not string content).
std::map<const char *, ncnn::layer_destroyer_func> &get_mmdeploy_layer_destroyer() {
  static std::map<const char *, ncnn::layer_destroyer_func> destroyer_registry;
  return destroyer_registry;
}
// Registers every mmdeploy custom layer (with its destroyer, when one was
// registered) into `net`. Returns the first non-zero error reported by
// Net::register_custom_layer, or 0 when all layers register cleanly.
int register_mmdeploy_custom_layers(ncnn::Net &net) {
  const auto &creators = get_mmdeploy_layer_creator();
  auto &destroyers = get_mmdeploy_layer_destroyer();
  for (const auto &entry : creators) {
    const char *name = entry.first;
    const ncnn::layer_creator_func creator = entry.second;
    ncnn::layer_destroyer_func destroyer = 0;
    const auto found = destroyers.find(name);
    if (found != destroyers.end()) {
      destroyer = found->second;
    }
    const int ret = net.register_custom_layer(name, creator, destroyer);
    if (ret != 0) {
      return ret;
    }
  }
  return 0;
}
// Copyright (c) OpenMMLab. All rights reserved.
#ifndef NCNN_OPS_REGISTER_H
#define NCNN_OPS_REGISTER_H
#include <map>
#include <string>
#include "mmdeploy/core/macro.h"
#include "net.h"
// Global name -> creator registry populated by NCNNOpsDefiner statics.
// NOTE(review): keys are const char* compared by pointer, not string value.
MMDEPLOY_API std::map<const char*, ncnn::layer_creator_func>& get_mmdeploy_layer_creator();
// Global name -> destroyer registry (may have no entry for a given layer).
MMDEPLOY_API std::map<const char*, ncnn::layer_destroyer_func>& get_mmdeploy_layer_destroyer();
// Registers every registry entry into `net`; returns the first non-zero
// error from Net::register_custom_layer, or 0 on success.
MMDEPLOY_API int register_mmdeploy_custom_layers(ncnn::Net& net);
#endif
// Copyright (c) OpenMMLab. All rights reserved.
#include "shape.h"
#include "../ncnn_ops_definer.h"
namespace mmdeploy {
using namespace ncnn;
DEFINE_LAYER_CREATOR(Shape)
DEFINE_NCNN_OPS(Shape, Shape)
// Shape reads one blob and emits a fresh blob describing its extents,
// hence one_blob_only without in-place support.
Shape::Shape() {
  support_inplace = false;
  one_blob_only = true;
}
// Writes the input's shape (with a leading implicit batch of 1) into a 1-D
// float blob of length dims + 1, ordered outermost-first:
//   dims == 1 -> [1, w]; dims == 2 -> [1, h, w]; dims == 3 -> [1, c, h, w].
// Returns 0 on success, -100 on allocation failure or unsupported rank.
int Shape::forward(const Mat &bottom_blob, Mat &top_blob, const Option &opt) const {
  const int dims = bottom_blob.dims;
  const int w = bottom_blob.w;
  // The output always stores the shape as floats, whatever the input dtype.
  const size_t elemsize = sizeof(float);
  top_blob.create(dims + 1, elemsize, opt.blob_allocator);
  if (top_blob.empty()) {
    return -100;
  }
  float *outptr = top_blob;
  if (dims == 1) {
    outptr[0] = 1.0f;
    outptr[1] = w;
  } else if (dims == 2) {
    const int h = bottom_blob.h;
    outptr[0] = 1.0f;
    outptr[1] = h;
    outptr[2] = w;
  } else if (dims == 3) {
    const int h = bottom_blob.h;
    const int channels = bottom_blob.c;
    outptr[0] = 1.0f;
    outptr[1] = channels;
    outptr[2] = h;
    outptr[3] = w;
  } else {
    // Bug fix: report on stderr (consistent with the other custom layers)
    // and fail instead of returning success with an uninitialized blob.
    fprintf(stderr, "Unsupported dims=%d\n", dims);
    return -100;
  }
  return 0;
}
} // namespace mmdeploy
// Copyright (c) OpenMMLab. All rights reserved.
#ifndef LAYER_SHAPE_H
#define LAYER_SHAPE_H
#include "layer.h"
namespace mmdeploy {
// ncnn custom layer that outputs the shape of its input blob (with a
// leading implicit batch of 1) as a 1-D float blob.
class Shape : public ncnn::Layer {
public:
Shape();
// bottom_blob: any 1/2/3-dim blob; top_blob: 1-D float blob of its extents.
virtual int forward(const ncnn::Mat& bottom_blob, ncnn::Mat& top_blob,
const ncnn::Option& opt) const;
};
} // namespace mmdeploy
#endif // LAYER_SHAPE_H
// Copyright (c) OpenMMLab. All rights reserved.
#include "tensorslice.h"
#include <math.h>
#include "../ncnn_ops_definer.h"
namespace mmdeploy {
using namespace ncnn;
DEFINE_LAYER_CREATOR(TensorSlice)
DEFINE_NCNN_OPS(TensorSlice, TensorSlice)
// TensorSlice maps a single input blob to a freshly allocated sliced blob,
// so it is one-blob-only and not in-place.
TensorSlice::TensorSlice() {
  support_inplace = false;
  one_blob_only = true;
}
// Reads onnx Slice attributes from the param dict:
//   0: starts, 1: ends, 2: axes, 3: steps (integer arrays stored in Mats).
// Missing axes default to [0 .. starts.w); missing steps default to all 1s.
int TensorSlice::load_param(const ParamDict& pd) {
starts = pd.get(0, Mat());
ends = pd.get(1, Mat());
axes = pd.get(2, Mat());
steps = pd.get(3, Mat());
if (axes.w == 0) {
// No axes given: the slice applies to the leading starts.w axes in order.
axes.create(starts.w);
int* axes_ptr = axes;
for (int i = 0; i < starts.w; i++) {
axes_ptr[i] = i;
}
}
if (steps.w == 0) {
// No steps given: default stride of 1 along every sliced axis.
steps.create(axes.w);
steps.fill(1);
}
return 0;
}
// Returns the extent of `blob` selected by the offset dims - axes:
//   0 -> blob.w, 1 -> blob.h, 2 -> blob.c.
// Any other offset is an error: logs to stderr and returns -1.
static inline int get_shape_by_axes(const Mat& blob, int axes, int dims) {
  switch (dims - axes) {
    case 0:
      return blob.w;
    case 1:
      return blob.h;
    case 2:
      return blob.c;
    default:
      fprintf(stderr, "wrong axes %d!\n", axes);
      return -1;
  }
  // (Dead `return 0;` after the exhaustive switch removed.)
}
// Implements onnx Slice for (effectively) one axis per layer instance, using
// the starts/ends/axes/steps attributes loaded in load_param(). Builds the
// set of selected source indices per dimension, then copies element-wise.
// Returns 0 on success, -100/-1 on error.
int TensorSlice::forward(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const {
  int dims = bottom_blob.dims;
  size_t elemsize = bottom_blob.elemsize;
  const int* start_ptr = starts;
  const int* end_ptr = ends;
  const int* axes_ptr = axes;
  const int* step_ptr = steps;
  if (starts.w > dims || ends.w > dims) {
    fprintf(stderr, "start/end attributes shape error!\n");
    return -100;
  }
  // NOTE(review): this only warns and then continues — multi-axis slices are
  // expected to arrive as chained single-axis slices from pytorch2onnx.
  if (axes.w != 1) {
    fprintf(stderr,
            "axes.w must be 1 because any of multiaxes slice is regarded as "
            "multi-staged onnx slice in pytorch2onnx.");
  }
  if (dims == 1) {
    for (int i = 0; i < axes.w; i++) {
      int positive_axis = axes_ptr[i] < 0 ? dims + axes_ptr[i] : axes_ptr[i];
      int step = step_ptr[i];
      // Collect the selected elements first, then copy into the output.
      std::vector<float> temp_val;
      int start = start_ptr[i];
      int end = end_ptr[i];
      int cur = start;
      if (step > 0) {
        while (cur < end && cur < bottom_blob.w) {
          temp_val.push_back(bottom_blob[cur]);
          cur += step;
        }
      } else if (step < 0) {
        // Negative step walks backwards until (exclusive) `end` or index 0.
        while (cur > end && cur > 0) {
          temp_val.push_back(bottom_blob[cur]);
          cur += step;
        }
      } else {
        fprintf(stderr, "step should not be 0!\n");
        return -100;
      }
      // NOTE(review): allocation failure (top_blob.empty()) is not checked
      // here, and the inner loop variable shadows the outer `i`.
      top_blob.create(temp_val.size(), elemsize, opt.blob_allocator);
      for (int i = 0; i < temp_val.size(); i++) {
        top_blob[i] = temp_val[i];
      }
    }
    return 0;
  }
  if (dims == 2) {
    // active_indice[d] holds the selected source indices for output dim d
    // (0 -> rows/h, 1 -> cols/w): start fully populated, then narrow the
    // sliced dimension below.
    std::vector<std::vector<int> > active_indice;
    std::vector<int> indices;
    for (int i = 0; i < bottom_blob.h; i++) {
      indices.push_back(i);
    }
    active_indice.push_back(indices);
    indices.clear();
    for (int i = 0; i < bottom_blob.w; i++) {
      indices.push_back(i);
    }
    active_indice.push_back(indices);
    for (int i = 0; i < axes.w; i++) {
      int positive_axis = axes_ptr[i] < 0 ? dims + axes_ptr[i] : axes_ptr[i];
      int step = step_ptr[i];
      int start = start_ptr[i];
      int end = end_ptr[i];
      int dim_shape = get_shape_by_axes(bottom_blob, positive_axis, dims);
      // NOTE(review): dim_shape_test is computed but never used.
      int dim_shape_test = get_shape_by_axes(bottom_blob, positive_axis, dims - 1);
      if (dim_shape < 0) {
        return -1;
      }
      // Clamp the end bound to the actual extent of the sliced dimension.
      end = end < dim_shape ? end : dim_shape;
      int cur = start;
      std::vector<int> temp_indice;
      if (step > 0) {
        while (cur < end && cur < dim_shape) {
          temp_indice.push_back(cur);
          cur += step;
        }
      } else if (step < 0) {
        while (cur > end && cur > 0) {
          temp_indice.push_back(cur);
          cur += step;
        }
      } else {
        fprintf(stderr, "step should not be 0!\n");
        return -100;
      }
      // NOTE(review): assumes positive_axis >= 1 (onnx axes count the
      // implicit batch, so ncnn dim d corresponds to onnx axis d + 1);
      // positive_axis == 0 would index active_indice[-1] — confirm the
      // exporter never emits axis 0 here.
      active_indice[positive_axis - 1] = temp_indice;
      active_indice[positive_axis - 1].resize(temp_indice.size());
    }
    top_blob.create((int)active_indice[1].size(), (int)active_indice[0].size(), elemsize,
                    opt.blob_allocator);
    for (int i = 0; i < active_indice[0].size(); i++) {
      for (int j = 0; j < active_indice[1].size(); j++) {
        top_blob.row(i)[j] = bottom_blob.row(active_indice[0][i])[active_indice[1][j]];
      }
    }
    return 0;
  }
  if (dims == 3) {
    // Same scheme with three dims: 0 -> channels/c, 1 -> rows/h, 2 -> cols/w.
    std::vector<std::vector<int> > active_indice;
    std::vector<int> indices;
    for (int i = 0; i < bottom_blob.c; i++) {
      indices.push_back(i);
    }
    active_indice.push_back(indices);
    indices.clear();
    for (int i = 0; i < bottom_blob.h; i++) {
      indices.push_back(i);
    }
    active_indice.push_back(indices);
    indices.clear();
    for (int i = 0; i < bottom_blob.w; i++) {
      indices.push_back(i);
    }
    active_indice.push_back(indices);
    for (int i = 0; i < axes.w; i++) {
      int positive_axis = axes_ptr[i] < 0 ? dims + axes_ptr[i] : axes_ptr[i];
      int step = step_ptr[i];
      int start = start_ptr[i];
      int end = end_ptr[i];
      int cur = start;
      std::vector<int> temp_indice;
      if (step > 0) {
        // NOTE(review): the bound uses bottom_blob.w regardless of which axis
        // is sliced — confirm this is intended when slicing c or h and w is
        // smaller than the sliced extent.
        while (cur < end && cur < bottom_blob.w) {
          temp_indice.push_back(cur);
          cur += step;
        }
      } else if (step < 0) {
        while (cur > end && cur > 0) {
          temp_indice.push_back(cur);
          cur += step;
        }
      } else {
        fprintf(stderr, "step should not be 0!\n");
        return -100;
      }
      // NOTE(review): same positive_axis >= 1 assumption as the 2-dim case.
      active_indice[positive_axis - 1] = temp_indice;
      active_indice[positive_axis - 1].resize(temp_indice.size());
    }
    top_blob.create((int)active_indice[2].size(), (int)active_indice[1].size(),
                    (int)active_indice[0].size(), elemsize, opt.blob_allocator);
    for (int i = 0; i < active_indice[0].size(); i++) {
      for (int j = 0; j < active_indice[1].size(); j++) {
        for (int k = 0; k < active_indice[2].size(); k++) {
          top_blob.channel(i).row(j)[k] = bottom_blob.channel(active_indice[0][i])
                                              .row(active_indice[1][j])[active_indice[2][k]];
        }
      }
    }
    return 0;
  }
  return 0;
}
} // namespace mmdeploy
// Copyright (c) OpenMMLab. All rights reserved.
#ifndef LAYER_TENSORSLICE_H
#define LAYER_TENSORSLICE_H
#include "layer.h"
namespace mmdeploy {
// ncnn custom layer implementing onnx Slice with starts/ends/axes/steps
// attributes (effectively one sliced axis per layer instance).
class TensorSlice : public ncnn::Layer {
public:
TensorSlice();
// Reads starts (0), ends (1), axes (2) and steps (3) from the param dict;
// missing axes/steps are synthesized with defaults.
virtual int load_param(const ncnn::ParamDict& pd);
// Copies the selected sub-range of bottom_blob into a new top_blob.
virtual int forward(const ncnn::Mat& bottom_blob, ncnn::Mat& top_blob,
const ncnn::Option& opt) const;
public:
// onnx Slice attributes, stored as integer arrays inside Mats.
ncnn::Mat starts;
ncnn::Mat ends;
ncnn::Mat axes;
ncnn::Mat steps;
};
} // namespace mmdeploy
#endif // LAYER_TENSORSLICE_H
// Copyright (c) OpenMMLab. All rights reserved.
#include "topk.h"
#include <math.h>
#include <functional>
#include "../ncnn_ops_definer.h"
namespace mmdeploy {
using namespace ncnn;
DEFINE_LAYER_CREATOR(TopK)
DEFINE_NCNN_OPS(TopK, TopK)
// TopK may take up to two inputs (data and an optional k blob) and writes
// freshly allocated outputs, so it is neither one-blob-only nor in-place.
TopK::TopK() {
  support_inplace = false;
  one_blob_only = false;
}
// Attributes mirror onnx TopK/ArgMax:
//   0: axis (default -1), 1: largest (default 1),
//   2: sorted (default 1),  3: keep_dims (default 1).
int TopK::load_param(const ParamDict& pd) {
  keep_dims = pd.get(3, 1);
  sorted = pd.get(2, 1);
  largest = pd.get(1, 1);
  axis = pd.get(0, -1);
  return 0;
}
int TopK::forward(const std::vector<Mat>& bottom_blobs, std::vector<Mat>& top_blobs,
const Option& opt) const {
int dims = bottom_blobs[0].dims;
int positive_axis = axis < 0 ? dims + axis : axis;
int topk;
if (bottom_blobs.size() == 2) {
const Mat& topk_blob = bottom_blobs[1];
topk = (int)(topk_blob[0] + 0.5);
} else if (bottom_blobs.size() == 1) {
topk = 1;
} else {
fprintf(stderr, "topk input blobs should be 1 or 2, but not %ld\n", bottom_blobs.size());
return -103;
}
// To do: Cut the top_val_blob after unit test. And we should change them in
// param files.
// Adaptive outputs. For onnx TopK, we output 2 blobs, for ArgMax, we output
// 1 blob.
Mat& top_val_blob = top_blobs[0];
Mat& top_ind_blob = top_blobs.size() == 2 ? top_blobs[1] : top_val_blob;
if (topk > 1) {
// real topk
if (keep_dims == 0) {
fprintf(stderr, "real topk should not reduce dims!\n");
return -102;
}
if (dims == 1 && positive_axis == 0) {
if (topk > bottom_blobs[0].w) {
fprintf(stderr, "topk should not greater than total items!\n");
return -100;
}
top_val_blob.create(topk, 4u, opt.blob_allocator);
if (top_val_blob.empty()) return -100;
top_ind_blob.create(topk, 4u, opt.blob_allocator);
if (top_ind_blob.empty()) return -100;
const float* ptr = bottom_blobs[0];
std::vector<std::pair<float, int> > vec;
vec.resize(bottom_blobs[0].w);
if (largest == 1) {
for (int i = 0; i < bottom_blobs[0].w; i++) {
vec[i] = std::make_pair(ptr[i], -i);
}
std::partial_sort(vec.begin(), vec.begin() + topk, vec.end(),
std::greater<std::pair<float, int> >());
} else if (largest == 0) {
for (int i = 0; i < bottom_blobs[0].w; i++) {
vec[i] = std::make_pair(ptr[i], i);
}
std::partial_sort(vec.begin(), vec.begin() + topk, vec.end(),
std::less<std::pair<float, int> >());
} else {
fprintf(stderr, "largest attribute should be 0 or 1, but not %d\n", largest);
return -100;
}
float* valptr = top_val_blob;
float* indptr = top_ind_blob;
if (sorted == 1) {
for (int i = 0; i < topk; i++) {
valptr[i] = vec[i].first;
indptr[i] = abs(vec[i].second);
}
} else if (sorted == 0) {
int cur = 0;
float valtarget = vec[topk - 1].first;
int indtarget = (int)(abs(vec[topk - 1].second) + 0.5);
// pair comparison
if (largest == 1) {
for (int i = 0; i < bottom_blobs[0].w; i++) {
if (cur >= topk) break;
if (bottom_blobs[0][i] > valtarget) {
valptr[cur] = bottom_blobs[0][i];
indptr[cur] = i;
cur++;
} else if (bottom_blobs[0][i] == valtarget && i <= indtarget) {
valptr[cur] = bottom_blobs[0][i];
indptr[cur] = i;
cur++;
}
}
} else {
for (int i = 0; i < bottom_blobs[0].w; i++) {
if (cur >= topk) break;
if (bottom_blobs[0][i] < valtarget) {
valptr[cur] = bottom_blobs[0][i];
indptr[cur] = i;
cur++;
} else if (bottom_blobs[0][i] == valtarget && i <= indtarget) {
valptr[cur] = bottom_blobs[0][i];
indptr[cur] = i;
cur++;
}
}
}
}
}
if (dims == 2 && positive_axis == 0) {
if (topk > bottom_blobs[0].h) {
fprintf(stderr, "topk should not greater than total items!\n");
return -100;
}
top_val_blob.create(bottom_blobs[0].w, topk, 4u, opt.blob_allocator);
if (top_val_blob.empty()) return -100;
top_ind_blob.create(bottom_blobs[0].w, topk, 4u, opt.blob_allocator);
if (top_ind_blob.empty()) return -100;
for (int col = 0; col < bottom_blobs[0].w; col++) {
std::vector<std::pair<float, int> > vec;
vec.resize(bottom_blobs[0].h);
if (largest == 1) {
for (int i = 0; i < bottom_blobs[0].h; i++) {
vec[i] = std::make_pair(bottom_blobs[0].row(i)[col], -i);
}
std::partial_sort(vec.begin(), vec.begin() + topk, vec.end(),
std::greater<std::pair<float, int> >());
} else if (largest == 0) {
for (int i = 0; i < bottom_blobs[0].h; i++) {
vec[i] = std::make_pair(bottom_blobs[0].row(i)[col], i);
}
std::partial_sort(vec.begin(), vec.begin() + topk, vec.end(),
std::less<std::pair<float, int> >());
} else {
fprintf(stderr, "largest attribute should be 0 or 1, but not %d\n", largest);
return -100;
}
if (sorted == 1) {
for (int i = 0; i < topk; i++) {
top_val_blob.row(i)[col] = vec[i].first;
top_ind_blob.row(i)[col] = abs(vec[i].second);
}
} else if (sorted == 0) {
int cur = 0;
float valtarget = vec[topk - 1].first;
int indtarget = (int)(abs(vec[topk - 1].second) + 0.5);
if (largest == 1) {
for (int i = 0; i < bottom_blobs[0].h; i++) {
if (cur >= topk) break;
if (bottom_blobs[0].row(i)[col] > valtarget) {
top_val_blob.row(cur)[col] = bottom_blobs[0].row(i)[col];
top_ind_blob.row(cur)[col] = i;
cur++;
} else if (bottom_blobs[0].row(i)[col] == valtarget && i <= indtarget) {
top_val_blob.row(cur)[col] = bottom_blobs[0].row(i)[col];
top_ind_blob.row(cur)[col] = i;
cur++;
}
}
} else {
for (int i = 0; i < bottom_blobs[0].h; i++) {
if (cur >= topk) break;
if (bottom_blobs[0].row(i)[col] < valtarget) {
top_val_blob.row(cur)[col] = bottom_blobs[0].row(i)[col];
top_ind_blob.row(cur)[col] = i;
cur++;
} else if (bottom_blobs[0].row(i)[col] == valtarget && i <= indtarget) {
top_val_blob.row(cur)[col] = bottom_blobs[0].row(i)[col];
top_ind_blob.row(cur)[col] = i;
cur++;
}
}
}
} else {
fprintf(stderr, "sorted attribute should be 0 or 1, but not %d\n", sorted);
return -100;
}
}
}
if (dims == 2 && positive_axis == 1) {
if (topk > bottom_blobs[0].w) {
fprintf(stderr, "topk should not greater than total items!\n");
return -100;
}
top_val_blob.create(topk, bottom_blobs[0].h, 4u, opt.blob_allocator);
if (top_val_blob.empty()) return -100;
top_ind_blob.create(topk, bottom_blobs[0].h, 4u, opt.blob_allocator);
if (top_ind_blob.empty()) return -100;
for (int r = 0; r < bottom_blobs[0].h; r++) {
std::vector<std::pair<float, int> > vec;
vec.resize(bottom_blobs[0].w);
if (largest == 1) {
for (int i = 0; i < bottom_blobs[0].w; i++) {
vec[i] = std::make_pair(bottom_blobs[0].row(r)[i], -i);
}
std::partial_sort(vec.begin(), vec.begin() + topk, vec.end(),
std::greater<std::pair<float, int> >());
} else if (largest == 0) {
for (int i = 0; i < bottom_blobs[0].w; i++) {
vec[i] = std::make_pair(bottom_blobs[0].row(r)[i], i);
}
std::partial_sort(vec.begin(), vec.begin() + topk, vec.end(),
std::less<std::pair<float, int> >());
} else {
fprintf(stderr, "largest attribute should be 0 or 1, but not %d\n", largest);
return -100;
}
if (sorted == 1) {
for (int i = 0; i < topk; i++) {
top_val_blob.row(r)[i] = vec[i].first;
top_ind_blob.row(r)[i] = abs(vec[i].second);
}
} else if (sorted == 0) {
int cur = 0;
float valtarget = vec[topk - 1].first;
int indtarget = (int)(abs(vec[topk - 1].second) + 0.5);
if (largest == 1) {
for (int i = 0; i < bottom_blobs[0].w; i++) {
if (cur >= topk) break;
if (bottom_blobs[0].row(r)[i] > valtarget) {
top_val_blob.row(r)[cur] = bottom_blobs[0].row(r)[i];
top_ind_blob.row(r)[cur] = i;
cur++;
} else if (bottom_blobs[0].row(r)[i] == valtarget && i <= indtarget) {
top_val_blob.row(r)[cur] = bottom_blobs[0].row(r)[i];
top_ind_blob.row(r)[cur] = i;
cur++;
}
}
} else {
for (int i = 0; i < bottom_blobs[0].w; i++) {
if (cur >= topk) break;
if (bottom_blobs[0].row(r)[i] < valtarget) {
top_val_blob.row(r)[cur] = bottom_blobs[0].row(r)[i];
top_ind_blob.row(r)[cur] = i;
cur++;
} else if (bottom_blobs[0].row(r)[i] == valtarget && i <= indtarget) {
top_val_blob.row(r)[cur] = bottom_blobs[0].row(r)[i];
top_ind_blob.row(r)[cur] = i;
cur++;
}
}
}
} else {
fprintf(stderr, "sorted attribute should be 0 or 1, but not %d\n", sorted);
return -100;
}
}
}
if (dims == 3 && positive_axis == 0) {
if (topk > bottom_blobs[0].c) {
fprintf(stderr, "topk should not greater than total items!\n");
return -100;
}
top_val_blob.create(bottom_blobs[0].w, bottom_blobs[0].h, topk, 4u, opt.blob_allocator);
if (top_val_blob.empty()) return -100;
top_ind_blob.create(bottom_blobs[0].w, bottom_blobs[0].h, topk, 4u, opt.blob_allocator);
if (top_ind_blob.empty()) return -100;
for (int r = 0; r < bottom_blobs[0].h; r++) {
for (int col = 0; col < bottom_blobs[0].w; col++) {
std::vector<std::pair<float, int> > vec;
vec.resize(bottom_blobs[0].c);
if (largest == 1) {
for (int i = 0; i < bottom_blobs[0].c; i++) {
vec[i] = std::make_pair(bottom_blobs[0].channel(i).row(r)[col], -i);
}
std::partial_sort(vec.begin(), vec.begin() + topk, vec.end(),
std::greater<std::pair<float, int> >());
} else if (largest == 0) {
for (int i = 0; i < bottom_blobs[0].c; i++) {
vec[i] = std::make_pair(bottom_blobs[0].channel(i).row(r)[col], i);
}
std::partial_sort(vec.begin(), vec.begin() + topk, vec.end(),
std::less<std::pair<float, int> >());
} else {
fprintf(stderr, "largest attribute should be 0 or 1, but not %d\n", largest);
return -100;
}
if (sorted == 1) {
for (int i = 0; i < topk; i++) {
top_val_blob.channel(i).row(r)[col] = vec[i].first;
top_ind_blob.channel(i).row(r)[col] = abs(vec[i].second);
}
} else if (sorted == 0) {
int cur = 0;
float valtarget = vec[topk - 1].first;
int indtarget = (int)(abs(vec[topk - 1].second) + 0.5);
if (largest == 1) {
for (int i = 0; i < bottom_blobs[0].c; i++) {
if (cur >= topk) break;
if (bottom_blobs[0].channel(i).row(r)[col] > valtarget) {
top_val_blob.channel(cur).row(r)[col] = bottom_blobs[0].channel(i).row(r)[col];
top_ind_blob.channel(cur).row(r)[col] = i;
cur++;
} else if (bottom_blobs[0].channel(i).row(r)[col] == valtarget && i <= indtarget) {
top_val_blob.channel(cur).row(r)[col] = bottom_blobs[0].channel(i).row(r)[col];
top_ind_blob.channel(cur).row(r)[col] = i;
cur++;
}
}
} else {
for (int i = 0; i < bottom_blobs[0].c; i++) {
if (cur >= topk) break;
if (bottom_blobs[0].channel(i).row(r)[col] < valtarget) {
top_val_blob.channel(cur).row(r)[col] = bottom_blobs[0].channel(i).row(r)[col];
top_ind_blob.channel(cur).row(r)[col] = i;
cur++;
} else if (bottom_blobs[0].channel(i).row(r)[col] == valtarget && i <= indtarget) {
top_val_blob.channel(cur).row(r)[col] = bottom_blobs[0].channel(i).row(r)[col];
top_ind_blob.channel(cur).row(r)[col] = i;
cur++;
}
}
}
} else {
fprintf(stderr, "sorted attribute should be 0 or 1, but not %d\n", sorted);
return -100;
}
}
}
}
if (dims == 3 && positive_axis == 1) {
if (topk > bottom_blobs[0].h) {
fprintf(stderr, "topk should not greater than total items!\n");
return -100;
}
top_val_blob.create(bottom_blobs[0].w, topk, bottom_blobs[0].c, 4u, opt.blob_allocator);
if (top_val_blob.empty()) return -100;
top_ind_blob.create(bottom_blobs[0].w, topk, bottom_blobs[0].c, 4u, opt.blob_allocator);
if (top_ind_blob.empty()) return -100;
for (int page = 0; page < bottom_blobs[0].c; page++) {
for (int col = 0; col < bottom_blobs[0].w; col++) {
std::vector<std::pair<float, int> > vec;
vec.resize(bottom_blobs[0].h);
if (largest == 1) {
for (int i = 0; i < bottom_blobs[0].h; i++) {
vec[i] = std::make_pair(bottom_blobs[0].channel(page).row(i)[col], -i);
}
std::partial_sort(vec.begin(), vec.begin() + topk, vec.end(),
std::greater<std::pair<float, int> >());
} else if (largest == 0) {
for (int i = 0; i < bottom_blobs[0].h; i++) {
vec[i] = std::make_pair(bottom_blobs[0].channel(page).row(i)[col], i);
}
std::partial_sort(vec.begin(), vec.begin() + topk, vec.end(),
std::less<std::pair<float, int> >());
} else {
fprintf(stderr, "largest attribute should be 0 or 1, but not %d\n", largest);
return -100;
}
if (sorted == 1) {
for (int i = 0; i < topk; i++) {
top_val_blob.channel(page).row(i)[col] = vec[i].first;
top_ind_blob.channel(page).row(i)[col] = abs(vec[i].second);
}
} else if (sorted == 0) {
int cur = 0;
float valtarget = vec[topk - 1].first;
int indtarget = (int)(abs(vec[topk - 1].second) + 0.5);
for (int i = 0; i < bottom_blobs[0].h; i++) {
if (cur >= topk) break;
if (largest == 1) {
if (bottom_blobs[0].channel(page).row(i)[col] > valtarget) {
top_val_blob.channel(page).row(cur)[col] =
bottom_blobs[0].channel(page).row(i)[col];
top_ind_blob.channel(page).row(cur)[col] = i;
cur++;
} else if (bottom_blobs[0].channel(page).row(i)[col] == valtarget &&
i <= indtarget) {
top_val_blob.channel(page).row(cur)[col] =
bottom_blobs[0].channel(page).row(i)[col];
top_ind_blob.channel(page).row(cur)[col] = i;
cur++;
}
} else {
if (bottom_blobs[0].channel(page).row(i)[col] < valtarget) {
top_val_blob.channel(page).row(cur)[col] =
bottom_blobs[0].channel(page).row(i)[col];
top_ind_blob.channel(page).row(cur)[col] = i;
cur++;
} else if (bottom_blobs[0].channel(page).row(i)[col] == valtarget &&
i <= indtarget) {
top_val_blob.channel(page).row(cur)[col] =
bottom_blobs[0].channel(page).row(i)[col];
top_ind_blob.channel(page).row(cur)[col] = i;
cur++;
}
}
}
} else {
fprintf(stderr, "sorted attribute should be 0 or 1, but not %d\n", sorted);
return -100;
}
}
}
}
if (dims == 3 && positive_axis == 2) {
if (topk > bottom_blobs[0].w) {
fprintf(stderr, "topk should not greater than total items!\n");
return -100;
}
top_val_blob.create(topk, bottom_blobs[0].h, bottom_blobs[0].c, 4u, opt.blob_allocator);
if (top_val_blob.empty()) return -100;
top_ind_blob.create(topk, bottom_blobs[0].h, bottom_blobs[0].c, 4u, opt.blob_allocator);
if (top_ind_blob.empty()) return -100;
for (int page = 0; page < bottom_blobs[0].c; page++) {
for (int r = 0; r < bottom_blobs[0].h; r++) {
std::vector<std::pair<float, int> > vec;
vec.resize(bottom_blobs[0].w);
if (largest == 1) {
for (int i = 0; i < bottom_blobs[0].w; i++) {
vec[i] = std::make_pair(bottom_blobs[0].channel(page).row(r)[i], -i);
}
std::partial_sort(vec.begin(), vec.begin() + topk, vec.end(),
std::greater<std::pair<float, int> >());
} else if (largest == 0) {
for (int i = 0; i < bottom_blobs[0].w; i++) {
vec[i] = std::make_pair(bottom_blobs[0].channel(page).row(r)[i], i);
}
std::partial_sort(vec.begin(), vec.begin() + topk, vec.end(),
std::less<std::pair<float, int> >());
} else {
fprintf(stderr, "largest attribute should be 0 or 1, but not %d\n", largest);
return -100;
}
if (sorted == 1) {
for (int i = 0; i < topk; i++) {
top_val_blob.channel(page).row(r)[i] = vec[i].first;
top_ind_blob.channel(page).row(r)[i] = abs(vec[i].second);
}
} else if (sorted == 0) {
int cur = 0;
float valtarget = vec[topk - 1].first;
int indtarget = (int)(abs(vec[topk - 1].second) + 0.5);
if (largest == 1) {
for (int i = 0; i < bottom_blobs[0].w; i++) {
if (cur >= topk) break;
if (bottom_blobs[0].channel(page).row(r)[i] > valtarget) {
top_val_blob.channel(page).row(r)[cur] = bottom_blobs[0].channel(page).row(r)[i];
top_ind_blob.channel(page).row(r)[cur] = i;
cur++;
} else if (bottom_blobs[0].channel(page).row(r)[i] == valtarget && i <= indtarget) {
top_val_blob.channel(page).row(r)[cur] = bottom_blobs[0].channel(page).row(r)[i];
top_ind_blob.channel(page).row(r)[cur] = i;
cur++;
}
}
} else {
for (int i = 0; i < bottom_blobs[0].w; i++) {
if (cur >= topk) break;
if (bottom_blobs[0].channel(page).row(r)[i] < valtarget) {
top_val_blob.channel(page).row(r)[cur] = bottom_blobs[0].channel(page).row(r)[i];
top_ind_blob.channel(page).row(r)[cur] = i;
cur++;
} else if (bottom_blobs[0].channel(page).row(r)[i] == valtarget && i <= indtarget) {
top_val_blob.channel(page).row(r)[cur] = bottom_blobs[0].channel(page).row(r)[i];
top_ind_blob.channel(page).row(r)[cur] = i;
cur++;
}
}
}
} else {
fprintf(stderr, "sorted attribute should be 0 or 1, but not %d\n", sorted);
return -100;
}
}
}
}
} else {
if (topk <= 0) {
fprintf(stderr, "topk should not <= 0!\n");
return -102;
}
if (dims == 1 && positive_axis == 0) {
if (topk > bottom_blobs[0].w) {
fprintf(stderr, "topk should not greater than total items!\n");
return -100;
}
top_val_blob.create(topk, 4u, opt.blob_allocator);
if (top_val_blob.empty()) return -100;
if (top_blobs.size() == 2) {
top_ind_blob.create(topk, 4u, opt.blob_allocator);
if (top_ind_blob.empty()) return -100;
}
const float* ptr = bottom_blobs[0];
std::vector<float> vec;
vec.resize(bottom_blobs[0].w);
float* valptr = top_val_blob;
float* indptr;
if (top_blobs.size() == 2) indptr = top_ind_blob;
for (int i = 0; i < bottom_blobs[0].w; i++) {
vec[i] = ptr[i];
}
if (largest == 1) {
auto index_iter = std::max_element(vec.begin(), vec.end());
valptr[0] = *index_iter;
if (top_blobs.size() == 2)
indptr[0] = std::distance(vec.begin(), index_iter);
else
valptr[0] = std::distance(vec.begin(), index_iter); // replace with index
} else if (largest == 0) {
auto index_iter = std::min_element(vec.begin(), vec.end());
valptr[0] = *index_iter;
if (top_blobs.size() == 2)
indptr[0] = std::distance(vec.begin(), index_iter);
else
valptr[0] = std::distance(vec.begin(), index_iter); // replace with index
} else {
fprintf(stderr, "largest attribute should be 0 or 1, but not %d\n", largest);
return -100;
}
}
if (dims == 2 && positive_axis == 0) {
if (keep_dims == 1) {
top_val_blob.create(bottom_blobs[0].w, topk, 4u, opt.blob_allocator);
if (top_val_blob.empty()) return -100;
if (top_blobs.size() == 2) {
top_ind_blob.create(bottom_blobs[0].w, topk, 4u, opt.blob_allocator);
if (top_ind_blob.empty()) return -100;
}
} else {
top_val_blob.create(bottom_blobs[0].w, 4u, opt.blob_allocator);
if (top_val_blob.empty()) return -100;
if (top_blobs.size() == 2) {
top_ind_blob.create(bottom_blobs[0].w, 4u, opt.blob_allocator);
if (top_ind_blob.empty()) return -100;
}
}
const float* ptr = bottom_blobs[0];
std::vector<float> vec;
vec.resize(bottom_blobs[0].h);
float* valptr = top_val_blob;
float* indptr;
if (top_blobs.size() == 2) indptr = top_ind_blob;
for (int col = 0; col < bottom_blobs[0].w; col++) {
for (int i = 0; i < bottom_blobs[0].h; i++) {
vec[i] = ptr[i * bottom_blobs[0].w + col];
}
if (largest == 1) {
auto index_iter = std::max_element(vec.begin(), vec.end());
valptr[col] = *index_iter;
if (top_blobs.size() == 2)
indptr[col] = std::distance(vec.begin(), index_iter);
else
valptr[col] = std::distance(vec.begin(), index_iter);
} else if (largest == 0) {
auto index_iter = std::min_element(vec.begin(), vec.end());
valptr[col] = *index_iter;
if (top_blobs.size() == 2)
indptr[col] = std::distance(vec.begin(), index_iter);
else
valptr[col] = std::distance(vec.begin(), index_iter);
} else {
fprintf(stderr, "largest attribute should be 0 or 1, but not %d\n", largest);
return -100;
}
}
}
if (dims == 2 && positive_axis == 1) {
if (keep_dims == 1) {
top_val_blob.create(topk, bottom_blobs[0].h, 4u, opt.blob_allocator);
if (top_val_blob.empty()) return -100;
if (top_blobs.size() == 2) {
top_ind_blob.create(topk, bottom_blobs[0].h, 4u, opt.blob_allocator);
if (top_ind_blob.empty()) return -100;
}
} else {
top_val_blob.create(bottom_blobs[0].h, 4u, opt.blob_allocator);
if (top_val_blob.empty()) return -100;
if (top_blobs.size() == 2) {
top_ind_blob.create(bottom_blobs[0].h, 4u, opt.blob_allocator);
if (top_ind_blob.empty()) return -100;
}
}
const float* ptr = bottom_blobs[0];
std::vector<float> vec;
vec.resize(bottom_blobs[0].w);
float* valptr = top_val_blob;
float* indptr;
if (top_blobs.size() == 2) indptr = top_ind_blob;
for (int r = 0; r < bottom_blobs[0].h; r++) {
for (int i = 0; i < bottom_blobs[0].w; i++) {
vec[i] = ptr[r * bottom_blobs[0].w + i];
}
if (largest == 1) {
auto index_iter = std::max_element(vec.begin(), vec.end());
valptr[r] = *index_iter;
if (top_blobs.size() == 2)
indptr[r] = std::distance(vec.begin(), index_iter);
else
valptr[r] = std::distance(vec.begin(), index_iter);
} else if (largest == 0) {
auto index_iter = std::min_element(vec.begin(), vec.end());
valptr[r] = *index_iter;
if (top_blobs.size() == 2)
indptr[r] = std::distance(vec.begin(), index_iter);
else
valptr[r] = std::distance(vec.begin(), index_iter);
} else {
fprintf(stderr, "largest attribute should be 0 or 1, but not %d\n", largest);
return -100;
}
}
}
if (dims == 3 && positive_axis == 0) {
if (keep_dims == 1) {
top_val_blob.create(bottom_blobs[0].w, bottom_blobs[0].h, topk, 4u, opt.blob_allocator);
if (top_val_blob.empty()) return -100;
if (top_blobs.size() == 2) {
top_ind_blob.create(bottom_blobs[0].w, bottom_blobs[0].h, topk, 4u, opt.blob_allocator);
if (top_ind_blob.empty()) return -100;
}
} else {
top_val_blob.create(bottom_blobs[0].w, bottom_blobs[0].h, 4u, opt.blob_allocator);
if (top_val_blob.empty()) return -100;
if (top_blobs.size() == 2) {
top_ind_blob.create(bottom_blobs[0].w, bottom_blobs[0].h, 4u, opt.blob_allocator);
if (top_ind_blob.empty()) return -100;
}
}
const float* ptr = bottom_blobs[0];
std::vector<float> vec;
vec.resize(bottom_blobs[0].c);
float* valptr = top_val_blob;
float* indptr;
if (top_blobs.size() == 2) indptr = top_ind_blob;
for (int r = 0; r < bottom_blobs[0].h; r++) {
for (int col = 0; col < bottom_blobs[0].w; col++) {
for (int i = 0; i < bottom_blobs[0].c; i++) {
ptr = bottom_blobs[0].channel(i);
vec[i] = ptr[r * bottom_blobs[0].w + col];
}
if (largest == 1) {
auto index_iter = std::max_element(vec.begin(), vec.end());
valptr[r * top_val_blob.w + col] = *index_iter;
if (top_blobs.size() == 2)
indptr[r * top_ind_blob.w + col] = std::distance(vec.begin(), index_iter);
else
valptr[r * top_ind_blob.w + col] = std::distance(vec.begin(), index_iter);
} else if (largest == 0) {
auto index_iter = std::min_element(vec.begin(), vec.end());
valptr[r * top_val_blob.w + col] = *index_iter;
if (top_blobs.size() == 2)
indptr[r * top_ind_blob.w + col] = std::distance(vec.begin(), index_iter);
else
valptr[r * top_ind_blob.w + col] = std::distance(vec.begin(), index_iter);
} else {
fprintf(stderr, "largest attribute should be 0 or 1, but not %d\n", largest);
return -100;
}
}
}
}
if (dims == 3 && positive_axis == 1) {
if (keep_dims == 1) {
top_val_blob.create(bottom_blobs[0].w, topk, bottom_blobs[0].c, 4u, opt.blob_allocator);
if (top_val_blob.empty()) return -100;
if (top_blobs.size() == 2) {
top_ind_blob.create(bottom_blobs[0].w, topk, bottom_blobs[0].c, 4u, opt.blob_allocator);
if (top_ind_blob.empty()) return -100;
}
std::vector<float> vec;
vec.resize(bottom_blobs[0].h);
for (int page = 0; page < bottom_blobs[0].c; page++) {
const float* ptr = bottom_blobs[0].channel(page);
float* valptr = top_val_blob.channel(page);
float* indptr;
if (top_blobs.size() == 2) indptr = top_ind_blob.channel(page);
for (int col = 0; col < bottom_blobs[0].w; col++) {
for (int i = 0; i < bottom_blobs[0].h; i++) {
vec[i] = ptr[i * bottom_blobs[0].w + col];
}
if (largest == 1) {
auto index_iter = std::max_element(vec.begin(), vec.end());
valptr[col] = *index_iter;
if (top_blobs.size() == 2)
indptr[col] = std::distance(vec.begin(), index_iter);
else
valptr[col] = std::distance(vec.begin(), index_iter);
} else if (largest == 0) {
auto index_iter = std::min_element(vec.begin(), vec.end());
valptr[col] = *index_iter;
if (top_blobs.size() == 2)
indptr[col] = std::distance(vec.begin(), index_iter);
else
valptr[col] = std::distance(vec.begin(), index_iter);
} else {
fprintf(stderr, "largest attribute should be 0 or 1, but not %d\n", largest);
return -100;
}
}
}
} else {
top_val_blob.create(bottom_blobs[0].w, bottom_blobs[0].c, 4u, opt.blob_allocator);
if (top_val_blob.empty()) return -100;
if (top_blobs.size() == 2) {
top_ind_blob.create(bottom_blobs[0].w, bottom_blobs[0].c, 4u, opt.blob_allocator);
if (top_ind_blob.empty()) return -100;
}
std::vector<float> vec;
vec.resize(bottom_blobs[0].h);
float* valptr = top_val_blob;
float* indptr;
if (top_blobs.size() == 2) indptr = top_ind_blob;
for (int page = 0; page < bottom_blobs[0].c; page++) {
const float* ptr = bottom_blobs[0].channel(page);
for (int col = 0; col < bottom_blobs[0].w; col++) {
for (int i = 0; i < bottom_blobs[0].h; i++) {
vec[i] = ptr[i * bottom_blobs[0].w + col];
}
if (largest == 1) {
auto index_iter = std::max_element(vec.begin(), vec.end());
valptr[page * top_val_blob.w + col] = *index_iter;
if (top_blobs.size() == 2)
indptr[page * top_ind_blob.w + col] = std::distance(vec.begin(), index_iter);
else
valptr[page * top_ind_blob.w + col] = std::distance(vec.begin(), index_iter);
} else if (largest == 0) {
auto index_iter = std::min_element(vec.begin(), vec.end());
valptr[page * top_val_blob.w + col] = *index_iter;
if (top_blobs.size() == 2)
indptr[page * top_ind_blob.w + col] = std::distance(vec.begin(), index_iter);
else
valptr[page * top_ind_blob.w + col] = std::distance(vec.begin(), index_iter);
} else {
fprintf(stderr, "largest attribute should be 0 or 1, but not %d\n", largest);
return -100;
}
}
}
}
}
if (dims == 3 && positive_axis == 2) {
if (keep_dims == 1) {
top_val_blob.create(topk, bottom_blobs[0].h, bottom_blobs[0].c, 4u, opt.blob_allocator);
if (top_val_blob.empty()) return -100;
if (top_blobs.size() == 2) {
top_ind_blob.create(topk, bottom_blobs[0].h, bottom_blobs[0].c, 4u, opt.blob_allocator);
if (top_ind_blob.empty()) return -100;
}
std::vector<float> vec;
vec.resize(bottom_blobs[0].w);
for (int page = 0; page < bottom_blobs[0].c; page++) {
const float* ptr = bottom_blobs[0].channel(page);
float* valptr = top_val_blob.channel(page);
float* indptr;
if (top_blobs.size() == 2) indptr = top_ind_blob.channel(page);
for (int r = 0; r < bottom_blobs[0].h; r++) {
for (int i = 0; i < bottom_blobs[0].w; i++) {
vec[i] = ptr[r * bottom_blobs[0].w + i];
}
if (largest == 1) {
auto index_iter = std::max_element(vec.begin(), vec.end());
valptr[r] = *index_iter;
if (top_blobs.size() == 2)
indptr[r] = std::distance(vec.begin(), index_iter);
else
valptr[r] = std::distance(vec.begin(), index_iter);
} else if (largest == 0) {
auto index_iter = std::min_element(vec.begin(), vec.end());
valptr[r] = *index_iter;
if (top_blobs.size() == 2)
indptr[r] = std::distance(vec.begin(), index_iter);
else
valptr[r] = std::distance(vec.begin(), index_iter);
} else {
fprintf(stderr, "largest attribute should be 0 or 1, but not %d\n", largest);
return -100;
}
}
}
} else {
top_val_blob.create(bottom_blobs[0].h, bottom_blobs[0].c, 4u, opt.blob_allocator);
if (top_val_blob.empty()) return -100;
if (top_blobs.size() == 2) {
top_ind_blob.create(bottom_blobs[0].h, bottom_blobs[0].c, 4u, opt.blob_allocator);
if (top_ind_blob.empty()) return -100;
}
std::vector<float> vec;
vec.resize(bottom_blobs[0].w);
float* valptr = top_val_blob;
float* indptr;
if (top_blobs.size() == 2) indptr = top_ind_blob;
for (int page = 0; page < bottom_blobs[0].c; page++) {
const float* ptr = bottom_blobs[0].channel(page);
for (int r = 0; r < bottom_blobs[0].h; r++) {
for (int i = 0; i < bottom_blobs[0].w; i++) {
vec[i] = ptr[r * bottom_blobs[0].w + i];
}
if (largest == 1) {
auto index_iter = std::max_element(vec.begin(), vec.end());
valptr[page * top_val_blob.w + r] = *index_iter;
if (top_blobs.size() == 2)
indptr[page * top_ind_blob.w + r] = std::distance(vec.begin(), index_iter);
else
valptr[page * top_ind_blob.w + r] = std::distance(vec.begin(), index_iter);
} else if (largest == 0) {
auto index_iter = std::min_element(vec.begin(), vec.end());
valptr[page * top_val_blob.w + r] = *index_iter;
if (top_blobs.size() == 2)
indptr[page * top_val_blob.w + r] = std::distance(vec.begin(), index_iter);
else
valptr[page * top_ind_blob.w + r] = std::distance(vec.begin(), index_iter);
} else {
fprintf(stderr, "largest attribute should be 0 or 1, but not %d\n", largest);
return -100;
}
}
}
}
}
}
return 0;
}
} // namespace mmdeploy
// Copyright (c) OpenMMLab. All rights reserved.
#ifndef LAYER_TOPK_H
#define LAYER_TOPK_H
#include "layer.h"
namespace mmdeploy {
// ncnn custom layer implementing the ONNX TopK operator exported by mmdeploy.
class TopK : public ncnn::Layer {
 public:
  TopK();
  // Reads the attributes below from the ncnn param dictionary.
  virtual int load_param(const ncnn::ParamDict& pd);
  // Computes top-k along `axis`. top_blobs[0] receives the selected values;
  // when two outputs are requested, top_blobs[1] receives the indices.
  virtual int forward(const std::vector<ncnn::Mat>& bottom_blobs, std::vector<ncnn::Mat>& top_blobs,
                      const ncnn::Option& opt) const;
 public:
  int axis;       // dimension to reduce along
  int largest;    // 1 = take maxima, 0 = take minima (forward rejects other values)
  int sorted;     // ONNX `sorted` attribute; NOTE(review): not used in the visible forward()
  int keep_dims;  // 1 = keep the reduced dimension (size k), 0 = squeeze it
};
} // namespace mmdeploy
#endif // LAYER_TOPK_H
# Copyright (c) OpenMMLab. All rights reserved.
project(ncnn_ext)
# pybind11: build the bundled copy only if a parent scope has not already
# provided the target.
if (NOT TARGET pybind11)
  add_subdirectory(${CMAKE_SOURCE_DIR}/third_party/pybind11 pybind11)
endif ()
# Python extension exposing ncnn custom-layer registration (ncnn_ext.cpp).
pybind11_add_module(ncnn_ext ncnn_ext.cpp)
target_link_libraries(ncnn_ext PUBLIC mmdeploy_ncnn_ops ncnn)
# Emit the module next to the Python backend package so it is importable
# as mmdeploy.backend.ncnn.ncnn_ext straight from the source tree.
set(_NCNN_EXT_DIR ${CMAKE_SOURCE_DIR}/mmdeploy/backend/ncnn)
set_target_properties(ncnn_ext PROPERTIES
        LIBRARY_OUTPUT_DIRECTORY ${_NCNN_EXT_DIR}
        LIBRARY_OUTPUT_DIRECTORY_DEBUG ${_NCNN_EXT_DIR}
        LIBRARY_OUTPUT_DIRECTORY_RELEASE ${_NCNN_EXT_DIR})
// Copyright (c) OpenMMLab. All rights reserved.
#include <pybind11/pybind11.h>
#include "ncnn_ops_register.h"
#include "net.h"
// Python module exposing registration of mmdeploy's custom ncnn layers on a
// given ncnn::Net (forwards to the C++ helper declared in ncnn_ops_register.h).
PYBIND11_MODULE(ncnn_ext, m) {
  m.def(
      "register_mmdeploy_custom_layers",
      [](ncnn::Net &net) { return register_mmdeploy_custom_layers(net); },
      "register mmdeploy custom ncnn layers.");
}
# Copyright (c) OpenMMLab. All rights reserved.
project(mmdeploy_onnxruntime_ops)
include(${CMAKE_SOURCE_DIR}/cmake/modules/FindONNXRUNTIME.cmake)
include(${CMAKE_SOURCE_DIR}/cmake/MMDeploy.cmake)
# add plugin source: every .cpp under this directory is part of the op library.
file(GLOB_RECURSE ORT_OPS_SRCS *.cpp)
# Object library so the sources compile once and can be reused by the shared lib.
add_library(${PROJECT_NAME}_obj OBJECT "${ORT_OPS_SRCS}")
target_compile_definitions(${PROJECT_NAME}_obj PRIVATE -DMMDEPLOY_API_EXPORTS=1)
# Hide all symbols by default; only MMDEPLOY_API-annotated ones are exported.
target_compile_options(${PROJECT_NAME}_obj PRIVATE
        $<$<COMPILE_LANGUAGE:CXX>:-fvisibility=hidden>)
set_target_properties(${PROJECT_NAME}_obj PROPERTIES POSITION_INDEPENDENT_CODE 1)
mmdeploy_export(${PROJECT_NAME}_obj)
target_include_directories(${PROJECT_NAME}_obj PUBLIC
        $<BUILD_INTERFACE:${ONNXRUNTIME_DIR}/include>
        $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/common>
        $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/../common>
        $<BUILD_INTERFACE:${CMAKE_SOURCE_DIR}/csrc>)
target_link_libraries(${PROJECT_NAME}_obj PUBLIC onnxruntime)
# Shared library loaded by ONNX Runtime at session-creation time.
mmdeploy_add_library(${PROJECT_NAME} SHARED EXCLUDE "")
target_link_libraries(${PROJECT_NAME} PUBLIC ${PROJECT_NAME}_obj)
mmdeploy_add_rpath(${PROJECT_NAME})
add_library(mmdeploy::onnxruntime::ops ALIAS ${PROJECT_NAME})
# Install beside the Python package so the backend can locate the plugin.
set(_ORT_OPS_DIR ${CMAKE_SOURCE_DIR}/mmdeploy/lib)
install(TARGETS ${PROJECT_NAME} DESTINATION ${_ORT_OPS_DIR})
// Copyright (c) OpenMMLab. All rights reserved.
#ifndef ONNXRUNTIME_REGISTER_H
#define ONNXRUNTIME_REGISTER_H
#include <onnxruntime_c_api.h>
#include "mmdeploy/core/macro.h"
#ifdef __cplusplus
extern "C" {
#endif
// Entry point ONNX Runtime calls to register mmdeploy's custom ops with a
// session; exported with C linkage so it can be resolved by name from the
// shared library. Returns an OrtStatus (null means success, per ONNX Runtime
// convention — implementation not in this header).
MMDEPLOY_API OrtStatus *ORT_API_CALL RegisterCustomOps(OrtSessionOptions *options,
                                                       const OrtApiBase *api);
#ifdef __cplusplus
}
#endif
#endif  // ONNXRUNTIME_REGISTER_H
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
#ifndef ONNXRUNTIME_SESSION_OPTIONS_CONFIG_KEYS_H
#define ONNXRUNTIME_SESSION_OPTIONS_CONFIG_KEYS_H
// NOTE(review): vendored copy of ONNX Runtime's
// onnxruntime_session_options_config_keys.h — keep in sync with the upstream
// header instead of editing the key strings locally.
/*
 * This file defines SessionOptions Config Keys and format of the Config Values.
 *
 * The Naming Convention for a SessionOptions Config Key,
 * "[Area][.[SubArea1].[SubArea2]...].[Keyname]"
 * Such as "ep.cuda.use_arena"
 * The Config Key cannot be empty
 * The maximum length of the Config Key is 128
 *
 * The string format of a SessionOptions Config Value is defined individually
 * for each Config. The maximum length of the Config Value is 1024
 */
// Key for disable PrePacking,
// If the config value is set to "1" then the prepacking is disabled, otherwise
// prepacking is enabled (default value)
static const char* const kOrtSessionOptionsConfigDisablePrepacking = "session.disable_prepacking";
// A value of "1" means allocators registered in the env will be used. "0" means
// the allocators created in the session will be used. Use this to override the
// usage of env allocators on a per session level.
static const char* const kOrtSessionOptionsConfigUseEnvAllocators = "session.use_env_allocators";
// Set to 'ORT' (case sensitive) to load an ORT format model.
// If unset, model type will default to ONNX unless inferred from filename
// ('.ort' == ORT format) or bytes to be ORT
static const char* const kOrtSessionOptionsConfigLoadModelFormat = "session.load_model_format";
// Set to 'ORT' (case sensitive) to save optimized model in ORT format when
// SessionOptions.optimized_model_path is set. If unset, format will default to
// ONNX unless optimized_model_filepath ends in '.ort'.
static const char* const kOrtSessionOptionsConfigSaveModelFormat = "session.save_model_format";
#endif  // ONNXRUNTIME_SESSION_OPTIONS_CONFIG_KEYS_H
// Copyright (c) OpenMMLab. All rights reserved.
#include "ort_utils.h"
namespace mmdeploy {
// Accessor for the process-wide table mapping custom-op domain names to the
// ops registered under each domain (populated via REGISTER_ONNXRUNTIME_OPS).
CustomOpsTable& get_mmdeploy_custom_ops() {
  // Function-local static: constructed on first use, which sidesteps
  // static-initialization-order issues across translation units.
  static CustomOpsTable registry;
  return registry;
}
} // namespace mmdeploy
// Copyright (c) OpenMMLab. All rights reserved.
#ifndef ORT_MMCV_UTILS_H
#define ORT_MMCV_UTILS_H
#include <onnxruntime_cxx_api.h>
#include <unordered_map>
#include <vector>
namespace mmdeploy {
// Maps a custom-op domain name to the list of ops registered under it.
typedef std::unordered_map<std::string, std::vector<OrtCustomOp*>> CustomOpsTable;
// Convenience wrapper that copies an OrtValue's tensor shape into a
// std::vector<int64_t>, releasing the type-and-shape info before returning.
struct OrtTensorDimensions : std::vector<int64_t> {
  OrtTensorDimensions(Ort::CustomOpApi ort, const OrtValue* value) {
    OrtTensorTypeAndShapeInfo* info = ort.GetTensorTypeAndShape(value);
    std::vector<int64_t>::operator=(ort.GetTensorShape(info));
    ort.ReleaseTensorTypeAndShapeInfo(info);
  }
};
// Accessor for the process-wide custom-op table (defined in ort_utils.cpp).
CustomOpsTable& get_mmdeploy_custom_ops();
// Registers one custom-op instance under `domain`: the registry object's
// constructor runs during static initialization and appends &instance to the
// global table. `domain` must be a char array with static storage duration.
template <char const* domain, typename T>
class OrtOpsRegistry {
 public:
  OrtOpsRegistry() { get_mmdeploy_custom_ops()[domain].push_back(&instance); }
 private:
  T instance{};
};
// Declares the static domain string plus a static OrtOpsRegistry so that
// `name` is registered under `domain` before main() runs.
#define REGISTER_ONNXRUNTIME_OPS(domain, name) \
  static char __domain_##domain##name[] = #domain; \
  static OrtOpsRegistry<__domain_##domain##name, name> ort_ops_registry_##domain##name {}
} // namespace mmdeploy
#endif // ORT_MMCV_UTILS_H
// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
// modified from
// https://github.com/pytorch/pytorch/blob/master/aten/src/ATen/native/GridSampler.cpp
#include "grid_sample.h"
#include <cmath>
#include "ort_utils.h"
namespace mmdeploy {
#define MIN(a, b) (((a) < (b)) ? (a) : (b))
#define MAX(a, b) (((a) < (b)) ? (b) : (a))
// NOTE(review): MIN/MAX/CLIP_COORDINATES are not referenced by the visible
// code in this file — clip_coordinates() below is what is actually used.
#define CLIP_COORDINATES(in, out, clip_limit) out = MIN((clip_limit - 1), MAX(in, 0))
// Reads the grid_sampler op attributes once, at kernel-construction time.
GridSampleKernel::GridSampleKernel(const OrtApi &api, const OrtKernelInfo *info)
    : ort_(api), info_(info) {
  align_corners_ = ort_.KernelInfoGetAttribute<int64_t>(info, "align_corners");
  interpolation_mode_ = ort_.KernelInfoGetAttribute<int64_t>(info, "interpolation_mode");
  padding_mode_ = ort_.KernelInfoGetAttribute<int64_t>(info, "padding_mode");
  allocator_ = Ort::AllocatorWithDefaultOptions();
}
// Attribute value encodings; must match what the exporter writes.
enum GridSamplerInterpolation { Bilinear = 0, Nearest = 1, Bicubic = 2 };
enum GridSamplerPadding { Zeros = 0, Border = 1, Reflection = 2 };
// Maps a normalized grid coordinate in [-1, 1] onto pixel space.
// With align_corners, -1/+1 land on the centers of the corner pixels;
// without it, -1/+1 land on the outer edges of the image.
template <typename scalar_t>
static inline scalar_t grid_sampler_unnormalize(scalar_t coord, int64_t size, bool align_corners) {
  return align_corners ? ((coord + 1) / 2) * (size - 1)
                       : ((coord + 1) * size - 1) / 2;
}
// Clips a coordinate into the valid pixel range [0, clip_limit - 1].
template <typename scalar_t>
static inline scalar_t clip_coordinates(scalar_t in, int64_t clip_limit) {
  const scalar_t lower = static_cast<scalar_t>(0);
  const scalar_t upper = static_cast<scalar_t>(clip_limit - 1);
  // Same std::max-then-std::min order as the original (matters for NaN inputs).
  return std::min(upper, std::max(in, lower));
}
// Reflects a coordinate back and forth until it falls inside [low, high]
// (inclusive). The bounds arrive doubled so half-integer bounds can be
// expressed with integers.
template <typename scalar_t>
static inline scalar_t reflect_coordinates(scalar_t in, int64_t twice_low, int64_t twice_high) {
  if (twice_low == twice_high) {
    // Zero-width interval: every coordinate collapses onto the same point.
    return static_cast<scalar_t>(0);
  }
  const scalar_t low = static_cast<scalar_t>(twice_low) / 2;
  const scalar_t span = static_cast<scalar_t>(twice_high - twice_low) / 2;
  // Fold onto the non-negative side first so fmod's result is non-negative.
  const scalar_t offset = std::fabs(in - low);
  const scalar_t remainder = std::fmod(offset, span);
  const int bounces = static_cast<int>(std::floor(offset / span));
  // An even number of full spans means we are travelling "forward" again.
  return (bounces % 2 == 0) ? remainder + low : span - remainder + low;
}
// Applies the padding policy to an unnormalized coordinate: Border clamps,
// Reflection folds then clamps, and Zeros leaves the value untouched
// (out-of-range positions are zero-filled by the caller).
template <typename scalar_t>
static inline scalar_t compute_coordinates(scalar_t coord, int64_t size, int64_t padding_mode,
                                           bool align_corners) {
  switch (padding_mode) {
    case GridSamplerPadding::Border:
      return clip_coordinates(coord, size);
    case GridSamplerPadding::Reflection: {
      // align_corners reflects about pixel centers, otherwise about edges.
      const scalar_t folded = align_corners ? reflect_coordinates(coord, 0, 2 * (size - 1))
                                            : reflect_coordinates(coord, -1, 2 * size - 1);
      return clip_coordinates(folded, size);
    }
    default:  // GridSamplerPadding::Zeros
      return coord;
  }
}
// Computes the pixel source index for a grid coordinate: unnormalizes the
// [-1, 1] grid value into pixel space, then applies the padding policy.
template <typename scalar_t>
static inline scalar_t grid_sampler_compute_source_index(scalar_t coord, int64_t size,
                                                         int64_t padding_mode, bool align_corners) {
  const scalar_t pixel = grid_sampler_unnormalize(coord, size, align_corners);
  return compute_coordinates(pixel, size, padding_mode, align_corners);
}
// True iff (h, w) is a valid pixel position inside an H x W image.
static inline bool within_bounds_2d(int64_t h, int64_t w, int64_t H, int64_t W) {
  const bool row_in_range = (h >= 0) && (h < H);
  const bool col_in_range = (w >= 0) && (w < W);
  return row_in_range && col_in_range;
}
// Fetches data[y][x] (with strides sH/sW) after applying the padding policy
// to the coordinates; positions still outside the H x W bounds read as 0.
template <typename scalar_t>
static inline scalar_t get_value_bounded(const scalar_t *data, scalar_t x, scalar_t y, int64_t W,
                                         int64_t H, int64_t sW, int64_t sH, int64_t padding_mode,
                                         bool align_corners) {
  const scalar_t px = compute_coordinates(x, W, padding_mode, align_corners);
  const scalar_t py = compute_coordinates(y, H, padding_mode, align_corners);
  const int64_t col = static_cast<int64_t>(px);  // truncation, as in the original
  const int64_t row = static_cast<int64_t>(py);
  if (!within_bounds_2d(row, col, H, W)) {
    return static_cast<scalar_t>(0);
  }
  return data[row * sH + col * sW];
}
// Keys cubic kernel, branch for |x| <= 1: ((A+2)x - (A+3))x^2 + 1, Horner form.
template <typename scalar_t>
static inline scalar_t cubic_convolution1(scalar_t x, scalar_t A) {
  const scalar_t inner = (A + 2) * x - (A + 3);
  return inner * x * x + 1;
}
// Keys cubic kernel, branch for 1 < |x| < 2: ((Ax - 5A)x + 8A)x - 4A, Horner form.
template <typename scalar_t>
static inline scalar_t cubic_convolution2(scalar_t x, scalar_t A) {
  const scalar_t inner = (A * x - 5 * A) * x + 8 * A;
  return inner * x - 4 * A;
}
// Fills coeffs[0..3] with the cubic weights for the four taps surrounding a
// sample at fractional offset t in [0, 1].
template <typename scalar_t>
static inline void get_cubic_upsample_coefficients(scalar_t coeffs[4], scalar_t t) {
  const scalar_t A = -0.75;      // sharpness constant used by the original code
  const scalar_t left = t;       // distance to the nearer left-hand tap
  coeffs[0] = cubic_convolution2<scalar_t>(left + 1.0, A);
  coeffs[1] = cubic_convolution1<scalar_t>(left, A);
  // Mirrored distances for the right-hand taps.
  const scalar_t right = 1.0 - t;
  coeffs[2] = cubic_convolution1<scalar_t>(right, A);
  coeffs[3] = cubic_convolution2<scalar_t>(right + 1.0, A);
}
// 1-D cubic interpolation of four consecutive samples, at offset t from x1.
template <typename scalar_t>
static inline scalar_t cubic_interp1d(scalar_t x0, scalar_t x1, scalar_t x2, scalar_t x3,
                                      scalar_t t) {
  scalar_t weights[4];
  get_cubic_upsample_coefficients<scalar_t>(weights, t);
  return x0 * weights[0] + x1 * weights[1] + x2 * weights[2] + x3 * weights[3];
}
// CPU implementation of grid_sample, ported from PyTorch's GridSampler.cpp:
// samples `input` (N, C, H, W) at the normalized locations in `grid`
// (N, out_H, out_W, 2) and writes an (N, C, out_H, out_W) float output,
// using the bilinear / nearest / bicubic mode chosen via the op attributes.
void GridSampleKernel::Compute(OrtKernelContext *context) {
  const bool align_corners = align_corners_;
  const int64_t padding_mode = padding_mode_;
  const int64_t interpolation_mode = interpolation_mode_;
  // Inputs are read as raw contiguous float buffers.
  const OrtValue *input = ort_.KernelContext_GetInput(context, 0);
  const float *input_data = reinterpret_cast<const float *>(ort_.GetTensorData<float>(input));
  const OrtValue *grid = ort_.KernelContext_GetInput(context, 1);
  const float *grid_data = reinterpret_cast<const float *>(ort_.GetTensorData<float>(grid));
  OrtTensorDimensions input_dims(ort_, input);
  OrtTensorDimensions grid_dims(ort_, grid);
  int64_t N = input_dims[0];
  int64_t C = input_dims[1];
  int64_t inp_H = input_dims[2];
  int64_t inp_W = input_dims[3];
  int64_t out_H = grid_dims[1];
  int64_t out_W = grid_dims[2];
  // Output keeps N and C from the input, spatial size from the grid.
  std::vector<int64_t> output_dims = {N, C, out_H, out_W};
  OrtValue *output =
      ort_.KernelContext_GetOutput(context, 0, output_dims.data(), output_dims.size());
  float *out_ptr = ort_.GetTensorMutableData<float>(output);
  // Row-major (contiguous) strides for input, grid and output.
  int64_t inp_sN = input_dims[1] * input_dims[2] * input_dims[3];
  int64_t inp_sC = input_dims[2] * input_dims[3];
  int64_t inp_sH = input_dims[3];
  int64_t inp_sW = 1;
  int64_t grid_sN = grid_dims[1] * grid_dims[2] * grid_dims[3];
  int64_t grid_sH = grid_dims[2] * grid_dims[3];
  int64_t grid_sW = grid_dims[3];
  int64_t grid_sCoor = 1;
  int64_t out_sN = output_dims[1] * output_dims[2] * output_dims[3];
  int64_t out_sC = output_dims[2] * output_dims[3];
  int64_t out_sH = output_dims[3];
  int64_t out_sW = 1;
  // loop over each output pixel
  for (int64_t n = 0; n < N; ++n) {
    const float *grid_ptr_N = grid_data + n * grid_sN;
    const float *inp_ptr_N = input_data + n * inp_sN;
    for (int64_t h = 0; h < out_H; ++h) {
      for (int64_t w = 0; w < out_W; ++w) {
        // The innermost grid dimension holds the (x, y) pair for this pixel.
        const float *grid_ptr_NHW = grid_ptr_N + h * grid_sH + w * grid_sW;
        float x = *grid_ptr_NHW;
        float y = grid_ptr_NHW[grid_sCoor];
        // Map normalized coordinates into input pixel space with padding applied.
        float ix = grid_sampler_compute_source_index(x, inp_W, padding_mode, align_corners);
        float iy = grid_sampler_compute_source_index(y, inp_H, padding_mode, align_corners);
        if (interpolation_mode == GridSamplerInterpolation::Bilinear) {
          // get corner pixel values from (x, y)
          // for 4d, we use north-east-south-west
          int64_t ix_nw = static_cast<int64_t>(std::floor(ix));
          int64_t iy_nw = static_cast<int64_t>(std::floor(iy));
          int64_t ix_ne = ix_nw + 1;
          int64_t iy_ne = iy_nw;
          int64_t ix_sw = ix_nw;
          int64_t iy_sw = iy_nw + 1;
          int64_t ix_se = ix_nw + 1;
          int64_t iy_se = iy_nw + 1;
          // get surfaces to each neighbor:
          float nw = (ix_se - ix) * (iy_se - iy);
          float ne = (ix - ix_sw) * (iy_sw - iy);
          float sw = (ix_ne - ix) * (iy - iy_ne);
          float se = (ix - ix_nw) * (iy - iy_nw);
          // calculate bilinear weighted pixel value and set output pixel
          const float *inp_ptr_NC = inp_ptr_N;
          float *out_ptr_NCHW = out_ptr + n * out_sN + h * out_sH + w * out_sW;
          for (int64_t c = 0; c < C; ++c, out_ptr_NCHW += out_sC, inp_ptr_NC += inp_sC) {
            auto res = static_cast<float>(0);
            // Out-of-bounds corners contribute 0 (zeros padding behavior).
            if (within_bounds_2d(iy_nw, ix_nw, inp_H, inp_W)) {
              res += inp_ptr_NC[iy_nw * inp_sH + ix_nw * inp_sW] * nw;
            }
            if (within_bounds_2d(iy_ne, ix_ne, inp_H, inp_W)) {
              res += inp_ptr_NC[iy_ne * inp_sH + ix_ne * inp_sW] * ne;
            }
            if (within_bounds_2d(iy_sw, ix_sw, inp_H, inp_W)) {
              res += inp_ptr_NC[iy_sw * inp_sH + ix_sw * inp_sW] * sw;
            }
            if (within_bounds_2d(iy_se, ix_se, inp_H, inp_W)) {
              res += inp_ptr_NC[iy_se * inp_sH + ix_se * inp_sW] * se;
            }
            *out_ptr_NCHW = res;
          }
        } else if (interpolation_mode == GridSamplerInterpolation::Nearest) {
          int64_t ix_nearest = static_cast<int64_t>(std::nearbyint(ix));
          int64_t iy_nearest = static_cast<int64_t>(std::nearbyint(iy));
          // assign nearest neighbor pixel value to output pixel
          float *out_ptr_NCHW = out_ptr + n * out_sN + h * out_sH + w * out_sW;
          const float *inp_ptr_NC = inp_ptr_N;
          for (int64_t c = 0; c < C; ++c, out_ptr_NCHW += out_sC, inp_ptr_NC += inp_sC) {
            if (within_bounds_2d(iy_nearest, ix_nearest, inp_H, inp_W)) {
              *out_ptr_NCHW = inp_ptr_NC[iy_nearest * inp_sH + ix_nearest * inp_sW];
            } else {
              *out_ptr_NCHW = static_cast<float>(0);
            }
          }
        } else if (interpolation_mode == GridSamplerInterpolation::Bicubic) {
          // grid_sampler_compute_source_index will "clip the value" of idx
          // depends on the padding,
          // which would cause calculation to be wrong,
          // for example x = -0.1 -> ix = 0 for zero padding, but in bicubic ix
          // = floor(x) = -1
          // There would be more problem in reflection padding, since the -1 and
          // +1 direction is not fixed in boundary condition
          ix = grid_sampler_unnormalize(x, inp_W, align_corners);
          iy = grid_sampler_unnormalize(y, inp_H, align_corners);
          float ix_nw = std::floor(ix);
          float iy_nw = std::floor(iy);
          const float tx = ix - ix_nw;
          const float ty = iy - iy_nw;
          const float *inp_ptr_NC = inp_ptr_N;
          float *out_ptr_NCHW = out_ptr + n * out_sN + h * out_sH + w * out_sW;
          for (int64_t c = 0; c < C; ++c, out_ptr_NCHW += out_sC, inp_ptr_NC += inp_sC) {
            float coefficients[4];
            // Interpolate 4 values in the x direction
            for (int64_t i = 0; i < 4; ++i) {
              coefficients[i] = cubic_interp1d<float>(
                  get_value_bounded<float>(inp_ptr_NC, ix_nw - 1, iy_nw - 1 + i, inp_W, inp_H,
                                           inp_sW, inp_sH, padding_mode, align_corners),
                  get_value_bounded<float>(inp_ptr_NC, ix_nw + 0, iy_nw - 1 + i, inp_W, inp_H,
                                           inp_sW, inp_sH, padding_mode, align_corners),
                  get_value_bounded<float>(inp_ptr_NC, ix_nw + 1, iy_nw - 1 + i, inp_W, inp_H,
                                           inp_sW, inp_sH, padding_mode, align_corners),
                  get_value_bounded<float>(inp_ptr_NC, ix_nw + 2, iy_nw - 1 + i, inp_W, inp_H,
                                           inp_sW, inp_sH, padding_mode, align_corners),
                  tx);
            }
            // Interpolate in the y direction
            *out_ptr_NCHW = cubic_interp1d<float>(coefficients[0], coefficients[1], coefficients[2],
                                                  coefficients[3], ty);
          }
        }
      }
    }
  }
}
// Static registration of the op under the "mmdeploy" custom-op domain.
REGISTER_ONNXRUNTIME_OPS(mmdeploy, GridSampleOp);
} // namespace mmdeploy
// Copyright (c) OpenMMLab. All rights reserved.
#ifndef ONNXRUNTIME_GRIDSAMPLE_H
#define ONNXRUNTIME_GRIDSAMPLE_H
#include <onnxruntime_cxx_api.h>
namespace mmdeploy {
// CPU kernel state for the `grid_sampler` custom op; caches the op attributes
// read from OrtKernelInfo at construction time.
struct GridSampleKernel {
  GridSampleKernel(const OrtApi &api, const OrtKernelInfo *info);
  // Runs grid sampling for one inference call (defined in grid_sample.cpp).
  void Compute(OrtKernelContext *context);
 protected:
  Ort::CustomOpApi ort_;
  const OrtKernelInfo *info_;
  Ort::AllocatorWithDefaultOptions allocator_;
  int64_t align_corners_;       // 0/1 flag from the exported attribute
  int64_t interpolation_mode_;  // 0 bilinear, 1 nearest, 2 bicubic
  int64_t padding_mode_;        // 0 zeros, 1 border, 2 reflection
};
// Custom-op descriptor binding GridSampleKernel to ONNX Runtime: two float
// tensor inputs (input, grid), one float tensor output, CPU provider only.
struct GridSampleOp : Ort::CustomOpBase<GridSampleOp, GridSampleKernel> {
  void *CreateKernel(const OrtApi &api, const OrtKernelInfo *info) const {
    return new GridSampleKernel(api, info);
  };
  // Op name as it appears in the exported ONNX graph.
  const char *GetName() const { return "grid_sampler"; };
  size_t GetInputTypeCount() const { return 2; };
  ONNXTensorElementDataType GetInputType(size_t /*index*/) const {
    return ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT;
  };
  size_t GetOutputTypeCount() const { return 1; };
  ONNXTensorElementDataType GetOutputType(size_t /*index*/) const {
    return ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT;
  };
  const char *GetExecutionProviderType() const { return "CPUExecutionProvider"; };
};
} // namespace mmdeploy
#endif
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment