Commit 0369e974 authored by Khalique Ahmed

Merge branch 'batch_report' of https://github.com/ROCmSoftwarePlatform/AMDMIGraphX into mi100_opts

parents 3a474fca d70fd0df
@@ -18,19 +18,10 @@ namespace op {
 struct dot
 {
-    float alpha = 1.0;
-    float beta  = 1.0;
-    template <class Self, class F>
-    static auto reflect(Self& self, F f)
-    {
-        return pack(f(self.alpha, "alpha"), f(self.beta, "beta"));
-    }
     std::string name() const { return "dot"; }
     shape compute_shape(std::vector<shape> inputs) const
     {
-        check_shapes{inputs, *this}.same_type();
+        check_shapes{inputs, *this}.same_type().has(2);
         const shape& a = inputs.at(0);
         const shape& b = inputs.at(1);
         auto t         = a.type();
@@ -58,25 +49,14 @@ struct dot
         auto out_lens   = a.lens();
         out_lens[dim_1] = b.lens()[dim_1];
-        if(inputs.size() == 3 && out_lens != inputs.at(2).lens())
-        {
-            MIGRAPHX_THROW("DOT: dimension mismatch, operand C: {" +
-                           to_string_range(inputs.at(2).lens()) +
-                           "}, cannot add to operand A * B: {" + to_string_range(out_lens) + "}");
-        }
         return {t, out_lens};
     }
     argument compute(shape output_shape, std::vector<argument> args) const
     {
-        argument result;
-        if(args.size() == 3)
-            result = args[2];
-        else
-            result = argument{output_shape};
+        argument result = argument{output_shape};
         visit_all(result, args[0], args[1])(
-            [&](auto cmat, auto amat, auto bmat) { gemm(cmat, amat, bmat, alpha, beta); });
+            [&](auto cmat, auto amat, auto bmat) { gemm(cmat, amat, bmat, 1.0f, 0.0f); });
         return result;
     }
 };
...
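The hunk above drops the fused alpha/beta scaling from the reference dot: compute always allocates a fresh output and calls gemm with alpha = 1 and beta = 0. For reference, a minimal sketch of the semantics that now reach gemm, written against plain row-major buffers rather than migraphx tensor views (gemm_ref and its layout are illustrative, not the library's API):

#include <cstddef>
#include <vector>

// C = alpha * (A * B) + beta * C for row-major m x k, k x n, m x n buffers.
// dot::compute now always passes alpha = 1.0f and beta = 0.0f.
void gemm_ref(std::vector<float>& c,
              const std::vector<float>& a,
              const std::vector<float>& b,
              std::size_t m, std::size_t k, std::size_t n,
              float alpha, float beta)
{
    for(std::size_t i = 0; i < m; ++i)
    {
        for(std::size_t j = 0; j < n; ++j)
        {
            double acc = 0.0;
            for(std::size_t l = 0; l < k; ++l)
                acc += a[i * k + l] * b[l * n + j];
            c[i * n + j] = alpha * static_cast<float>(acc) + beta * c[i * n + j];
        }
    }
}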
@@ -35,7 +35,7 @@ struct if_op
             MIGRAPHX_THROW("IF: output shapes of submodules must be the same.");
         }
-        return shape(out_shapes0);
+        return {out_shapes0};
     }
     argument compute(const shape&,
...
@@ -54,7 +54,7 @@ struct loop
             ins_out_shapes.push_back({out_s.type(), lens});
         }
-        return shape(ins_out_shapes);
+        return {ins_out_shapes};
     }
     struct ref_loop
...
#ifndef MIGRAPHX_GUARD_OPERATORS_NONMAXSUPPRESSION_HPP
#define MIGRAPHX_GUARD_OPERATORS_NONMAXSUPPRESSION_HPP
#include <cmath>
#include <queue>
#include <cstdint>
#include <iterator>
#include <migraphx/config.hpp>
#include <migraphx/ranges.hpp>
#include <migraphx/float_equal.hpp>
#include <migraphx/algorithm.hpp>
#include <migraphx/tensor_view.hpp>
#include <migraphx/shape_for_each.hpp>
#include <migraphx/check_shapes.hpp>
#include <migraphx/output_iterator.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace op {
struct nonmaxsuppression
{
bool center_point_box = false;
template <class Self, class F>
static auto reflect(Self& self, F f)
{
return pack(f(self.center_point_box, "center_point_box"));
}
std::string name() const { return "nonmaxsuppression"; }
shape compute_shape(std::vector<shape> inputs) const
{
// requires at least 2 inputs
check_shapes{inputs, *this}.standard();
check_shapes{{inputs.at(0), inputs.at(1)}, *this}.only_dims(3);
auto lens = inputs.front().lens();
// check input shape
if(lens[1] != inputs.at(1).lens()[2])
{
MIGRAPHX_THROW("NonMaxSuppression: dimension mismatch between first and second input!");
}
std::vector<int64_t> out_lens(2);
out_lens.at(0) = lens.at(1);
out_lens.at(1) = 3;
return {shape::int64_type, out_lens};
}
struct box
{
std::array<float, 2> x;
std::array<float, 2> y;
void sort()
{
std::sort(x.begin(), x.end());
std::sort(y.begin(), y.end());
}
std::array<float, 2>& operator[](std::size_t i) { return i == 0 ? x : y; }
float area() const
{
assert(std::is_sorted(x.begin(), x.end()));
assert(std::is_sorted(y.begin(), y.end()));
return (x[1] - x[0]) * (y[1] - y[0]);
}
};
template <class T>
box batch_box(const T* boxes, std::size_t bidx) const
{
box result{};
const T* start = boxes + 4 * bidx;
if(center_point_box)
{
float half_width = start[2] / 2.0f;
float half_height = start[3] / 2.0f;
float x_center = start[0];
float y_center = start[1];
result.x = {x_center - half_width, x_center + half_width};
result.y = {y_center - half_height, y_center + half_height};
}
else
{
result.x = {start[1], start[3]};
result.y = {start[0], start[2]};
}
return result;
}
inline bool suppress_by_iou(box b1, box b2, float iou_threshold) const
{
b1.sort();
b2.sort();
box intersection{};
for(auto i : range(2))
{
intersection[i][0] = std::max(b1[i][0], b2[i][0]);
intersection[i][1] = std::min(b1[i][1], b2[i][1]);
}
std::vector<std::array<float, 2>> bbox = {intersection.x, intersection.y};
if(std::any_of(bbox.begin(), bbox.end(), [](auto bx) {
return not std::is_sorted(bx.begin(), bx.end());
}))
{
return false;
}
const float area1 = b1.area();
const float area2 = b2.area();
const float intersection_area = intersection.area();
const float union_area = area1 + area2 - intersection_area;
if(area1 <= .0f or area2 <= .0f or union_area <= .0f)
{
return false;
}
const float intersection_over_union = intersection_area / union_area;
return intersection_over_union > iou_threshold;
}
argument compute(const shape& output_shape, std::vector<argument> args) const
{
argument result{output_shape};
result.visit([&](auto out) { std::fill(out.begin(), out.end(), 0); });
std::size_t max_output_boxes_per_class = 0;
float iou_threshold = 0.0f;
float score_threshold = 0.0f;
if(args.size() > 2)
{
max_output_boxes_per_class = args.at(2).at<std::size_t>();
}
// when max_output_boxes_per_class is 0, there is no output
if(max_output_boxes_per_class == 0)
{
return result;
}
if(args.size() > 3)
{
iou_threshold = args.at(3).at<float>();
}
if(args.size() > 4)
{
score_threshold = args.at(4).at<float>();
}
const auto& lens = args.at(1).get_shape().lens();
auto batch_num = lens[0];
auto class_num = lens[1];
auto box_num = args.at(0).get_shape().lens()[1];
std::vector<std::pair<float, int64_t>> selected_boxes_inside_class;
std::vector<int64_t> selected_indices;
selected_boxes_inside_class.reserve(output_shape.elements());
auto scores = make_view<float>(args.at(1).get_shape(), args.at(1).cast<float>());
const float* boxes = args.at(0).cast<float>();
shape comp_s{shape::float_type, {batch_num, class_num}};
shape_for_each(comp_s, [&](auto idx) {
auto bidx = idx[0];
auto cidx = idx[1];
std::size_t score_offset = (bidx * class_num + cidx) * box_num;
const float* batch_boxes = boxes + bidx * box_num * 4;
std::priority_queue<std::pair<float, int64_t>> sorted_boxes;
auto insert_to_sorted_boxes =
make_function_output_iterator([&](const auto& x) { sorted_boxes.push(x); });
int64_t box_idx = 0;
transform_if(scores.begin() + score_offset,
scores.begin() + score_offset + box_num,
insert_to_sorted_boxes,
[&](auto sc) {
box_idx++;
return sc >= score_threshold;
},
[&](auto sc) { return std::make_pair(sc, box_idx - 1); });
selected_boxes_inside_class.clear();
// Get the next box with top score, filter by iou_threshold
while(!sorted_boxes.empty() &&
selected_boxes_inside_class.size() < max_output_boxes_per_class)
{
const std::pair<float, int64_t>& next_top_score = sorted_boxes.top();
// Check against the boxes already selected for this class; suppress the
// candidate if it exceeds the IOU (Intersection Over Union) threshold
bool suppressed = std::any_of(
selected_boxes_inside_class.begin(),
selected_boxes_inside_class.end(),
[&](auto selected_index) {
return this->suppress_by_iou(batch_box(batch_boxes, next_top_score.second),
batch_box(batch_boxes, selected_index.second),
iou_threshold);
});
if(not suppressed)
{
selected_boxes_inside_class.push_back(next_top_score);
selected_indices.push_back(bidx);
selected_indices.push_back(cidx);
selected_indices.push_back(next_top_score.second);
}
sorted_boxes.pop();
}
});
result.visit([&](auto out) {
std::copy(selected_indices.begin(), selected_indices.end(), out.begin());
});
return result;
}
};
} // namespace op
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#endif
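For reference, the suppression test above reduces to a plain intersection-over-union computation on axis-aligned boxes. A self-contained sketch with scalar floats (the box layout and helper name are illustrative):

#include <algorithm>
#include <array>

// Each box is {x1, x2, y1, y2} with coordinates already sorted per axis,
// matching what box::sort() guarantees above.
float iou(const std::array<float, 4>& b1, const std::array<float, 4>& b2)
{
    float ix1 = std::max(b1[0], b2[0]);
    float ix2 = std::min(b1[1], b2[1]);
    float iy1 = std::max(b1[2], b2[2]);
    float iy2 = std::min(b1[3], b2[3]);
    if(ix2 <= ix1 or iy2 <= iy1)
        return 0.0f; // no overlap
    float inter = (ix2 - ix1) * (iy2 - iy1);
    float area1 = (b1[1] - b1[0]) * (b1[3] - b1[2]);
    float area2 = (b2[1] - b2[0]) * (b2[3] - b2[2]);
    float uni   = area1 + area2 - inter;
    return uni <= 0.0f ? 0.0f : inter / uni;
}

A candidate box is kept only when iou(candidate, selected) stays at or below iou_threshold for every box already selected in the same batch/class slice.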
#ifndef MIGRAPHX_GUARD_OPERATORS_NONZERO_HPP
#define MIGRAPHX_GUARD_OPERATORS_NONZERO_HPP
#include <migraphx/shape_for_each.hpp>
#include <migraphx/check_shapes.hpp>
#include <migraphx/config.hpp>
#include <migraphx/float_equal.hpp>
#include <migraphx/par_for.hpp>
#include <cmath>
#include <utility>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace op {
struct nonzero
{
std::string name() const { return "nonzero"; }
shape compute_shape(std::vector<shape> inputs) const
{
check_shapes{inputs, *this}.has(1).standard();
auto elem_num = inputs[0].elements();
auto dim_num = inputs[0].lens().size();
std::vector<std::size_t> out_lens = {dim_num, elem_num};
return {shape::int64_type, out_lens};
}
argument compute(const shape& output_shape, std::vector<argument> args) const
{
std::vector<std::vector<std::size_t>> vec_idx;
auto s = args.front().get_shape();
args.front().visit([&](auto v) {
shape_for_each(s, [&](auto idx) {
if(not float_equal(v[s.index(idx)], 0))
{
vec_idx.push_back(idx);
}
});
});
argument result{output_shape};
result.visit([&](auto output) {
std::fill(output.begin(), output.end(), 0);
par_for(vec_idx.size(), [&](auto i) {
for(std::size_t j = 0; j < vec_idx.front().size(); ++j)
{
output[output_shape.index({j, i})] = vec_idx[i][j];
}
});
});
return result;
}
};
} // namespace op
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#endif
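Note the output contract above: the result is a fixed int64 tensor of shape {rank, elements} that is zero padded, not a tightly packed index list. A worked example (values illustrative):

// For a 2x2 standard-layout input
//     0 5
//     7 0
// the nonzero coordinates visited in row-major order are (0,1) and (1,0),
// so the int64 output of shape {2, 4} is
//     0 1 0 0   <- dim-0 indices (zero padding after the two hits)
//     1 0 0 0   <- dim-1 indices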
#ifndef MIGRAPHX_GUARD_OP_POINTWISE_HPP
#define MIGRAPHX_GUARD_OP_POINTWISE_HPP
#include <migraphx/config.hpp>
#include <migraphx/check_shapes.hpp>
#include <migraphx/module.hpp>
#include <migraphx/permutation.hpp>
#include <migraphx/instruction.hpp>
#include <migraphx/par_for.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace op {
struct pointwise
{
std::string name() const { return "pointwise"; }
shape compute_shape(const std::vector<shape>& inputs, std::vector<module_ref> mods) const
{
if(mods.size() != 1)
{
MIGRAPHX_THROW("should have one submodule.");
}
auto* pm = mods.front();
auto pnames = pm->get_parameter_names();
std::sort(pnames.begin(), pnames.end());
check_shapes{inputs, *this}.has(pnames.size()).same_dims();
for(auto i : range(pnames.size()))
{
auto s1 = pm->get_parameter(pnames[i])->get_shape();
auto s2 = inputs[i];
if(s1.type() != s2.type())
MIGRAPHX_THROW("Mismatch type");
}
if(pm->get_output_shapes().size() != 1)
MIGRAPHX_THROW("submodule should have only one output.");
auto type = pm->get_output_shapes().front().type();
return shape::from_permutation(type, inputs.front().lens(), find_permutation(inputs));
}
argument compute(const shape& output_shape,
const std::vector<argument>& args,
const std::vector<module_ref>& mods,
const std::function<std::vector<argument>(
module_ref&, const std::unordered_map<std::string, argument>&)>& run) const
{
argument output{output_shape};
auto* pm = mods.front();
auto pnames = pm->get_parameter_names();
std::sort(pnames.begin(), pnames.end());
par_for(output_shape.elements(), [&](auto i) {
std::unordered_map<std::string, argument> params;
std::transform(
pnames.begin(),
pnames.end(),
args.begin(),
std::inserter(params, params.end()),
[&](auto&& name, auto&& arg) { return std::make_pair(name, arg.element(i)); });
auto results = run(pm, params);
assert(results.size() == 1);
visit_all(output, results.front())([&](auto out, auto x) { out[i] = x.front(); });
});
return output;
}
};
} // namespace op
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#endif // MIGRAPHX_GUARD_OP_POINTWISE_HPP
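The compute above runs the submodule once per output element: for element i it maps each sorted parameter name to args[k].element(i) and forwards the map to the supplied run callback. A rough scalar analogue of that dispatch loop (pointwise_ref and run_elem are illustrative stand-ins for the module evaluator):

#include <cstddef>
#include <functional>
#include <string>
#include <unordered_map>
#include <vector>

using scalar_fn = std::function<float(const std::unordered_map<std::string, float>&)>;

void pointwise_ref(std::vector<float>& out,
                   const std::vector<std::string>& pnames,
                   const std::vector<std::vector<float>>& args,
                   const scalar_fn& run_elem)
{
    for(std::size_t i = 0; i < out.size(); ++i)
    {
        std::unordered_map<std::string, float> params;
        for(std::size_t k = 0; k < pnames.size(); ++k)
            params[pnames[k]] = args[k][i]; // one scalar slice per input
        out[i] = run_elem(params);          // evaluate the submodule body once
    }
}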
@@ -18,21 +18,12 @@ namespace op {
 struct quant_dot
 {
-    int32_t alpha = 1;
-    int32_t beta  = 1;
-    template <class Self, class F>
-    static auto reflect(Self& self, F f)
-    {
-        return pack(f(self.alpha, "alpha"), f(self.beta, "beta"));
-    }
     value attributes() const { return {{"general_data_type", "dot"}}; }
     std::string name() const { return "quant_dot"; }
     shape compute_shape(std::vector<shape> inputs) const
     {
-        check_shapes{{inputs.at(0), inputs.at(1)}, *this}.same_type();
+        check_shapes{{inputs.at(0), inputs.at(1)}, *this}.same_type().has(2);
         const shape& a = inputs.at(0);
         const shape& b = inputs.at(1);
         auto t         = a.type();
@@ -64,18 +55,6 @@ struct quant_dot
         auto out_lens   = a.lens();
         out_lens[dim_1] = b.lens()[dim_1];
-        if(inputs.size() == 3 && out_lens != inputs.at(2).lens())
-        {
-            MIGRAPHX_THROW("QUANT_DOT: dimension mismatch, operand C: {" +
-                           to_string_range(inputs.at(2).lens()) +
-                           "}, cannot add to operand A * B: {" + to_string_range(out_lens) + "}");
-        }
-        if(inputs.size() == 3 && inputs.at(2).type() != shape::int32_type)
-        {
-            MIGRAPHX_THROW("QUANT_DOT: operand C type must be int32");
-        }
         return {shape::int32_type, out_lens};
    }
 };
...
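As with dot, the alpha/beta attributes and the optional int32 C operand are gone; quant_dot now always yields the raw int32 product of exactly two inputs. The fixed int32 output type matters because int8 x int8 products must accumulate in a wider type. A short sketch of why:

#include <cstddef>
#include <cstdint>
#include <vector>

// Each int8 * int8 term can reach 127 * 127 = 16129, so even a short
// reduction overflows int8/int16 but stays safe in an int32 accumulator.
// Assumes a and b have equal length.
int32_t dot_i8(const std::vector<int8_t>& a, const std::vector<int8_t>& b)
{
    int32_t acc = 0;
    for(std::size_t i = 0; i < a.size(); ++i)
        acc += static_cast<int32_t>(a[i]) * static_cast<int32_t>(b[i]);
    return acc;
}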
@@ -37,7 +37,7 @@ struct rnn_var_sl_shift_output
     argument compute(const shape& output_shape, std::vector<argument> args) const
     {
         argument result{output_shape};
-        int64_t max_len = static_cast<int64_t>(output_shape.lens()[0]);
+        int64_t max_len = output_shape.lens()[0];
         visit_all(result, args[0])([&](auto output, auto input) {
             using value_type = typename decltype(output)::value_type;
             args[1].visit([&](auto seq_lens) {
@@ -76,7 +76,7 @@ struct rnn_var_sl_shift_sequence
     argument compute(const shape& output_shape, std::vector<argument> args) const
     {
         argument result{output_shape};
-        int64_t max_len = static_cast<int64_t>(output_shape.lens()[0]);
+        int64_t max_len = output_shape.lens()[0];
         visit_all(result, args[0])([&](auto output, auto input) {
             using value_type = typename decltype(output)::value_type;
             args[1].visit([&](auto seq_lens) {
...
#ifndef MIGRAPHX_GUARD_OPERATORS_ROIALIGN_HPP
#define MIGRAPHX_GUARD_OPERATORS_ROIALIGN_HPP
#include <limits>
#include <migraphx/check_shapes.hpp>
#include <migraphx/config.hpp>
#include <migraphx/argument.hpp>
#include <migraphx/par_for.hpp>
#include <migraphx/dfor.hpp>
#include <migraphx/ranges.hpp>
#include <migraphx/shape_for_each.hpp>
#include <cmath>
#include <numeric>
#include <utility>
#include <vector>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace op {
struct roialign
{
std::string coord_trans_mode = "half_pixel";
std::string mode = "avg";
int64_t output_height = 1;
int64_t output_width = 1;
int64_t sampling_ratio = 0;
float spatial_scale = 1.0f;
template <class Self, class F>
static auto reflect(Self& self, F f)
{
return pack(f(self.coord_trans_mode, "coordinate_transformation_mode"),
f(self.mode, "mode"),
f(self.output_height, "output_height"),
f(self.output_width, "output_width"),
f(self.sampling_ratio, "sampling_ratio"),
f(self.spatial_scale, "spatial_scale"));
}
std::string name() const { return "roialign"; }
shape compute_shape(std::vector<shape> inputs) const
{
check_shapes{inputs, *this}.has(3).standard();
auto x_lens = inputs.at(0).lens();
auto roi_lens = inputs.at(1).lens();
auto bi_lens = inputs.at(2).lens();
auto type = inputs.at(0).type();
// validate the input shapes
if(bi_lens.size() != 1)
{
MIGRAPHX_THROW("ROIALIGN: batch indices should be 1 dimension!");
}
if(roi_lens.size() != 2 or roi_lens.at(1) != 4)
{
MIGRAPHX_THROW(
"ROIALIGN: rois should be 2 dimensions, and the second dim should be 4!");
}
if(roi_lens.front() != bi_lens.front())
{
MIGRAPHX_THROW("ROIALIGN: rois and batch indices inputs should have the same number!");
}
std::vector<std::size_t> out_lens = x_lens;
out_lens[0] = roi_lens[0];
out_lens[2] = output_height;
out_lens[3] = output_width;
return {type, out_lens};
}
struct pos_weight
{
// neighbor indices for the bilinear interpolation
std::array<std::size_t, 4> pos = {0, 0, 0, 0};
// neighbor weights for the bilinear interpolation
std::array<float, 4> w = {0.0f, 0.0f, 0.0f, 0.0f};
};
auto calc_pos_weight(const std::array<std::size_t, 2>& dims,
const shape& comp_s,
const std::array<float, 2>& roi_start,
const std::array<float, 2>& bin_size,
const std::array<std::size_t, 2>& bin_grid_size) const
{
std::vector<pos_weight> results(bin_grid_size[0] * bin_grid_size[1] * output_height *
output_width);
shape_for_each(comp_s, [&](auto idx) {
std::array<std::size_t, 2> p = {idx[0], idx[1]};
std::array<std::size_t, 2> i = {idx[2], idx[3]};
auto index = comp_s.index(idx);
std::array<float, 2> xy{};
std::array<int64_t, 2> low{};
std::array<int64_t, 2> high{};
for(auto ii : range(p.size()))
{
xy[ii] = roi_start[ii] + p[ii] * bin_size[ii] +
(i[ii] + .5f) * bin_size[ii] / bin_grid_size[ii];
xy[ii] = (coord_trans_mode == "output_half_pixel") ? (xy[ii] - 0.5f) : xy[ii];
if(xy[ii] < -1.0 or xy[ii] > dims[ii])
{
results[index] = pos_weight{};
return;
}
xy[ii] = std::max(xy[ii], 0.0f);
low[ii] = xy[ii];
high[ii] = low[ii] + 1;
if(low[ii] >= dims[ii] - 1)
{
xy[ii] = high[ii] = low[ii] = dims[ii] - 1;
}
}
results[index].pos = {low[0] * dims[1] + low[1],
low[0] * dims[1] + high[1],
high[0] * dims[1] + low[1],
high[0] * dims[1] + high[1]};
float ly = xy[0] - low[0];
float lx = xy[1] - low[1];
float hy = 1.0f - ly;
float hx = 1.0f - lx;
// save weights and indices
results[index].w = {hy * hx, hy * lx, ly * hx, ly * lx};
});
return results;
}
struct max_pool
{
double init() { return std::numeric_limits<double>::lowest(); }
double operator()(double x, double y) { return std::max(x, y); }
double final(double x, std::size_t) { return (x); }
};
struct avg_pool
{
double init() { return 0.0; }
double operator()(double x, double y) { return x + y; }
double final(double x, std::size_t y) { return (y == 0) ? 0.0 : (x / y); }
};
template <class T, class Op>
std::tuple<double, int64_t> calc_pooling(const T& data,
const std::array<std::size_t, 2>& bin_grid_size,
const std::vector<pos_weight>& pos_weights,
int64_t index,
Op op) const
{
double output_val = op.init();
const int64_t count = bin_grid_size[0] * bin_grid_size[1];
dfor(bin_grid_size[0], bin_grid_size[1])([&](auto, auto) {
const auto& pc = pos_weights[index];
std::array<double, 4> wv;
std::transform(
pc.w.begin(), pc.w.end(), pc.pos.begin(), wv.begin(), [&](auto w, auto pos) {
return *(data + pos) * w;
});
output_val = std::accumulate(wv.begin(), wv.end(), output_val, op);
index += 1;
});
output_val = op.final(output_val, count);
return {output_val, index};
}
argument compute(const shape& output_shape, std::vector<argument> args) const
{
argument result{output_shape};
const auto& out_lens = output_shape.lens();
int64_t n_rois = out_lens[0];
std::size_t channels = out_lens[1];
// output dims of height and width; in all 2-dim arrays, the first dim
// is height and the second is width
std::array<std::size_t, 2> out_dims = {out_lens[2], out_lens[3]};
const auto& x_lens = args.at(0).get_shape().lens();
// input dims of height and width
std::array<std::size_t, 2> in_dims = {x_lens[2], x_lens[3]};
auto roi_s = args.at(1).get_shape();
visit_all(result, args.at(0), args.at(1))([&](auto output, auto x, auto roi) {
const auto* batch_indices = args.at(2).cast<int64_t>();
par_for(n_rois, [&](auto n) {
const auto bottom_data = x.begin();
const auto roi_batch_ind = batch_indices[n];
// Do not use rounding; this implementation detail is critical
std::array<float, 2> roi_starts = {
static_cast<float>(roi[roi_s.index({n, 1})] * spatial_scale),
static_cast<float>(roi[roi_s.index({n, 0})] * spatial_scale)};
std::array<float, 2> roi_ends = {
static_cast<float>(roi[roi_s.index({n, 3})] * spatial_scale),
static_cast<float>(roi[roi_s.index({n, 2})] * spatial_scale)};
// Force malformed ROIs to be 1x1
std::array<float, 2> roi_size{};
std::array<float, 2> bin_size{};
std::array<std::size_t, 2> bin_grid_size{};
for(auto ii : range(roi_size.size()))
{
roi_size[ii] = roi_ends[ii] - roi_starts[ii];
roi_size[ii] = std::max(roi_size[ii], 1.0f);
bin_size[ii] = roi_size[ii] / out_dims[ii];
bin_grid_size[ii] = (sampling_ratio > 0)
? sampling_ratio
: std::ceil(roi_size[ii] / out_dims[ii]);
}
// precalculate the indices and weights shared by all channels;
// this is the key optimization
std::vector<std::size_t> comp_lens = {
out_dims[0], out_dims[1], bin_grid_size[0], bin_grid_size[1]};
shape comp_s{shape::float_type, comp_lens};
auto pre_calc =
this->calc_pos_weight(in_dims, comp_s, roi_starts, bin_size, bin_grid_size);
std::vector<std::size_t> comp_lens1 = {channels, out_dims[0], out_dims[1]};
shape comp_s1{migraphx::shape::float_type, comp_lens1};
std::vector<int64_t> vec_index(channels, 0);
shape_for_each(comp_s1, [&](auto idx) {
auto c = idx[0];
auto ph = idx[1];
auto pw = idx[2];
const auto offset_bottom_data =
bottom_data + static_cast<int64_t>((roi_batch_ind * channels + c) *
in_dims[0] * in_dims[1]);
double output_val;
std::tie(output_val, vec_index[c]) =
(mode == "avg") ? this->calc_pooling(offset_bottom_data,
bin_grid_size,
pre_calc,
vec_index[c],
avg_pool{})
: this->calc_pooling(offset_bottom_data,
bin_grid_size,
pre_calc,
vec_index[c],
max_pool{});
output(n, c, ph, pw) = output_val;
});
});
});
return result;
}
};
} // namespace op
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#endif
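The pos_weight precomputation above is standard bilinear interpolation: a continuous sample point (y, x) inside the feature map becomes a weighted sum of its four integer neighbors, with exactly the {hy*hx, hy*lx, ly*hx, ly*lx} weights stored by calc_pos_weight. A standalone sketch of the math for one sample (bilinear_at is illustrative):

// data points into one H x W channel plane; (y, x) are continuous coords
// already clamped to [0, H-1] x [0, W-1], as calc_pos_weight guarantees.
float bilinear_at(const float* data, int h, int w, float y, float x)
{
    int y0 = static_cast<int>(y);
    int x0 = static_cast<int>(x);
    int y1 = (y0 >= h - 1) ? y0 : y0 + 1;
    int x1 = (x0 >= w - 1) ? x0 : x0 + 1;
    float ly = y - y0, lx = x - x0; // fractional offsets
    float hy = 1.0f - ly, hx = 1.0f - lx;
    return hy * hx * data[y0 * w + x0] + hy * lx * data[y0 * w + x1] +
           ly * hx * data[y1 * w + x0] + ly * lx * data[y1 * w + x1];
}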
@@ -45,7 +45,7 @@ struct topk
         shape s_val{type, lens};
         shape s_ind{shape::int64_type, lens};
-        return shape({s_val, s_ind});
+        return {{s_val, s_ind}};
     }
     template <class T, class Compare>
@@ -131,7 +131,7 @@ struct topk
             });
         });
-        return argument({res_val, res_ind});
+        return {{res_val, res_ind}};
     }
 };
...
@@ -57,6 +57,8 @@
 #include <migraphx/op/mul.hpp>
 #include <migraphx/op/multibroadcast.hpp>
 #include <migraphx/op/neg.hpp>
+#include <migraphx/op/nonmaxsuppression.hpp>
+#include <migraphx/op/nonzero.hpp>
 #include <migraphx/op/outline.hpp>
 #include <migraphx/op/pad.hpp>
 #include <migraphx/op/pooling.hpp>
@@ -79,6 +81,7 @@
 #include <migraphx/op/rnn_last_hs_output.hpp>
 #include <migraphx/op/rnn_variable_seq_lens.hpp>
 #include <migraphx/op/rnn_var_sl_last_output.hpp>
+#include <migraphx/op/roialign.hpp>
 #include <migraphx/op/round.hpp>
 #include <migraphx/op/rsqrt.hpp>
 #include <migraphx/op/scalar.hpp>
...
@@ -23,6 +23,8 @@ MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_TRACE_EVAL)
 struct program_impl;
+struct marker;
+
 /**
  * @brief Stores the instruction stream
  */
@@ -65,7 +67,10 @@ struct program
     void finalize();
-    void perf_report(std::ostream& os, std::size_t n, parameter_map params) const;
+    void
+    perf_report(std::ostream& os, std::size_t n, parameter_map params, std::size_t batch = 1) const;
+
+    void mark(const parameter_map& params, marker&& m);
     value to_value() const;
     void from_value(const value& v);
...
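The perf_report change is backwards compatible: the new batch parameter defaults to 1, so existing callers keep per-run numbers, while a driver can pass the batch size to get rates scaled per sample. A hypothetical call site:

#include <iostream>
#include <migraphx/program.hpp>

// Hypothetical driver helper; program and parameter_map are migraphx types.
void report(migraphx::program& p, const migraphx::parameter_map& params)
{
    // 100 timed runs, with throughput scaled for a batch size of 64
    p.perf_report(std::cout, 100, params, 64);
}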
@@ -106,7 +106,7 @@ argument run_loop(const LoopModel& model,
     std::copy(in_args.begin() + 2, in_args.end(), out_args.begin());
     model.set_zero(ctx, scan_outputs, iter);
-    return argument(out_args);
+    return {out_args};
 }
 } // namespace MIGRAPHX_INLINE_NS
...
@@ -71,7 +71,7 @@ std::string trim(const std::string& s, F f)
 {
     auto start = std::find_if_not(s.begin(), s.end(), f);
     auto last  = std::find_if_not(s.rbegin(), std::string::const_reverse_iterator(start), f).base();
-    return std::string(start, last);
+    return {start, last};
 }
 inline std::string trim(const std::string& s)
...
@@ -11,49 +11,8 @@ inline namespace MIGRAPHX_INLINE_NS {
 static void inline_submodule(module& m, instruction_ref ins, bool cond)
 {
     const auto& mod_inputs = ins->module_inputs();
-    const auto* smod       = cond ? mod_inputs.at(0) : mod_inputs.at(1);
-    std::unordered_map<instruction_ref, instruction_ref> map_ins;
-    std::vector<instruction_ref> mod_outputs;
-    for(auto sins : iterator_for(*smod))
-    {
-        instruction_ref copy_ins{};
-        if(sins->name() == "@literal")
-        {
-            auto l   = sins->get_literal();
-            copy_ins = m.add_literal(l);
-        }
-        else if(sins->name() == "@param")
-        {
-            auto&& name = any_cast<builtin::param>(sins->get_operator()).parameter;
-            auto s      = sins->get_shape();
-            copy_ins    = m.add_parameter(name, s);
-        }
-        else if(sins->name() == "@outline")
-        {
-            auto s   = sins->get_shape();
-            copy_ins = m.add_outline(s);
-        }
-        else
-        {
-            auto mod_args = sins->module_inputs();
-            auto inputs   = sins->inputs();
-            std::vector<instruction_ref> copy_inputs(inputs.size());
-            std::transform(inputs.begin(), inputs.end(), copy_inputs.begin(), [&](auto i) {
-                return contains(map_ins, i) ? map_ins[i] : i;
-            });
-            if(sins->name() == "@return")
-            {
-                mod_outputs = copy_inputs;
-                break;
-            }
-            copy_ins = m.insert_instruction(ins, sins->get_operator(), copy_inputs, mod_args);
-        }
-        map_ins[sins] = copy_ins;
-        mod_outputs = {copy_ins};
-    }
+    module_ref smod        = cond ? mod_inputs.at(0) : mod_inputs.at(1);
+    auto mod_outputs       = m.insert_module_instructions(ins, smod);
     auto ins_outputs = ins->outputs();
     assert(mod_outputs.size() >= ins_outputs.size());
...
@@ -468,5 +468,11 @@ std::vector<shape> try_compute_shape(const operation& op, const std::vector<shap
     }
     return {new_shape};
 }
+
+migraphx::instruction* as_address(const instruction_ref& ins) noexcept
+{
+    return std::addressof(*ins);
+}
+
 } // namespace MIGRAPHX_INLINE_NS
 } // namespace migraphx
+#include <iterator>
 #include <migraphx/module.hpp>
 #include <migraphx/stringutils.hpp>
 #include <migraphx/instruction.hpp>
@@ -302,6 +303,55 @@ instruction_ref module::move_instructions(instruction_ref src, instruction_ref d
     return src;
 }
+std::vector<instruction_ref> module::insert_module_instructions(
+    instruction_ref ins, module_ref m, std::unordered_map<instruction_ref, instruction_ref> map_ins)
+{
+    std::vector<instruction_ref> mod_outputs;
+    for(auto sins : iterator_for(*m))
+    {
+        if(contains(map_ins, sins))
+            continue;
+        instruction_ref copy_ins;
+        if(sins->name() == "@literal")
+        {
+            auto l   = sins->get_literal();
+            copy_ins = this->add_literal(l);
+        }
+        else if(sins->name() == "@param")
+        {
+            auto&& name = any_cast<builtin::param>(sins->get_operator()).parameter;
+            auto s      = sins->get_shape();
+            copy_ins    = this->add_parameter(name, s);
+        }
+        else if(sins->name() == "@outline")
+        {
+            auto s   = sins->get_shape();
+            copy_ins = this->add_outline(s);
+        }
+        else
+        {
+            auto mod_args = sins->module_inputs();
+            auto inputs   = sins->inputs();
+            std::vector<instruction_ref> copy_inputs(inputs.size());
+            std::transform(inputs.begin(), inputs.end(), copy_inputs.begin(), [&](auto i) {
+                return contains(map_ins, i) ? map_ins[i] : i;
+            });
+            if(sins->name() == "@return")
+            {
+                mod_outputs = copy_inputs;
+                break;
+            }
+            copy_ins = this->insert_instruction(ins, sins->get_operator(), copy_inputs, mod_args);
+        }
+        map_ins[sins] = copy_ins;
+    }
+    if(mod_outputs.empty())
+        mod_outputs = {map_ins.at(std::prev(m->end()))};
+    return mod_outputs;
+}
 instruction_ref module::add_literal(literal l)
 {
     impl->emplace_front(std::move(l));
@@ -332,6 +382,20 @@ instruction_ref module::add_return(std::vector<instruction_ref> args)
     return result;
 }
+instruction_ref module::replace_return(std::vector<instruction_ref> args)
+{
+    auto last = std::prev(this->end());
+    // If there is no return then add a return
+    if(last->name() != "@return")
+        return this->add_return(args);
+    shape r = compute_shape(last->get_operator(), args);
+    instruction::replace(last, last->get_operator(), r, std::move(args));
+    assert(last->valid(begin()));
+    return last;
+}
 shape module::get_parameter_shape(std::string name) const
 {
     auto ins = std::find_if(
...
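The new replace_return rounds out the module-editing API: it rewrites the operands of an existing @return in place (recomputing its shape) and falls back to add_return when the module has none. A hedged usage sketch (parameter name and shape are illustrative):

#include <migraphx/make_op.hpp>
#include <migraphx/module.hpp>
#include <migraphx/shape.hpp>

// Sketch: retarget an existing module's @return without appending a duplicate.
void retarget_return(migraphx::module& m)
{
    auto x = m.add_parameter("x", migraphx::shape{migraphx::shape::float_type, {4}});
    auto y = m.add_instruction(migraphx::make_op("relu"), x);
    m.add_return({y});
    // later: make the module return x instead; the existing @return is rewritten
    m.replace_return({x});
}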
@@ -20,7 +20,7 @@ auto tune_attribute(const std::vector<int64_t>& vec,
                     const std::vector<std::size_t>& lens)
 {
     std::vector<int64_t> result(vec);
-    int64_t n_rank = static_cast<int64_t>(lens.size());
+    int64_t n_rank = lens.size();
     std::vector<op::normalize_attribute> vec_attrs = val.to_vector<op::normalize_attribute>();
     if(contains(vec_attrs, op::normalize_attribute::use_output))
     {
...
@@ -39,7 +39,7 @@ struct parse_gather_elements : op_parser<parse_gather_elements>
     int tuned_axis   = tune_axis(n_rank, axis, opd.op_name);
     auto axis_stride = data_s.strides()[tuned_axis];
-    int64_t data_elem_num = static_cast<int64_t>(data_s.elements());
+    int64_t data_elem_num = data_s.elements();
     // reshape the input data as one dimension and used as input data
     // to the gather operator
     arg_data = info.add_instruction(make_op("reshape", {{"dims", {data_elem_num}}}), arg_data);
...
@@ -61,7 +61,7 @@ struct parse_gemm : op_parser<parse_gemm>
                       ? info.add_instruction(make_op("transpose", {{"permutation", perm}}), args[1])
                       : args[1];
-        auto ret = info.add_instruction(make_op("dot", {{"alpha", 1.0f}, {"beta", 0.0f}}), l1, l2);
+        auto ret = info.add_instruction(make_op("dot"), l1, l2);
         if(args.size() == 3)
         {
...
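With alpha and beta stripped from the operator itself, the gemm parser must materialize any scaling explicitly around the plain dot. A hedged sketch of that composition (written as if inside the parser's scope; the multibroadcast attribute is assumed to be named "output_lens" in this era of the codebase, and alpha_lit/alpha_bcast are illustrative names):

// alpha applied to (A * B) as an elementwise mul with a broadcast literal
auto dot_ab = info.add_instruction(make_op("dot"), l1, l2);
auto alpha_lit =
    info.add_literal(literal{shape{dot_ab->get_shape().type()}, {alpha}});
auto alpha_bcast = info.add_instruction(
    make_op("multibroadcast", {{"output_lens", dot_ab->get_shape().lens()}}), alpha_lit);
auto scaled = info.add_instruction(make_op("mul"), alpha_bcast, dot_ab);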