Commit 7e297b13 authored by Paul

Merge

parents 86ea5e91 aa7ff911
#ifndef MIGRAPHX_GUARD_OPERATORS_ROIALIGN_HPP
#define MIGRAPHX_GUARD_OPERATORS_ROIALIGN_HPP
#include <limits>
#include <migraphx/check_shapes.hpp>
#include <migraphx/op/common.hpp>
#include <migraphx/config.hpp>
#include <migraphx/argument.hpp>
#include <migraphx/par_for.hpp>
#include <migraphx/dfor.hpp>
#include <migraphx/ranges.hpp>
#include <migraphx/shape_for_each.hpp>
#include <cmath>
#include <numeric>
#include <utility>
#include <vector>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace op {
// RoiAlign: for each region of interest (ROI) over a 4-D NCHW feature map,
// pool the region into a fixed output_height x output_width tile using
// bilinear sampling. Presumably mirrors the ONNX RoiAlign operator --
// confirm against the ONNX spec.
struct roialign
{
// coordinate transformation mode; "output_half_pixel" shifts sample
// coordinates by -0.5 (see calc_pos_weight)
std::string coord_trans_mode = "half_pixel";
// reduction applied over the samples in each output bin (average or max)
pooling_mode mode = {pooling_mode::average};
// height/width of each pooled output tile
int64_t output_height = 1;
int64_t output_width = 1;
// samples per bin along each axis; <= 0 means derive from the bin size
int64_t sampling_ratio = 0;
// multiplier mapping ROI coordinates onto the feature-map coordinate space
float spatial_scale = 1.0f;
// reflection of the operator's serializable attributes
template <class Self, class F>
static auto reflect(Self& self, F f)
{
return pack(f(self.coord_trans_mode, "coordinate_transformation_mode"),
f(self.mode, "mode"),
f(self.output_height, "output_height"),
f(self.output_width, "output_width"),
f(self.sampling_ratio, "sampling_ratio"),
f(self.spatial_scale, "spatial_scale"));
}
std::string name() const { return "roialign"; }
// Validates the three inputs (feature map x, rois, batch indices) and
// returns {num_rois, channels, output_height, output_width} in x's type.
shape compute_shape(std::vector<shape> inputs) const
{
check_shapes{inputs, *this}.has(3);
auto x_lens = inputs.at(0).lens();
auto roi_lens = inputs.at(1).lens();
auto bi_lens = inputs.at(2).lens();
auto type = inputs.at(0).type();
// check inputs are well-formed
if(bi_lens.size() != 1)
{
MIGRAPHX_THROW("ROIALIGN: batch indices should be 1 dimension!");
}
if(roi_lens.size() != 2 or roi_lens.at(1) != 4)
{
MIGRAPHX_THROW(
"ROIALIGN: rois should be 2 dimensions, and the second dim should be 4!");
}
if(roi_lens.front() != bi_lens.front())
{
MIGRAPHX_THROW("ROIALIGN: rois and batch indices inputs should have the same number!");
}
// one output tile per ROI; channel count is preserved from x
std::vector<std::size_t> out_lens = x_lens;
out_lens[0] = roi_lens[0];
out_lens[2] = output_height;
out_lens[3] = output_width;
return {type, out_lens};
}
// Precomputed bilinear-interpolation data for one sample point
struct pos_weight
{
// neighbor indices for the bilinear interpolation
std::array<std::size_t, 4> pos = {0, 0, 0, 0};
// neighbor weights for the bilinear interpolation
std::array<float, 4> w = {0.0f, 0.0f, 0.0f, 0.0f};
};
// Precomputes, for every (output bin, sample point) pair, the four
// neighboring input offsets and their bilinear weights; the result is
// reused by every channel (see compute()). dims is the input
// height/width, comp_s iterates {ph, pw, iy, ix}, roi_start/bin_size are
// in (y, x) order, bin_grid_size is the sample count per bin per axis.
auto calc_pos_weight(const std::array<std::size_t, 2>& dims,
const shape& comp_s,
const std::array<float, 2>& roi_start,
const std::array<float, 2>& bin_size,
const std::array<std::size_t, 2>& bin_grid_size) const
{
std::vector<pos_weight> results(bin_grid_size[0] * bin_grid_size[1] * output_height *
output_width);
shape_for_each(comp_s, [&](auto idx) {
// p = output bin (ph, pw); i = sample point within the bin (iy, ix)
std::array<std::size_t, 2> p = {idx[0], idx[1]};
std::array<std::size_t, 2> i = {idx[2], idx[3]};
auto index = comp_s.index(idx);
std::array<float, 2> xy{};
std::array<int64_t, 2> low{};
std::array<int64_t, 2> high{};
for(auto ii : range(p.size()))
{
// continuous sample coordinate inside the ROI
xy[ii] = roi_start[ii] + p[ii] * bin_size[ii] +
(i[ii] + .5f) * bin_size[ii] / bin_grid_size[ii];
xy[ii] = (coord_trans_mode == "output_half_pixel") ? (xy[ii] - 0.5f) : xy[ii];
// samples fully outside the input contribute zero weight
// (default-constructed pos_weight)
if(xy[ii] < -1.0 or xy[ii] > dims[ii])
{
results[index] = pos_weight{};
return;
}
xy[ii] = std::max(xy[ii], 0.0f);
low[ii] = xy[ii];
high[ii] = low[ii] + 1;
// clamp to the last valid row/column
if(low[ii] >= dims[ii] - 1)
{
xy[ii] = high[ii] = low[ii] = dims[ii] - 1;
}
}
// flattened offsets of the 4 neighbors: (lo,lo) (lo,hi) (hi,lo) (hi,hi)
results[index].pos = {low[0] * dims[1] + low[1],
low[0] * dims[1] + high[1],
high[0] * dims[1] + low[1],
high[0] * dims[1] + high[1]};
// fractional distances to the low neighbors
float ly = xy[0] - low[0];
float lx = xy[1] - low[1];
float hy = 1.0f - ly;
float hx = 1.0f - lx;
// save weights and indices
results[index].w = {hy * hx, hy * lx, ly * hx, ly * lx};
});
return results;
}
// Reduction policy: running maximum over a bin's samples
struct max_pool
{
double init() { return std::numeric_limits<double>::lowest(); }
double operator()(double x, double y) { return std::max(x, y); }
double final(double x, std::size_t) { return (x); }
};
// Reduction policy: arithmetic mean over a bin's samples
struct avg_pool
{
double init() { return 0.0; }
double operator()(double x, double y) { return x + y; }
double final(double x, std::size_t y) { return (y == 0) ? 0.0 : (x / y); }
};
// Reduces one output bin: bilinearly interpolates each of the bin's
// bin_grid_size[0] x bin_grid_size[1] samples from its 4 precomputed
// neighbors and folds them with op. data is an iterator at the start of
// the channel's plane; index is the running position in pos_weights and
// its advanced value is returned alongside the pooled result.
template <class T, class Op>
std::tuple<double, int64_t> calc_pooling(const T& data,
const std::array<std::size_t, 2>& bin_grid_size,
const std::vector<pos_weight>& pos_weights,
int64_t index,
Op op) const
{
double output_val = op.init();
const int64_t count = bin_grid_size[0] * bin_grid_size[1];
dfor(bin_grid_size[0], bin_grid_size[1])([&](auto, auto) {
const auto& pc = pos_weights[index];
std::array<double, 4> wv;
// weighted contribution of each of the 4 neighbors
std::transform(
pc.w.begin(), pc.w.end(), pc.pos.begin(), wv.begin(), [&](auto w, auto pos) {
return *(data + pos) * w;
});
output_val = std::accumulate(wv.begin(), wv.end(), output_val, op);
index += 1;
});
output_val = op.final(output_val, count);
return {output_val, index};
}
// For each ROI (parallelized over ROIs): precompute the sample
// positions/weights once, then pool every channel with them.
argument compute(const shape& output_shape, std::vector<argument> args) const
{
argument result{output_shape};
const auto& out_lens = output_shape.lens();
int64_t n_rois = out_lens[0];
std::size_t channels = out_lens[1];
// output dims of height and width, in all 2-dim arrays, the first dim
// is for height and second dim is for width
std::array<std::size_t, 2> out_dims = {out_lens[2], out_lens[3]};
const auto& x_lens = args.at(0).get_shape().lens();
// input dims of height and width
std::array<std::size_t, 2> in_dims = {x_lens[2], x_lens[3]};
auto roi_s = args.at(1).get_shape();
visit_all(result, args.at(0), args.at(1))([&](auto output, auto x, auto roi) {
const auto* batch_indices = args.at(2).cast<int64_t>();
par_for(n_rois, [&](auto n) {
const auto bottom_data = x.begin();
const auto roi_batch_ind = batch_indices[n];
// Do not use rounding; this implementation detail is critical.
// ROI columns are reordered so component 0 is the height (y)
// axis: starts read columns (1, 0), ends read columns (3, 2).
std::array<float, 2> roi_starts = {
static_cast<float>(roi[roi_s.index({n, 1})] * spatial_scale),
static_cast<float>(roi[roi_s.index({n, 0})] * spatial_scale)};
std::array<float, 2> roi_ends = {
static_cast<float>(roi[roi_s.index({n, 3})] * spatial_scale),
static_cast<float>(roi[roi_s.index({n, 2})] * spatial_scale)};
// Force malformed ROIs to be 1x1
std::array<float, 2> roi_size{};
std::array<float, 2> bin_size{};
std::array<std::size_t, 2> bin_grid_size{};
for(auto ii : range(roi_size.size()))
{
roi_size[ii] = roi_ends[ii] - roi_starts[ii];
roi_size[ii] = std::max(roi_size[ii], 1.0f);
bin_size[ii] = roi_size[ii] / out_dims[ii];
bin_grid_size[ii] = (sampling_ratio > 0)
? sampling_ratio
: std::ceil(roi_size[ii] / out_dims[ii]);
}
// we want to precalculate indices and weights shared by all channels,
// this is the key point of optimization
std::vector<std::size_t> comp_lens = {
out_dims[0], out_dims[1], bin_grid_size[0], bin_grid_size[1]};
shape comp_s{shape::float_type, comp_lens};
auto pre_calc =
this->calc_pos_weight(in_dims, comp_s, roi_starts, bin_size, bin_grid_size);
// iterate (channel, ph, pw); vec_index tracks each channel's
// running position in pre_calc (all channels share one table)
std::vector<std::size_t> comp_lens1 = {channels, out_dims[0], out_dims[1]};
shape comp_s1{migraphx::shape::float_type, comp_lens1};
std::vector<int64_t> vec_index(channels, 0);
shape_for_each(comp_s1, [&](auto idx) {
auto c = idx[0];
auto ph = idx[1];
auto pw = idx[2];
// start of this (batch, channel) plane in the input
const auto offset_bottom_data =
bottom_data + static_cast<int64_t>((roi_batch_ind * channels + c) *
in_dims[0] * in_dims[1]);
double output_val;
std::tie(output_val, vec_index[c]) =
(mode == migraphx::op::pooling_mode::average)
? this->calc_pooling(offset_bottom_data,
bin_grid_size,
pre_calc,
vec_index[c],
avg_pool{})
: this->calc_pooling(offset_bottom_data,
bin_grid_size,
pre_calc,
vec_index[c],
max_pool{});
output(n, c, ph, pw) = output_val;
});
});
});
return result;
}
};
} // namespace op
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#endif
......@@ -40,7 +40,6 @@ struct scalar
{
return args[0].reshape(output_shape);
}
lifetime get_lifetime() const { return lifetime::borrow; }
std::ptrdiff_t output_alias(const std::vector<shape>&) const { return 0; }
};
......
......@@ -8,6 +8,7 @@
#include <migraphx/shape_for_each.hpp>
#include <migraphx/config.hpp>
#include <migraphx/value.hpp>
#include <migraphx/op/name.hpp>
#include <migraphx/op/normalize_attribute.hpp>
#include <cmath>
#include <utility>
......@@ -16,7 +17,17 @@ namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace op {
struct scatter
// The scatter operator fetches a subset of data given by an index array and then performs a
// reduction operation (add, multiply, or just set the data) on each element returned. We implement
// it as a separate derived struct for each of the three reduction methods. The related operator
// scatterND is a generalization that works on a set of 3 tensors of different ranks. The
// complementary operations are gather/gatherND.
//
// This is a template for deriving child structs from. Each child needs to define
// only a reduction() method. Names are automatically handled by the op_name template.
template <class Derived>
struct scatter : op_name<Derived>
{
int64_t axis = 0;
......@@ -33,29 +44,44 @@ struct scatter
return {{"normalize_axes", normalize}};
}
std::string name() const { return "scatter"; }
shape normalize_compute_shape(std::vector<shape> inputs) const
{
check_shapes{inputs, *this}.has(3).standard();
return inputs.front();
// If non-packed, this converts to a packed output while preserving permutation of tensor
return inputs.front().with_lens(inputs.front().lens());
}
argument compute(const shape& output_shape, std::vector<argument> args) const
{
argument result{output_shape};
// max dimension in axis
auto& self = static_cast<const Derived&>(*this);
// max dimension in each axis
auto axis_dim_size = output_shape.lens()[axis];
// cast all arguments as correct type
visit_all(result, args[0], args[2])([&](auto output, auto data, auto update) {
// copy all of data to output
std::copy(data.begin(), data.end(), output.begin());
args[1].visit([&](auto indices) {
auto ind_s = indices.get_shape();
// iterate through items in shape
shape_for_each(ind_s, [&](const auto& idx) {
auto out_idx = idx;
auto index = indices[ind_s.index(idx)];
auto out_idx = idx;
// Overloaded tensor_view::() invokes indexing logic of
// std::size_t shape::index(std::size_t i) const
// which handles nonstandard shapes correctly
auto index = indices(idx.begin(), idx.end());
// normalize negative indexes (may be redundant after using
// normalize_compute_shape())
index = (index < 0) ? index + axis_dim_size : index;
out_idx[axis] = index;
output[output_shape.index(out_idx)] = update[ind_s.index(idx)];
// look up the appropriate locations in output, using idx and out_idx.
// call reduction() method of derived struct to copy and reduce that element
self.reduction()(output(out_idx.begin(), out_idx.end()),
update(idx.begin(), idx.end()));
});
});
});
......
#ifndef MIGRAPHX_GUARD_OPERATORS_SCATTER_ADD_HPP
#define MIGRAPHX_GUARD_OPERATORS_SCATTER_ADD_HPP
#include <array>
#include <migraphx/check_shapes.hpp>
#include <migraphx/stringutils.hpp>
#include <migraphx/streamutils.hpp>
#include <migraphx/shape_for_each.hpp>
#include <migraphx/config.hpp>
#include <migraphx/value.hpp>
#include <migraphx/op/normalize_attribute.hpp>
#include <cmath>
#include <utility>
#include <migraphx/op/scatter.hpp>
// Scatter op. with "add" function as reduction.
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace op {
// Scatter with additive reduction: each update value is accumulated into
// the addressed output element.
struct scatter_add : scatter<scatter_add>
{
// Invoked by scatter<Derived>::compute() for every scattered element,
// playing the role of a virtual function. The sibling ops (scatter_mul,
// scatter_none) differ only in this callable.
auto reduction() const
{
return [](auto& out_elem, const auto& upd) { out_elem += upd; };
}
// the operator name ("scatter_add") is supplied by op_name<> via scatter<>
};
} // namespace op
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#endif
#ifndef MIGRAPHX_GUARD_OPERATORS_SCATTER_MUL_HPP
#define MIGRAPHX_GUARD_OPERATORS_SCATTER_MUL_HPP
#include <array>
#include <migraphx/check_shapes.hpp>
#include <migraphx/stringutils.hpp>
#include <migraphx/streamutils.hpp>
#include <migraphx/shape_for_each.hpp>
#include <migraphx/config.hpp>
#include <migraphx/value.hpp>
#include <migraphx/op/normalize_attribute.hpp>
#include <cmath>
#include <utility>
#include <migraphx/op/scatter.hpp>
// Scatter op. with "multiply" as the reduction function.
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace op {
// Scatter with multiplicative reduction: each update value is multiplied
// into the addressed output element.
struct scatter_mul : scatter<scatter_mul>
{
// Invoked by scatter<Derived>::compute() for every scattered element,
// playing the role of a virtual function. The sibling ops (scatter_add,
// scatter_none) differ only in this callable.
auto reduction() const
{
return [](auto& out_elem, const auto& upd) { out_elem *= upd; };
}
// the operator name ("scatter_mul") is supplied by op_name<> via scatter<>
};
} // namespace op
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#endif
#ifndef MIGRAPHX_GUARD_OPERATORS_SCATTER_NONE_HPP
#define MIGRAPHX_GUARD_OPERATORS_SCATTER_NONE_HPP
#include <array>
#include <migraphx/check_shapes.hpp>
#include <migraphx/stringutils.hpp>
#include <migraphx/streamutils.hpp>
#include <migraphx/shape_for_each.hpp>
#include <migraphx/config.hpp>
#include <migraphx/value.hpp>
#include <migraphx/op/normalize_attribute.hpp>
#include <migraphx/op/scatter.hpp>
#include <cmath>
#include <utility>
// Scatter op. with "none" as the reduction function (just copies the value). This is identical to
// the previously existing Scatter op.
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace op {
// Scatter with no reduction: the update value simply overwrites the
// addressed output element (the behavior of the original Scatter op).
struct scatter_none : scatter<scatter_none>
{
// Invoked by scatter<Derived>::compute() for every scattered element,
// playing the role of a virtual function. The sibling ops (scatter_add,
// scatter_mul) differ only in this callable.
auto reduction() const
{
return [](auto& out_elem, const auto& upd) { out_elem = upd; };
}
// the operator name ("scatter_none") is supplied by op_name<> via scatter<>
};
} // namespace op
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#endif
#ifndef MIGRAPHX_GUARD_OPERATORS_SCATTERND_ADD_HPP
#define MIGRAPHX_GUARD_OPERATORS_SCATTERND_ADD_HPP
#include <migraphx/op/scatternd_op.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace op {
// ScatterND with additive reduction: each update value is added into the
// output element addressed by its index tuple.
struct scatternd_add : scatternd_op<scatternd_add>
{
// prefer = default over a user-provided empty body (modernize-use-equals-default)
scatternd_add() = default;
// Called by scatternd_op<Derived>::compute() for every update element.
auto reduction() const
{
return [](auto& x, const auto& y) { x += y; };
}
};
} // namespace op
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#endif
#ifndef MIGRAPHX_GUARD_OPERATORS_SCATTERND_MUL_HPP
#define MIGRAPHX_GUARD_OPERATORS_SCATTERND_MUL_HPP
#include <migraphx/op/scatternd_op.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace op {
// ScatterND with multiplicative reduction: each update value is multiplied
// into the output element addressed by its index tuple.
struct scatternd_mul : scatternd_op<scatternd_mul>
{
// prefer = default over a user-provided empty body (modernize-use-equals-default)
scatternd_mul() = default;
// Called by scatternd_op<Derived>::compute() for every update element.
auto reduction() const
{
return [](auto& x, const auto& y) { x *= y; };
}
};
} // namespace op
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#endif
#ifndef MIGRAPHX_GUARD_OPERATORS_SCATTERND_NONE_HPP
#define MIGRAPHX_GUARD_OPERATORS_SCATTERND_NONE_HPP
#include <migraphx/op/scatternd_op.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace op {
// ScatterND with no reduction: the update value overwrites the output
// element addressed by its index tuple.
struct scatternd_none : scatternd_op<scatternd_none>
{
// prefer = default over a user-provided empty body (modernize-use-equals-default)
scatternd_none() = default;
// Called by scatternd_op<Derived>::compute() for every update element.
auto reduction() const
{
return [](auto& x, const auto& y) { x = y; };
}
};
} // namespace op
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#endif
#ifndef MIGRAPHX_GUARD_OPERATORS_SCATTERND_OP_HPP
#define MIGRAPHX_GUARD_OPERATORS_SCATTERND_OP_HPP
#include <migraphx/op/name.hpp>
#include <migraphx/check_shapes.hpp>
#include <migraphx/par_for.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace op {
// CRTP base for the scatterND family (scatternd_add / scatternd_mul /
// scatternd_none). Each derived struct supplies a reduction() callable that
// combines an update value into the addressed output element; the operator
// name is produced by op_name<Derived>.
template <class Derived>
struct scatternd_op : op_name<Derived>
{
// Inputs: data (rank r), indices (rank q, last dim of size k), updates.
// Requires updates.lens == indices.lens[0:q-1] ++ data.lens[k:r-1].
// Output has data's shape.
shape compute_shape(std::vector<shape> inputs) const
{
check_shapes{inputs, *this}.has(3);
auto r = inputs.front().lens().size();
auto q = inputs.at(1).lens().size();
auto k = inputs.at(1).lens().back();
auto ind_lens = inputs.at(1).lens();
auto upd_lens = inputs.back().lens();
auto data_lens = inputs.front().lens();
// an index tuple addresses a slice of data, so it cannot exceed r
if(k > r)
MIGRAPHX_THROW("ScatterND: index of size " + std::to_string(k) +
" is too large for tensor of rank " + std::to_string(r));
if(not(std::equal(ind_lens.begin(), ind_lens.begin() + q - 1, upd_lens.begin()) and
std::equal(data_lens.begin() + k, data_lens.end(), upd_lens.begin() + q - 1)))
MIGRAPHX_THROW("ScatterND: incorrect update shape. update.lens != indices.lens[0:q-1] "
"++ data.lens[k:r-1]");
// keep a packed layout when the input is not broadcasted
auto s = inputs.front();
if(s.broadcasted())
{
return {s.type(), s.lens()};
}
else
{
return s.with_lens(s.lens());
}
}
// Copies data into the output, then for every element of updates applies
// the derived reduction at the output location its index tuple addresses.
// NOTE(review): updates are applied under par_for; duplicate index tuples
// would race. Presumably indices are required to be unique -- confirm
// against the ONNX ScatterND contract.
argument compute(const shape& output_shape, std::vector<argument> args) const
{
argument result{output_shape};
auto& self = static_cast<const Derived&>(*this);
visit_all(result, args[0], args[2])([&](auto output, auto data, auto updates) {
// start from a copy of data; scattered elements are reduced in place
std::copy(data.begin(), data.end(), output.begin());
args[1].visit([&](auto indices) {
auto updates_shape = updates.get_shape();
// standard-layout view used to convert flat i -> multi-index
auto updates_std = shape{updates_shape.type(), updates_shape.lens()};
auto indices_shape = indices.get_shape();
auto k = indices_shape.lens().back();
auto q = indices_shape.lens().size();
auto r = output_shape.lens().size();
par_for(updates_shape.elements(), [&](const auto i) {
auto updates_idx = updates_std.multi(i);
// locate the k-tuple in indices for this update element:
// its first q-1 coordinates, with the last coordinate 0
std::vector<std::size_t> indices_idx(q, 0);
std::copy(
updates_idx.begin(), updates_idx.begin() + q - 1, indices_idx.begin());
auto index_start = indices.begin() +
indices_shape.index(indices_idx.begin(), indices_idx.end());
auto index_end = index_start + k;
// output coordinate = index tuple ++ trailing update coords
std::vector<std::size_t> out_idx(r, 0);
std::copy(index_start, index_end, out_idx.begin());
std::copy(updates_idx.begin() + q - 1, updates_idx.end(), out_idx.begin() + k);
self.reduction()(output[output_shape.index(out_idx)], updates[i]);
});
});
});
return result;
}
// no state to initialize; presumably kept for interface symmetry -- TODO confirm
auto init() const {}
scatternd_op() {}
};
} // namespace op
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#endif
......@@ -18,6 +18,7 @@ namespace op {
struct sigmoid : unary<sigmoid>
{
std::string point_op() const { return "1.f / (1.f + ${function:exp}(-${0}))"; }
auto apply() const
{
return [](auto x) { return 1.f / (1.f + std::exp(-x)); };
......
......@@ -18,6 +18,7 @@ namespace op {
struct sign : unary<sign>
{
std::string point_op() const { return "(${0} > 0 ? 1 : ((${0} < 0) ? -1 : 0))"; }
auto apply() const
{
return [](auto x) { return (x > 0 ? 1 : ((x < 0) ? -1 : 0)); };
......
......@@ -37,48 +37,53 @@ struct squeeze
std::string name() const { return "squeeze"; }
shape normalize_compute_shape(std::vector<shape> inputs) const
{
check_shapes{inputs, *this}.has(1).standard();
check_shapes{inputs, *this}.has(1);
auto input_shape = inputs[0];
auto type = input_shape.type();
auto old_lens = input_shape.lens();
auto old_strides = input_shape.strides();
if(std::any_of(axes.begin(), axes.end(), [&](auto axis) { return old_lens[axis] != 1; }))
{
MIGRAPHX_THROW("squeeze axis dimension should be equal to 1");
}
std::vector<std::size_t> new_lens;
std::vector<std::size_t> new_strides;
if(axes.empty())
{
std::copy_if(old_lens.begin(),
old_lens.end(),
std::back_inserter(new_lens),
[](auto len) { return len != 1; });
for(auto i : range(old_lens.size()))
{
if(old_lens[i] != 1)
{
new_lens.push_back(old_lens[i]);
new_strides.push_back(old_strides[i]);
}
}
}
else
{
for(std::size_t i = 0; i < old_lens.size(); i++)
for(auto i : range(old_lens.size()))
{
if(std::find(axes.begin(), axes.end(), i) == axes.end())
{
new_lens.push_back(old_lens[i]);
new_strides.push_back(old_strides[i]);
}
}
}
if(new_lens.empty())
{
return shape{type};
}
else
{
return shape{type, new_lens};
return shape{type, new_lens, new_strides};
}
}
argument compute(shape output_shape, std::vector<argument> args) const
{
return args[0].reshape(output_shape);
}
lifetime get_lifetime() const { return lifetime::borrow; }
std::ptrdiff_t output_alias(const std::vector<shape>&) const { return 0; }
};
......
......@@ -72,8 +72,6 @@ struct step
return args[0].reshape(output_shape);
}
lifetime get_lifetime() const { return lifetime::borrow; }
std::ptrdiff_t output_alias(const std::vector<shape>&) const { return 0; }
};
......
#ifndef MIGRAPHX_GUARD_OPERATORS_GATHER_HPP
#define MIGRAPHX_GUARD_OPERATORS_GATHER_HPP
#include <algorithm>
#include <migraphx/check_shapes.hpp>
#include <migraphx/config.hpp>
#include <migraphx/op/normalize_attribute.hpp>
#include <migraphx/par_for.hpp>
#include <migraphx/ranges.hpp>
#include <migraphx/value.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace op {
// Top-k: along the given axis, select the k largest (or smallest with
// largest=false) elements. Produces a tuple of two outputs: the selected
// values and their int64 source indices along the axis, ordered best-first.
struct topk
{
// number of elements to keep along the axis
int64_t k = 1;
// axis to select along; negative values normalized via attributes()
int64_t axis = 0;
// true -> keep largest values, false -> keep smallest
bool largest = true;
template <class Self, class F>
static auto reflect(Self& self, F f)
{
return pack(f(self.k, "k"), f(self.axis, "axis"), f(self.largest, "largest"));
}
// request framework normalization of negative axis values
value attributes() const
{
value normalize;
normalize["axis"] = value::array{normalize_attribute::include_min};
return {{"normalize_axes", normalize}};
}
std::string name() const { return "topk"; }
// Output dims equal the input dims with lens[axis] replaced by k; the
// result is a two-element tuple shape (values, indices).
shape normalize_compute_shape(std::vector<shape> inputs) const
{
check_shapes{inputs, *this}.has(1).standard();
auto lens = inputs.at(0).lens();
auto type = inputs.at(0).type();
lens[axis] = k;
shape s_val{type, lens};
shape s_ind{shape::int64_type, lens};
return {{s_val, s_ind}};
}
// Fixed-size heap of the k best candidates seen so far. compare orders
// "better" candidates first, so std::make_heap on compare leaves the
// WORST kept candidate at the front, ready to be displaced.
template <class T, class Compare>
struct heap_vector
{
std::vector<T> data;
Compare compare;
heap_vector(const std::vector<T>& val, Compare comp) : data(val), compare(std::move(comp))
{
std::make_heap(data.begin(), data.end(), compare);
}
// replace the worst kept candidate when val ranks ahead of it
void try_push(T val)
{
if(not compare(val, data.front()))
return;
std::pop_heap(data.begin(), data.end(), compare);
data.back() = val;
std::push_heap(data.begin(), data.end(), compare);
}
// return the kept candidates ordered best-first
std::vector<T> sort()
{
auto sorted_data = data;
std::sort_heap(sorted_data.begin(), sorted_data.end(), compare);
return sorted_data;
}
};
// helper so the element type and comparator are deduced
template <class T, class Compare>
heap_vector<T, Compare> make_heap(std::vector<T> val, Compare compare) const
{
return {std::move(val), std::move(compare)};
}
argument compute(const shape& output_shape, std::vector<argument> args) const
{
auto vec_ss = output_shape.sub_shapes();
argument res_val{vec_ss.front()};
argument res_ind{vec_ss.back()};
auto in_s = args.front().get_shape();
auto out_s = vec_ss.front();
auto comp_lens = in_s.lens();
auto axis_dim = comp_lens[axis];
// iteration shape: one 1-D slice per position, axis collapsed to 1
comp_lens[axis] = 1;
shape comp_s{in_s.type(), comp_lens};
visit_all(res_val, args.front())([&](auto out_val, auto input) {
auto* out_ind = res_ind.cast<int64_t>();
// each parallel task selects top-k within one slice along axis
par_for(comp_s.elements(), [&](auto i) {
auto idx = comp_s.multi(i);
// seed the heap with the first k positions along the axis
std::vector<std::size_t> indices(k);
std::iota(indices.begin(), indices.end(), 0);
// ranks axis positions by their value; direction follows `largest`
auto comp = [&](auto i1, auto i2) {
auto idx1 = idx;
auto idx2 = idx;
idx1[axis] = i1;
idx2[axis] = i2;
return this->largest
? std::greater<>{}(input[in_s.index(idx1)], input[in_s.index(idx2)])
: std::less<>{}(input[in_s.index(idx1)], input[in_s.index(idx2)]);
};
auto hp = this->make_heap(indices, comp);
// stream the remaining axis positions through the heap
for(std::size_t ii = indices.size(); ii < axis_dim; ++ii)
{
hp.try_push(ii);
}
auto sorted_indices = hp.sort();
// write values and their source indices, best-first along axis
auto out_idx = idx;
auto in_idx = idx;
for(auto j : range(sorted_indices.size()))
{
out_idx[axis] = j;
in_idx[axis] = sorted_indices[j];
out_val[out_s.index(out_idx)] = input[in_s.index(in_idx)];
out_ind[out_s.index(out_idx)] = sorted_indices[j];
}
});
});
return {{res_val, res_ind}};
}
};
} // namespace op
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#endif
......@@ -21,7 +21,7 @@ struct transpose
template <class Self, class F>
static auto reflect(Self& self, F f)
{
return pack(f(self.dims, "dims"));
return pack(f(self.dims, "permutation"));
}
std::string name() const { return "transpose"; }
......@@ -32,31 +32,23 @@ struct transpose
auto input_lens = input.lens();
auto input_strides = input.strides();
auto t = input.type();
auto tuned_dims = dims;
// if not perm provided, reverse the dims
if(tuned_dims.empty())
{
tuned_dims.resize(input_lens.size());
std::iota(tuned_dims.begin(), tuned_dims.end(), 0);
std::reverse(tuned_dims.begin(), tuned_dims.end());
}
if(tuned_dims.size() != input_lens.size())
if(dims.size() != input_lens.size())
{
MIGRAPHX_THROW("Permutation has wrong number of axes");
}
std::vector<int64_t> axes(tuned_dims.size());
std::vector<int64_t> axes(dims.size());
std::iota(axes.begin(), axes.end(), 0);
if(!std::is_permutation(axes.begin(), axes.end(), tuned_dims.begin()))
if(!std::is_permutation(axes.begin(), axes.end(), dims.begin()))
{
MIGRAPHX_THROW("Invalid permutation");
MIGRAPHX_THROW("TRANSPOSE: Invalid permutation");
}
std::vector<size_t> output_lens(input_lens.size());
std::vector<size_t> output_strides(input_lens.size());
for(std::size_t i = 0; i < output_lens.size(); i++)
{
output_lens[i] = input_lens[tuned_dims[i]];
output_strides[i] = input_strides[tuned_dims[i]];
output_lens[i] = input_lens[dims[i]];
output_strides[i] = input_strides[dims[i]];
}
return {t, output_lens, output_strides};
}
......@@ -64,7 +56,6 @@ struct transpose
{
return args[0].reshape(output_shape);
}
lifetime get_lifetime() const { return lifetime::borrow; }
std::ptrdiff_t output_alias(const std::vector<shape>&) const { return 0; }
};
......
......@@ -41,7 +41,11 @@ struct unary : op_name<Derived>
{
check_shapes{inputs, static_cast<const Derived&>(*this)}.has(1);
auto s = inputs.at(0);
if(s.broadcasted())
if(s.scalar())
{
return s;
}
else if(s.broadcasted())
{
return {s.type(), s.lens()};
}
......@@ -60,7 +64,6 @@ struct unary : op_name<Derived>
input.end(),
output.begin(),
static_cast<const Derived&>(*this).apply());
});
});
return result;
......
......@@ -37,11 +37,11 @@ struct unsqueeze
std::string name() const { return "unsqueeze"; }
shape normalize_compute_shape(std::vector<shape> inputs) const
{
check_shapes{inputs, *this}.has(1).standard_or_scalar();
check_shapes{inputs, *this}.has(1);
auto input_shape = inputs[0];
auto type = input_shape.type();
auto old_lens = input_shape.lens();
auto old_strides = input_shape.strides();
if(input_shape.scalar())
{
if(old_lens.size() == 1 and old_lens.front() == 1)
......@@ -53,25 +53,34 @@ struct unsqueeze
std::size_t new_size = old_lens.size() + axes.size();
std::vector<std::size_t> new_lens(new_size);
std::vector<std::size_t> new_strides(new_size);
std::size_t p = 0;
for(std::size_t i = 0; i < new_size; i++)
for(auto i : range(new_size))
{
if(std::find(axes.begin(), axes.end(), i) != axes.end())
{
new_lens[i] = 1;
if(p == 0) // unsqueeze on the first axes
{
new_strides[i] = old_lens[0] * old_strides[0];
}
else // unsqueeze on middle or last axes
{
new_strides[i] = (p < old_strides.size()) ? old_strides[p - 1] : 1;
}
}
else
{
new_lens[i] = old_lens[p++];
new_lens[i] = old_lens[p];
new_strides[i] = old_strides[p++];
}
}
return shape{type, new_lens};
return shape{type, new_lens, new_strides};
}
argument compute(shape output_shape, std::vector<argument> args) const
{
return args[0].reshape(output_shape);
}
lifetime get_lifetime() const { return lifetime::borrow; }
std::ptrdiff_t output_alias(const std::vector<shape>&) const { return 0; }
};
......
#ifndef MIGRAPHX_GUARD_OPERATORS_WHERE_HPP
#define MIGRAPHX_GUARD_OPERATORS_WHERE_HPP
#include <array>
#include <migraphx/argument.hpp>
#include <migraphx/par_for.hpp>
#include <migraphx/check_shapes.hpp>
#include <migraphx/stringutils.hpp>
#include <migraphx/streamutils.hpp>
#include <migraphx/shape_for_each.hpp>
#include <migraphx/config.hpp>
#include <migraphx/value.hpp>
#include <migraphx/op/normalize_attribute.hpp>
#include <cmath>
#include <utility>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace op {
// Elementwise select: out[i] = condition[i] ? x[i] : y[i].
// Inputs: {condition, x, y}, all with the same dimensions.
struct where
{
std::string name() const { return "where"; }
// pointwise attribute allows this op to participate in pointwise fusion
value attributes() const { return {{"pointwise", true}, {"point_op", "${0} ? ${1} : ${2}"}}; }
// The result shape comes from the two data inputs (the condition only
// supplies the mask), preferring a packed / non-broadcast layout.
shape compute_shape(std::vector<shape> inputs) const
{
check_shapes{inputs, *this}.has(3).same_dims();
auto xs = inputs.at(1);
auto ys = inputs.at(2);
// identical packed shapes: reuse directly
if(xs == ys and xs.packed())
return xs;
// exactly one is packed: prefer the packed one
if(xs.packed() != ys.packed())
return xs.packed() ? xs : ys;
// exactly one is broadcasted: prefer the non-broadcast layout
if(xs.broadcasted() != ys.broadcasted())
return xs.broadcasted() ? ys.with_lens(xs.lens()) : xs.with_lens(xs.lens());
// otherwise fall back to a fresh standard shape
return {xs.type(), xs.lens()};
}
argument compute(const shape& output_shape, std::vector<argument> args) const
{
argument result{output_shape};
visit_all(result, args[1], args[2])([&](auto out, const auto tval, const auto fval) {
args[0].visit([&](const auto cond) {
// elementwise select, parallelized over the flat output
par_for(output_shape.elements(),
[&](auto idx) { out[idx] = cond[idx] ? tval[idx] : fval[idx]; });
});
});
return result;
}
};
} // namespace op
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#endif
......@@ -103,79 +103,69 @@ auto operator==(const T& x, const U& y) -> decltype(x.name() == y.name())
} // namespace operation_operators
template <class T>
auto normalize_compute_shape_op(rank<1>, const T& x, const std::vector<shape>& inputs)
-> decltype(x.normalize_compute_shape(inputs))
{
dependent_type<operation, T> y = x;
normalize_attributes(y, inputs[0].lens());
return any_cast<T>(y).normalize_compute_shape(inputs);
}
template <class T>
shape normalize_compute_shape_op(rank<0>, const T& x, const std::vector<shape>&)
auto compute_shape_op(rank<3>, const T& x, const std::vector<shape>& inputs)
-> decltype(x.compute_shape(inputs))
{
std::string name = x.name();
MIGRAPHX_THROW("Shape not computable: " + name);
return x.compute_shape(inputs);
}
template <class T>
shape normalize_compute_shape_op(const T& x, const std::vector<shape>& inputs)
auto compute_shape_op(rank<2>, const T& x, const std::vector<shape>& inputs)
-> decltype(x.normalize_compute_shape(inputs))
{
return normalize_compute_shape_op(rank<1>{}, x, inputs);
dependent_type<operation, T> y = x;
normalize_attributes(y, inputs[0].lens());
return any_cast<T>(y).normalize_compute_shape(inputs);
}
template <class T>
auto compute_shape_op(rank<1>,
const T& x,
const std::vector<shape>& inputs,
const std::vector<module_ref>& mod_args)
-> decltype(x.compute_shape(inputs, mod_args))
auto compute_shape_op(rank<1>, const T& x, const std::vector<shape>& inputs)
-> decltype(x.compute_shape(inputs, {}))
{
return x.compute_shape(inputs, mod_args);
return x.compute_shape(inputs, {});
}
template <class T>
shape
compute_shape_op(rank<0>, const T& x, const std::vector<shape>&, const std::vector<module_ref>&)
shape compute_shape_op(rank<0>, const T& x, const std::vector<shape>&)
{
std::string name = x.name();
MIGRAPHX_THROW("Shape not computable: " + name);
}
template <class T>
shape compute_shape_op(const T& x,
const std::vector<shape>& inputs,
const std::vector<module_ref>& mod_args)
shape compute_shape_op(const T& x, const std::vector<shape>& inputs)
{
return compute_shape_op(rank<1>{}, x, inputs, mod_args);
return compute_shape_op(rank<3>{}, x, inputs);
}
template <class T>
auto normalize_compute_shape_op(rank<1>,
const T& x,
const std::vector<shape>& inputs,
std::vector<module_ref>& mod_args)
-> decltype(x.normalize_compute_shape(inputs, mod_args))
auto mod_compute_shape_op(rank<1>,
const T& x,
const std::vector<shape>& inputs,
const std::vector<module_ref>& mod_args)
-> decltype(x.compute_shape(inputs, mod_args))
{
return x.normalize_compute_shape(inputs, mod_args);
return x.compute_shape(inputs, mod_args);
}
template <class T>
shape normalize_compute_shape_op(rank<0>,
const T& x,
const std::vector<shape>&,
const std::vector<module_ref>&)
shape mod_compute_shape_op(rank<0>,
const T& x,
const std::vector<shape>& inputs,
const std::vector<module_ref>& mod_args)
{
if(mod_args.empty())
return compute_shape_op(x, inputs);
std::string name = x.name();
MIGRAPHX_THROW("Shape not computable: " + name);
}
template <class T>
shape normalize_compute_shape_op(const T& x,
const std::vector<shape>& inputs,
std::vector<module_ref>& mod_args)
shape mod_compute_shape_op(const T& x,
const std::vector<shape>& inputs,
const std::vector<module_ref>& mod_args)
{
return normalize_compute_shape_op(rank<1>{}, x, inputs, mod_args);
return mod_compute_shape_op(rank<1>{}, x, inputs, mod_args);
}
template <class T>
......@@ -256,6 +246,18 @@ argument compute_op(const T& x,
return compute_op(rank<1>{}, x, output, inputs, module_args, f);
}
template <class T, class F>
auto compute_op(rank<4>,
const T& x,
context& ctx,
const shape& output,
const std::vector<argument>& inputs,
const std::vector<module_ref>& module_args,
F f) -> decltype(x.compute(auto_any_cast(ctx), output, inputs, module_args, f))
{
return x.compute(auto_any_cast(ctx), output, inputs, module_args, f);
}
template <class T, class F>
auto compute_op(rank<3>,
const T& x,
......@@ -313,7 +315,7 @@ argument compute_op(const T& x,
const std::vector<module_ref>& module_args,
F f)
{
return compute_op(rank<3>{}, x, ctx, output, inputs, module_args, f);
return compute_op(rank<4>{}, x, ctx, output, inputs, module_args, f);
}
template <class T>
......@@ -443,35 +445,62 @@ lifetime get_lifetime_op(const T&)
} // namespace detail
/*
 * Type-erased interface for:
 *
 * struct operation
 * {
 *     std::string name() const;
 *     bool is_context_free() const;
 *     bool need_normalization() const;
 *     bool has_finalize() const;
 *     lifetime get_lifetime() const;
 *     std::ptrdiff_t output_alias(const std::vector<shape>& input) const;
 *     value compile(context& ctx, const shape& output, const std::vector<shape>& input);
 *     void finalize(context& ctx, const shape& output, const std::vector<shape>& input);
 *     shape compute_shape(const std::vector<shape>& input) const;
 *     shape compute_shape(const std::vector<shape>& inputs,
 *                         const std::vector<module_ref>& mod_args) const;
 *     argument compute(context& ctx, const shape& output,
 *                      const std::vector<argument>& input) const;
 *     argument compute(const shape& output, const std::vector<argument>& input) const;
 *     argument compute(const shape& output,
 *                      const std::vector<argument>& input,
 *                      const std::vector<module_ref>& module_args,
 *                      std::function<std::vector<argument>(
 *                          module_ref&, const std::unordered_map<std::string, argument>&)> run)
 *                      const;
 *     argument compute(context& ctx,
 *                      const shape& output,
 *                      const std::vector<argument>& input,
 *                      const std::vector<module_ref>& module_args,
 *                      std::function<std::vector<argument>(
 *                          module_ref&, const std::unordered_map<std::string, argument>&)> run)
 *                      const;
 *     value to_value() const;
 *     void from_value(const value& v);
 *     value attributes() const;
 *     friend std::ostream& operator<<(std::ostream& os, const operation& op);
 *     friend bool operator==(const operation& x, const operation& y);
 * };
 *
 */
#ifdef TYPE_ERASED_DECLARATION
// Type-erased interface for: any operator type providing these members.
// Members marked "(optional)" need not be implemented by every operator type.
struct operation
{
    // The name identifying this operator (e.g. "roialign")
    std::string name() const;
    // (optional) True when compute() can run without a context argument
    bool is_context_free() const;
    // (optional)
    bool need_normalization() const;
    // (optional) True when finalize() should be invoked before computing
    bool has_finalize() const;
    // (optional)
    lifetime get_lifetime() const;
    // (optional) NOTE(review): presumably the index of the input aliased by
    // the output — confirm against callers
    std::ptrdiff_t output_alias(const std::vector<shape>& input) const;
    // (optional)
    value compile(context& ctx, const shape& output, const std::vector<shape>& input);
    // (optional)
    void finalize(context& ctx, const shape& output, const std::vector<shape>& input);
    // (optional) Computes the output shape from the input shapes
    shape compute_shape(const std::vector<shape>& input) const;
    // (optional) Computes the output shape for an operator that carries
    // module arguments (e.g. control-flow operators referencing submodules)
    shape compute_shape(const std::vector<shape>& inputs,
                        const std::vector<module_ref>& mod_args) const;
    // (optional) Context-dependent computation
    argument compute(context& ctx, const shape& output, const std::vector<argument>& input) const;
    // (optional) Context-free computation
    argument compute(const shape& output, const std::vector<argument>& input) const;
    // (optional) Context-free computation with module arguments; `run`
    // evaluates a referenced module with the given parameter map
    argument compute(const shape& output,
                     const std::vector<argument>& input,
                     const std::vector<module_ref>& module_args,
                     std::function<std::vector<argument>(
                         module_ref&, const std::unordered_map<std::string, argument>&)> run) const;
    // (optional) Context-dependent computation with module arguments
    argument compute(context& ctx,
                     const shape& output,
                     const std::vector<argument>& input,
                     const std::vector<module_ref>& module_args,
                     std::function<std::vector<argument>(
                         module_ref&, const std::unordered_map<std::string, argument>&)> run) const;
    // (optional) Serializes the operator's state to a value
    value to_value() const;
    // (optional) Restores the operator's state from a value
    void from_value(const value& v);
    // (optional)
    value attributes() const;
    // Prints the operation to a stream
    friend std::ostream& operator<<(std::ostream& os, const operation& op);
    // Equality comparison between two type-erased operations
    friend bool operator==(const operation& x, const operation& y);
};
#else
struct operation
{
......@@ -836,7 +865,7 @@ struct operation
T&& private_detail_te_self,
const std::vector<shape>& input)
{
return detail::normalize_compute_shape_op(private_detail_te_self, input);
return detail::compute_shape_op(private_detail_te_self, input);
}
template <class T>
......@@ -855,7 +884,7 @@ struct operation
const std::vector<shape>& inputs,
const std::vector<module_ref>& mod_args)
{
return detail::compute_shape_op(private_detail_te_self, inputs, mod_args);
return detail::mod_compute_shape_op(private_detail_te_self, inputs, mod_args);
}
template <class T>
......@@ -1220,6 +1249,7 @@ inline const ValueType& any_cast(const operation& x)
throw std::bad_cast();
return *y;
}
#endif
inline bool operator!=(const operation& x, const operation& y) { return !(x == y); }
......@@ -1257,7 +1287,7 @@ template <class T>
// Computes the output shape of a concrete operator. Participates in overload
// resolution only when the operator provides normalize_compute_shape (see the
// trailing decltype); the actual dispatch goes through detail::compute_shape_op.
inline auto compute_shape(const T& op, const std::vector<shape>& inputs)
    -> decltype(op.normalize_compute_shape(inputs))
{
    return detail::compute_shape_op(op, inputs);
}
inline shape compute_shape(const operation& op,
......@@ -1282,7 +1312,7 @@ inline auto compute_shape(const T& op,
const std::vector<module_ref>& mod_args)
-> decltype(op.normalize_compute_shape(inputs, mod_args))
{
return detail::normalize_compute_shape_op(op, inputs, mod_args);
return detail::compute_shape_op(op, inputs, mod_args);
}
inline bool is_context_free(const operation& op) { return op.is_context_free(); }
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment