".github/vscode:/vscode.git/clone" did not exist on "11421a3f44b95f706be0ba10016d0c2863cc0c4a"
Commit 7e297b13 authored by Paul

Merge

parents 86ea5e91 aa7ff911
@@ -38,7 +38,7 @@ struct gather
shape normalize_compute_shape(std::vector<shape> inputs) const
{
-check_shapes{inputs, *this}.has(2).standard();
+check_shapes{inputs, *this}.has(2);
auto lens = inputs[0].lens();
auto type = inputs[0].type();
lens.erase(lens.begin() + axis);
......
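The hunk above only shows the axis dimension being erased; under the usual ONNX gather rule the indices dimensions are then spliced in at the same position. A minimal standalone sketch of that rule, assuming non-scalar indices and using plain std::vector in place of MIGraphX shapes:

#include <cstddef>
#include <iostream>
#include <vector>

std::vector<std::size_t> gather_out_lens(std::vector<std::size_t> data_lens,
                                         const std::vector<std::size_t>& idx_lens,
                                         std::size_t axis)
{
    // Replace the data dimension at `axis` with the full indices shape.
    data_lens.erase(data_lens.begin() + axis);
    data_lens.insert(data_lens.begin() + axis, idx_lens.begin(), idx_lens.end());
    return data_lens;
}

int main()
{
    // data {3, 4, 5}, indices {2, 2}, axis = 1 -> {3, 2, 2, 5}
    for(auto d : gather_out_lens({3, 4, 5}, {2, 2}, 1))
        std::cout << d << ' ';
}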
#ifndef MIGRAPHX_GUARD_OPERATORS_GATHERND_HPP
#define MIGRAPHX_GUARD_OPERATORS_GATHERND_HPP
#include <migraphx/check_shapes.hpp>
#include <migraphx/shape_for_each.hpp>
#include <migraphx/par_for.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace op {
struct gathernd
{
int batch_dims = 0;
template <class Self, class F>
static auto reflect(Self& self, F f)
{
return pack(f(self.batch_dims, "batch_dims"));
}
std::string name() const { return "gathernd"; }
shape compute_shape(std::vector<shape> inputs) const
{
check_shapes{inputs, *this}.has(2);
auto r = inputs.front().lens().size();
auto q = inputs.back().lens().size();
auto k = inputs.back().lens().back();
if(k > r - batch_dims)
{
MIGRAPHX_THROW("GATHERND: Indices of length " + std::to_string(k) +
" cannot be used to access data of rank " +
std::to_string(r - batch_dims));
}
auto indices_lens_iter = inputs.back().lens().begin();
auto output_lens_size = q + r - k - batch_dims - 1;
std::vector<std::size_t> output_lens(output_lens_size);
std::copy(indices_lens_iter, indices_lens_iter + (q - 1), output_lens.begin());
if(k < r - batch_dims)
{
auto data_lens = inputs.front().lens();
std::copy(
data_lens.begin() + batch_dims + k, data_lens.end(), output_lens.begin() + q - 1);
}
shape output_shape{inputs.front().type(), output_lens};
return output_shape;
}
argument compute(const shape& output_shape, std::vector<argument> args) const
{
argument result{output_shape};
visit_all(result, args[0])([&](auto output, auto data) {
args[1].visit([&](auto indices) {
auto indices_shape = indices.get_shape();
auto indices_shape_lens = indices_shape.lens();
auto data_shape = data.get_shape();
auto data_shape_lens = data_shape.lens();
auto k = indices_shape.lens().back();
const auto num_slice_dims = k;
std::size_t num_slices = std::accumulate(indices_shape_lens.begin(),
indices_shape_lens.end() - 1,
1,
std::multiplies<std::size_t>());
std::size_t slice_size = std::accumulate(data_shape_lens.begin() + k + batch_dims,
data_shape_lens.end(),
1,
std::multiplies<std::size_t>());
std::size_t num_batches = std::accumulate(data_shape_lens.begin(),
data_shape_lens.begin() + batch_dims,
1,
std::multiplies<std::size_t>());
std::size_t data_batch_stride =
std::accumulate(data_shape_lens.begin() + batch_dims,
data_shape_lens.end(),
1,
std::multiplies<std::size_t>());
auto num_slices_per_batch = num_slices / num_batches;
std::vector<std::size_t> sizes_from_slice_dims(num_slice_dims);
{
auto running_product = slice_size;
for(std::size_t i = 0; i < num_slice_dims; ++i)
{
sizes_from_slice_dims[num_slice_dims - 1 - i] = running_product;
running_product *= data_shape_lens[batch_dims + num_slice_dims - 1 - i];
}
}
std::vector<std::size_t> input_slice_offsets(num_slices);
par_for(num_slices, [&](const auto i) {
std::size_t batch_idx = i / num_slices_per_batch;
auto slice_indices = indices.begin() + (i * num_slice_dims);
std::size_t relative_slice_offset = 0;
for(size_t dim_idx = 0; dim_idx < num_slice_dims; ++dim_idx)
{
int64_t index = *(slice_indices + dim_idx);
const std::size_t input_dim_idx = batch_dims + dim_idx;
const auto input_dim = data_shape_lens[input_dim_idx];
if(index < -static_cast<int64_t>(input_dim) or
index >= static_cast<int64_t>(input_dim))
MIGRAPHX_THROW("GatherND: index " + std::to_string(index) +
" is out of bounds for dim of len " +
std::to_string(input_dim));
if(index < 0)
index += input_dim;
relative_slice_offset += index * sizes_from_slice_dims[dim_idx];
}
input_slice_offsets[i] =
(batch_idx * data_batch_stride) + relative_slice_offset;
});
par_for(num_slices * slice_size, [&](const auto i) {
auto slice_offset = input_slice_offsets[i / slice_size];
output[i] = data[slice_offset + i % slice_size];
});
});
});
return result;
}
};
} // namespace op
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#endif
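The slice-offset arithmetic in compute above is easier to see on a concrete case. A minimal standalone sketch with batch_dims = 0 and made-up data, using plain vectors rather than MIGraphX arguments:

#include <cstddef>
#include <functional>
#include <iostream>
#include <numeric>
#include <vector>

int main()
{
    std::vector<float> data = {0, 1, 2, 3, 4, 5};  // shape {3, 2}, so r = 2
    std::vector<std::size_t> data_lens = {3, 2};
    std::vector<long> indices = {2, 1, 0, 0};      // shape {2, 2}, so k = 2
    std::size_t k = 2;
    // slice_size: product of the data dims past the first k (here empty -> 1)
    std::size_t slice_size = std::accumulate(
        data_lens.begin() + k, data_lens.end(), std::size_t{1}, std::multiplies<>{});
    // sizes_from_slice_dims: stride of each indexed dim, built back to front
    std::vector<std::size_t> strides(k);
    std::size_t running = slice_size;
    for(std::size_t i = 0; i < k; ++i)
    {
        strides[k - 1 - i] = running;
        running *= data_lens[k - 1 - i];
    }
    for(std::size_t s = 0; s < indices.size() / k; ++s)
    {
        std::size_t offset = 0;
        for(std::size_t d = 0; d < k; ++d)
            offset += indices[s * k + d] * strides[d];
        std::cout << data[offset] << '\n';         // prints 5 then 0
    }
}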
@@ -45,6 +45,8 @@ struct get_tuple_elem
assert(index < vec_args.size());
return vec_args.at(index);
}
+std::ptrdiff_t output_alias(const std::vector<shape>&) const { return 0; }
};
} // namespace op
......
@@ -35,7 +35,7 @@ struct if_op
MIGRAPHX_THROW("IF: output shapes of submodules must be the same.");
}
-return shape(out_shapes0);
+return {out_shapes0};
}
argument compute(const shape&,
......
#ifndef MIGRAPHX_GUARD_OPERATORS_ISNAN_HPP
#define MIGRAPHX_GUARD_OPERATORS_ISNAN_HPP
#include <migraphx/op/unary.hpp>
#include <migraphx/config.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace op {
struct isnan : unary<isnan>
{
auto apply() const
{
return [](auto x) { return std::isnan(x); };
}
std::string name() const { return "isnan"; }
shape compute_shape(std::vector<shape> inputs) const
{
return unary<isnan>::compute_shape(std::move(inputs)).with_type(shape::bool_type);
}
};
} // namespace op
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#endif
@@ -35,7 +35,7 @@ struct load
{
if((offset + s.bytes()) > args[0].get_shape().bytes())
MIGRAPHX_THROW("Load access is out of bounds");
-return argument::load(s, args[0].data() + offset);
+return argument{s, args[0].data() + offset};
}
lifetime get_lifetime() const { return lifetime::borrow; }
std::ptrdiff_t output_alias(const std::vector<shape>&) const { return 0; }
......
#ifndef MIGRAPHX_GUARD_OPERATORS_LOOP_HPP
#define MIGRAPHX_GUARD_OPERATORS_LOOP_HPP
#include <migraphx/check_shapes.hpp>
#include <migraphx/argument.hpp>
#include <migraphx/functional.hpp>
#include <migraphx/config.hpp>
#include <migraphx/module.hpp>
#include <migraphx/run_loop.hpp>
#include <migraphx/ranges.hpp>
#include <cmath>
#include <string>
#include <utility>
#include <set>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace op {
struct loop
{
int64_t max_iterations = 10;
template <class Self, class F>
static auto reflect(Self& self, F f)
{
return pack(f(self.max_iterations, "max_iterations"));
}
std::string name() const { return "loop"; }
shape compute_shape(const std::vector<shape>& inputs, std::vector<module_ref> mods) const
{
check_shapes{inputs, *this}.standard();
if(mods.size() != 1)
{
MIGRAPHX_THROW("LOOP: operator should have one submodule.");
}
const auto& mod = mods.front();
auto mod_out_shapes = mod->get_output_shapes();
auto dep_param_num = inputs.size() - 2;
// first item of the mod output shapes is condition used in loop,
// which is not needed to compute output shape
mod_out_shapes.erase(mod_out_shapes.begin());
std::vector<shape> ins_out_shapes(mod_out_shapes.begin(),
mod_out_shapes.begin() + dep_param_num);
mod_out_shapes.erase(mod_out_shapes.begin(), mod_out_shapes.begin() + dep_param_num);
for(const auto& out_s : mod_out_shapes)
{
auto lens = out_s.lens();
lens.insert(lens.begin(), max_iterations);
ins_out_shapes.push_back({out_s.type(), lens});
}
return {ins_out_shapes};
}
struct ref_loop
{
int64_t max_iterations = 0;
template <class T>
void copy(context&, const argument& src, T& dst) const
{
dst = *src.cast<T>();
}
template <class T>
void copy(context&, T src, const argument& dst) const
{
*dst.cast<T>() = src;
}
void append(const std::vector<argument>& iter_state,
const std::vector<argument>& concatenated_outputs,
int iter) const
{
assert(iter_state.size() == concatenated_outputs.size());
for(auto i : range(iter_state.size()))
{
const auto& iter_stat = iter_state.at(i);
const auto& scan_out = concatenated_outputs.at(i);
auto* in_data = iter_stat.data();
auto* out_data = scan_out.data();
std::size_t out_size = iter_stat.get_shape().bytes();
assert((iter + 1) * out_size <= scan_out.get_shape().bytes());
std::copy(in_data, in_data + out_size, out_data + iter * out_size);
}
}
void set_zero(context&, const std::vector<argument>& concatenated_outputs, int iter) const
{
if(iter >= max_iterations)
return;
for(const auto& out : concatenated_outputs)
{
auto s = out.get_shape();
auto size = s.bytes() / max_iterations;
std::fill(out.data() + iter * size, out.data() + max_iterations * size, 0);
}
}
std::unordered_map<std::string, int> get_output_params(const module&) const { return {}; }
};
argument compute(context& ctx,
const shape& out_shape,
const std::vector<argument>& args,
const std::vector<module_ref>& mods,
const std::function<std::vector<argument>(
module_ref&, const std::unordered_map<std::string, argument>&)>& run) const
{
// wrap up the arguments vector, so ref and gpu impl are the same
auto cpy_args = args;
bool in_cond = args.at(1).at<bool>();
bool cond = in_cond;
int64_t iter = 0;
// insert iter and cond used in the loop
auto s_cond = args.at(1).get_shape();
auto s_iter = args.at(0).get_shape();
cpy_args.push_back({s_iter, &iter});
cpy_args.push_back({s_cond, &cond});
cpy_args.insert(cpy_args.end(), args.begin() + 2, args.end());
// add cond and mod outputs to the argument list
cpy_args.push_back(argument(s_cond));
cpy_args.push_back(argument(out_shape));
// run loop
return run_loop(ref_loop{max_iterations}, ctx, cpy_args, mods, run);
}
};
} // namespace op
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#endif
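compute_shape above splits the submodule outputs into carried dependencies, whose shapes pass through unchanged, and scan outputs, which gain a leading max_iterations dimension. A standalone sketch of that rule on made-up dimensions:

#include <cstddef>
#include <iostream>
#include <vector>

int main()
{
    std::size_t max_iterations = 10;
    // Submodule outputs after dropping the leading condition: two carried
    // dependencies followed by one scan output (dims only, for illustration).
    std::vector<std::vector<std::size_t>> mod_out = {{2, 2}, {4}, {3, 5}};
    std::size_t dep_param_num = 2;
    for(std::size_t i = 0; i < mod_out.size(); ++i)
    {
        auto lens = mod_out[i];
        if(i >= dep_param_num)                       // scan output
            lens.insert(lens.begin(), max_iterations);
        for(auto d : lens)
            std::cout << d << ' ';                   // {2 2} {4} {10 3 5}
        std::cout << '\n';
    }
}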
@@ -23,7 +23,7 @@ struct multibroadcast
template <class Self, class F>
static auto reflect(Self& self, F f)
{
return pack(f(self.output_lens, "output_lens"));
return pack(f(self.output_lens, "out_lens"));
}
std::string name() const { return "multibroadcast"; }
@@ -69,7 +69,6 @@ struct multibroadcast
{
return args[0].reshape(output_shape);
}
-lifetime get_lifetime() const { return lifetime::borrow; }
std::ptrdiff_t output_alias(const std::vector<shape>&) const { return 0; }
};
......
#ifndef MIGRAPHX_GUARD_OPERATORS_MULTINOMIAL_HPP
#define MIGRAPHX_GUARD_OPERATORS_MULTINOMIAL_HPP
#include <migraphx/operation.hpp>
#include <migraphx/check_shapes.hpp>
#include <migraphx/par_for.hpp>
#include <random>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace op {
struct multinomial
{
shape::type_t dtype = shape::type_t::int32_type;
template <class Self, class F>
static auto reflect(Self& self, F f)
{
return pack(f(self.dtype, "dtype"));
}
std::string name() const { return "multinomial"; }
shape compute_shape(std::vector<shape> inputs) const
{
check_shapes{inputs, *this}.has(2).only_dims(2);
size_t sample_size = inputs.back().lens().back();
if(not contains({shape::int32_type, shape::int64_type}, dtype))
MIGRAPHX_THROW(
"Multinomial: Invalid output type. Valid types are int32_type and int64_type.");
return {dtype, {inputs.front().lens().front(), sample_size}};
}
argument compute(const shape& output_shape, std::vector<argument> args) const
{
argument result{output_shape};
size_t batch_size = output_shape.lens().front();
size_t class_size = args[0].get_shape().lens().back();
size_t sample_size = output_shape.lens().back();
visit_all(args[0], args[1])([&](auto cdf, auto dist) {
result.visit([&](auto output) {
par_for(batch_size * sample_size, [&](auto i) {
auto idx = args[1].get_shape().multi(i);
auto cdf_begin = cdf.begin() + (idx[0] * class_size);
auto cdf_end = cdf_begin + class_size;
auto sample_iter =
std::upper_bound(cdf_begin, cdf_end, dist[i] * *(std::prev(cdf_end)));
output[i] = std::distance(cdf_begin, sample_iter);
});
});
});
return result;
}
};
} // namespace op
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#endif
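The sampling loop above places each uniform draw into the per-batch running CDF with std::upper_bound; because the draw is scaled by the last CDF entry, the CDF need not be normalized. A minimal standalone sketch with made-up weights and draws:

#include <algorithm>
#include <iostream>
#include <iterator>
#include <vector>

int main()
{
    // Unnormalized CDF of class weights {1, 3, 6} -> {1, 4, 10}
    std::vector<float> cdf = {1.0f, 4.0f, 10.0f};
    for(float u : {0.05f, 0.25f, 0.95f})  // stand-ins for uniform draws
    {
        // Scale the draw by the total mass, then find the owning class.
        auto it = std::upper_bound(cdf.begin(), cdf.end(), u * cdf.back());
        std::cout << std::distance(cdf.begin(), it) << '\n';  // 0, 1, 2
    }
}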
#ifndef MIGRAPHX_GUARD_OPERATORS_NONMAXSUPPRESSION_HPP
#define MIGRAPHX_GUARD_OPERATORS_NONMAXSUPPRESSION_HPP
#include <cmath>
#include <queue>
#include <cstdint>
#include <iterator>
#include <migraphx/config.hpp>
#include <migraphx/ranges.hpp>
#include <migraphx/float_equal.hpp>
#include <migraphx/algorithm.hpp>
#include <migraphx/tensor_view.hpp>
#include <migraphx/shape_for_each.hpp>
#include <migraphx/check_shapes.hpp>
#include <migraphx/output_iterator.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace op {
struct nonmaxsuppression
{
bool center_point_box = false;
template <class Self, class F>
static auto reflect(Self& self, F f)
{
return pack(f(self.center_point_box, "center_point_box"));
}
std::string name() const { return "nonmaxsuppression"; }
shape compute_shape(std::vector<shape> inputs) const
{
// requires at least 2 inputs
check_shapes{inputs, *this}.standard();
check_shapes{{inputs.at(0), inputs.at(1)}, *this}.only_dims(3);
auto lens = inputs.front().lens();
// check input shape
if(lens[1] != inputs.at(1).lens()[2])
{
MIGRAPHX_THROW("NonMaxSuppression: dimension mismatch between first and second input!");
}
std::vector<int64_t> out_lens(2);
out_lens.at(0) = lens.at(1);
out_lens.at(1) = 3;
return {shape::int64_type, out_lens};
}
struct box
{
std::array<float, 2> x;
std::array<float, 2> y;
void sort()
{
std::sort(x.begin(), x.end());
std::sort(y.begin(), y.end());
}
std::array<float, 2>& operator[](std::size_t i) { return i == 0 ? x : y; }
float area() const
{
assert(std::is_sorted(x.begin(), x.end()));
assert(std::is_sorted(y.begin(), y.end()));
return (x[1] - x[0]) * (y[1] - y[0]);
}
};
template <class T>
box batch_box(const T* boxes, std::size_t bidx) const
{
box result{};
const T* start = boxes + 4 * bidx;
if(center_point_box)
{
float half_width = start[2] / 2.0f;
float half_height = start[3] / 2.0f;
float x_center = start[0];
float y_center = start[1];
result.x = {x_center - half_width, x_center + half_width};
result.y = {y_center - half_height, y_center + half_height};
}
else
{
result.x = {start[1], start[3]};
result.y = {start[0], start[2]};
}
return result;
}
inline bool suppress_by_iou(box b1, box b2, float iou_threshold) const
{
b1.sort();
b2.sort();
box intersection{};
for(auto i : range(2))
{
intersection[i][0] = std::max(b1[i][0], b2[i][0]);
intersection[i][1] = std::min(b1[i][1], b2[i][1]);
}
std::vector<std::array<float, 2>> bbox = {intersection.x, intersection.y};
if(std::any_of(bbox.begin(), bbox.end(), [](auto bx) {
return not std::is_sorted(bx.begin(), bx.end());
}))
{
return false;
}
const float area1 = b1.area();
const float area2 = b2.area();
const float intersection_area = intersection.area();
const float union_area = area1 + area2 - intersection_area;
if(area1 <= .0f or area2 <= .0f or union_area <= .0f)
{
return false;
}
const float intersection_over_union = intersection_area / union_area;
return intersection_over_union > iou_threshold;
}
argument compute(const shape& output_shape, std::vector<argument> args) const
{
argument result{output_shape};
result.visit([&](auto out) { std::fill(out.begin(), out.end(), 0); });
std::size_t max_output_boxes_per_class = 0;
float iou_threshold = 0.0f;
float score_threshold = 0.0f;
if(args.size() > 2)
{
max_output_boxes_per_class = args.at(2).at<std::size_t>();
}
// max_output_boxes_per_class is 0, no output
if(max_output_boxes_per_class == 0)
{
return result;
}
if(args.size() > 3)
{
iou_threshold = args.at(3).at<float>();
}
if(args.size() > 4)
{
score_threshold = args.at(4).at<float>();
}
const auto& lens = args.at(1).get_shape().lens();
auto batch_num = lens[0];
auto class_num = lens[1];
auto box_num = args.at(0).get_shape().lens()[1];
std::vector<std::pair<float, int64_t>> selected_boxes_inside_class;
std::vector<int64_t> selected_indices;
selected_boxes_inside_class.reserve(output_shape.elements());
auto scores = make_view<float>(args.at(1).get_shape(), args.at(1).cast<float>());
const float* boxes = args.at(0).cast<float>();
shape comp_s{shape::float_type, {batch_num, class_num}};
shape_for_each(comp_s, [&](auto idx) {
auto bidx = idx[0];
auto cidx = idx[1];
std::size_t score_offset = (bidx * class_num + cidx) * box_num;
const float* batch_boxes = boxes + bidx * box_num * 4;
std::priority_queue<std::pair<float, int64_t>> sorted_boxes;
auto insert_to_sorted_boxes =
make_function_output_iterator([&](const auto& x) { sorted_boxes.push(x); });
int64_t box_idx = 0;
transform_if(
scores.begin() + score_offset,
scores.begin() + score_offset + box_num,
insert_to_sorted_boxes,
[&](auto sc) {
box_idx++;
return sc >= score_threshold;
},
[&](auto sc) { return std::make_pair(sc, box_idx - 1); });
selected_boxes_inside_class.clear();
// Get the next box with top score, filter by iou_threshold
while(!sorted_boxes.empty() &&
selected_boxes_inside_class.size() < max_output_boxes_per_class)
{
const std::pair<float, int64_t>& next_top_score = sorted_boxes.top();
// Check with existing selected boxes for this class, suppress if exceed the IOU
// (Intersection Over Union) threshold
bool not_selected = std::any_of(
selected_boxes_inside_class.begin(),
selected_boxes_inside_class.end(),
[&](auto selected_index) {
return this->suppress_by_iou(batch_box(batch_boxes, next_top_score.second),
batch_box(batch_boxes, selected_index.second),
iou_threshold);
});
if(not not_selected)
{
selected_boxes_inside_class.push_back(next_top_score);
selected_indices.push_back(bidx);
selected_indices.push_back(cidx);
selected_indices.push_back(next_top_score.second);
}
sorted_boxes.pop();
}
});
result.visit([&](auto out) {
std::copy(selected_indices.begin(), selected_indices.end(), out.begin());
});
return result;
}
};
} // namespace op
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#endif
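suppress_by_iou above reduces to the usual intersection-over-union test once both boxes are corner-sorted. A standalone sketch for two axis-aligned boxes with made-up coordinates (already sorted, so the degenerate-area early-outs are collapsed into one check):

#include <algorithm>
#include <iostream>

struct box { float x0, x1, y0, y1; };

float iou(const box& a, const box& b)
{
    // Intersection corners: max of the lows, min of the highs.
    float ix0 = std::max(a.x0, b.x0), ix1 = std::min(a.x1, b.x1);
    float iy0 = std::max(a.y0, b.y0), iy1 = std::min(a.y1, b.y1);
    if(ix1 <= ix0 or iy1 <= iy0)
        return 0.0f;                       // no overlap
    float inter = (ix1 - ix0) * (iy1 - iy0);
    float uni = (a.x1 - a.x0) * (a.y1 - a.y0) + (b.x1 - b.x0) * (b.y1 - b.y0) - inter;
    return inter / uni;
}

int main()
{
    box a{0, 2, 0, 2};
    box b{1, 3, 1, 3};
    std::cout << iou(a, b) << '\n';        // 1 / 7 ~= 0.143
}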
#ifndef MIGRAPHX_GUARD_OPERATORS_NONZERO_HPP
#define MIGRAPHX_GUARD_OPERATORS_NONZERO_HPP
#include <migraphx/shape_for_each.hpp>
#include <migraphx/check_shapes.hpp>
#include <migraphx/config.hpp>
#include <migraphx/float_equal.hpp>
#include <migraphx/par_for.hpp>
#include <cmath>
#include <utility>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace op {
struct nonzero
{
std::string name() const { return "nonzero"; }
shape compute_shape(std::vector<shape> inputs) const
{
check_shapes{inputs, *this}.has(1).standard();
auto elem_num = inputs[0].elements();
auto dim_num = inputs[0].lens().size();
std::vector<std::size_t> out_lens = {dim_num, elem_num};
return {shape::int64_type, out_lens};
}
argument compute(const shape& output_shape, std::vector<argument> args) const
{
std::vector<std::vector<std::size_t>> vec_idx;
auto s = args.front().get_shape();
args.front().visit([&](auto v) {
shape_for_each(s, [&](auto idx) {
if(not float_equal(v[s.index(idx)], 0))
{
vec_idx.push_back(idx);
}
});
});
argument result{output_shape};
result.visit([&](auto output) {
std::fill(output.begin(), output.end(), 0);
par_for(vec_idx.size(), [&](auto i) {
for(std::size_t j = 0; j < vec_idx.front().size(); ++j)
{
output[output_shape.index({j, i})] = vec_idx[i][j];
}
});
});
return result;
}
};
} // namespace op
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#endif
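The output above is laid out as {rank, elements}: column i holds the multi-index of the i-th nonzero element in row-major order, and unused columns stay zero. A standalone sketch on a made-up 2x2 input:

#include <cstddef>
#include <iostream>
#include <vector>

int main()
{
    std::vector<int> in = {1, 0, 0, 2};              // shape {2, 2}
    std::vector<std::size_t> lens = {2, 2};
    std::vector<std::vector<std::size_t>> idx;
    for(std::size_t i = 0; i < in.size(); ++i)
        if(in[i] != 0)
            idx.push_back({i / lens[1], i % lens[1]});
    // Output {2, 4}: row j holds the dimension-j coordinates, zero-padded.
    std::vector<std::vector<std::size_t>> out(2, std::vector<std::size_t>(in.size(), 0));
    for(std::size_t i = 0; i < idx.size(); ++i)
        for(std::size_t j = 0; j < 2; ++j)
            out[j][i] = idx[i][j];
    for(auto& row : out)
    {
        for(auto v : row)
            std::cout << v << ' ';                   // 0 1 0 0 (both rows)
        std::cout << '\n';
    }
}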
#ifndef MIGRAPHX_GUARD_OP_POINTWISE_HPP
#define MIGRAPHX_GUARD_OP_POINTWISE_HPP
#include <migraphx/config.hpp>
#include <migraphx/check_shapes.hpp>
#include <migraphx/module.hpp>
#include <migraphx/permutation.hpp>
#include <migraphx/instruction.hpp>
#include <migraphx/par_for.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace op {
struct pointwise
{
std::string name() const { return "pointwise"; }
shape compute_shape(const std::vector<shape>& inputs, std::vector<module_ref> mods) const
{
if(mods.size() != 1)
{
MIGRAPHX_THROW("should have one submodule.");
}
auto* pm = mods.front();
auto pnames = pm->get_parameter_names();
std::sort(pnames.begin(), pnames.end());
check_shapes{inputs, *this}.has(pnames.size()).same_dims();
if(pm->get_output_shapes().size() != 1)
MIGRAPHX_THROW("submodule should have only one output.");
auto type = pm->get_output_shapes().front().type();
// Scalar output if all inputs are scalar
if(inputs.front().elements() == 1 and
all_of(inputs, [](const auto& s) { return s.scalar(); }))
return shape{type};
return shape::from_permutation(type, inputs.front().lens(), find_permutation(inputs));
}
argument compute(const shape& output_shape,
const std::vector<argument>& args,
const std::vector<module_ref>& mods,
const std::function<std::vector<argument>(
module_ref&, const std::unordered_map<std::string, argument>&)>& run) const
{
argument output{output_shape};
auto* pm = mods.front();
auto pnames = pm->get_parameter_names();
std::sort(pnames.begin(), pnames.end());
par_for(output_shape.elements(), [&](auto i) {
std::unordered_map<std::string, argument> params;
std::transform(
pnames.begin(),
pnames.end(),
args.begin(),
std::inserter(params, params.end()),
[&](auto&& name, auto&& arg) { return std::make_pair(name, arg.element(i)); });
auto results = run(pm, params);
assert(results.size() == 1);
visit_all(output, results.front())([&](auto out, auto x) { out[i] = x.front(); });
});
return output;
}
};
} // namespace op
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#endif // MIGRAPHX_GUARD_OP_POINTWISE_HPP
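compute above evaluates the submodule once per output element, binding the i-th element of each input to the correspondingly named parameter. A standalone sketch with a plain lambda standing in for the submodule:

#include <cstddef>
#include <iostream>
#include <vector>

int main()
{
    std::vector<float> x = {1, 2, 3}, y = {10, 20, 30}, out(3);
    auto submodule = [](float a, float b) { return a + b; };  // stand-in
    // One scalar evaluation per output element, as par_for does above.
    for(std::size_t i = 0; i < out.size(); ++i)
        out[i] = submodule(x[i], y[i]);
    for(float v : out)
        std::cout << v << ' ';                                // 11 22 33
}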
@@ -8,6 +8,7 @@
#include <migraphx/streamutils.hpp>
#include <migraphx/functional.hpp>
#include <migraphx/literal.hpp>
+#include <migraphx/par_for.hpp>
#include <migraphx/value.hpp>
#include <migraphx/shape_for_each.hpp>
#include <migraphx/int_divide.hpp>
@@ -16,16 +17,18 @@
#include <utility>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace op {
struct pooling
{
std::string mode = "average";
pooling_mode mode = {pooling_mode::average};
std::vector<std::size_t> padding = {0, 0};
std::vector<std::size_t> stride = {1, 1};
std::vector<std::size_t> lengths = {1, 1};
bool ceil_mode = false;
+int lp_order = 2;
template <class Self, class F>
static auto reflect(Self& self, F f)
@@ -34,7 +37,8 @@ struct pooling
f(self.padding, "padding"),
f(self.stride, "stride"),
f(self.lengths, "lengths"),
f(self.ceil_mode, "ceil_mode"));
f(self.ceil_mode, "ceil_mode"),
f(self.lp_order, "lp_order"));
}
std::string name() const { return "pooling"; }
@@ -88,6 +92,114 @@ struct pooling
check_attribute_size();
return stride.size();
}
struct lpnorm_pool
{
int p = 0;
lpnorm_pool() = delete;
explicit lpnorm_pool(int x) : p{x} {};
template <class T>
double init() const
{
return 0.0;
}
double operator()(double x, double y) const { return x + std::pow(std::abs(y), p); }
double final(double x, std::size_t) const { return std::pow(x, 1. / p); }
};
struct avg_pool
{
template <class T>
double init() const
{
return 0.0;
}
double operator()(double x, double y) const { return x + y; }
double final(double x, std::size_t y) const { return (y == 0) ? 0.0 : (x / y); }
};
struct max_pool
{
template <class T>
T init() const
{
return std::numeric_limits<T>::lowest();
}
double operator()(double x, double y) const { return std::max(x, y); }
double final(double x, std::size_t) const { return (x); }
};
template <class Type, class Out, class In, class Op>
void calc_pooling(const shape& output_shape, Out& output, const In& input, Op op) const
{
auto in_s = input.get_shape();
auto in_lens = in_s.lens();
par_for(output_shape.elements(), [&](auto i) {
auto idx_o = output_shape.multi(i);
auto n_dim = idx_o.size();
std::vector<std::size_t> win_start;
std::vector<std::size_t> win_size;
for(std::size_t dim = 2; dim < n_dim; ++dim)
{
auto d_2 = dim - 2;
int start =
static_cast<int>(idx_o[dim] * stride[d_2]) - static_cast<int>(padding[d_2]);
int end = std::min(start + lengths[d_2], in_lens[dim]);
start = std::max(start, 0);
win_start.push_back(start);
win_size.push_back(end - start);
}
shape win_shape{output_shape.type(), win_size};
auto pool_size = win_shape.elements();
double output_val = op.template init<Type>();
shape_for_each(win_shape, [&](auto idx_w) {
auto idx = idx_o;
std::transform(idx_w.begin(),
idx_w.end(),
win_start.begin(),
idx.begin() + 2,
[](auto ii, auto jj) { return ii + jj; });
if(std::all_of(idx.begin() + 2, idx.end(), [&](auto ii) { return ii >= 0; }) and
idx < in_lens)
{
output_val = op(output_val, input[in_s.index(idx)]);
}
});
output[i] = Type(op.final(output_val, pool_size));
});
}
argument compute(const shape& output_shape, std::vector<argument> args) const
{
argument result{output_shape};
visit_all(result, args[0])([&](auto output, auto input) {
using type = typename decltype(output)::value_type;
switch(mode)
{
case migraphx::op::pooling_mode::average:
calc_pooling<type>(output_shape, output, input, avg_pool{});
break;
case migraphx::op::pooling_mode::max:
calc_pooling<type>(output_shape, output, input, max_pool{});
break;
case migraphx::op::pooling_mode::lpnorm:
calc_pooling<type>(output_shape, output, input, lpnorm_pool{lp_order});
break;
}
});
return result;
}
};
} // namespace op
......
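calc_pooling above clamps each pooling window against the padded input before reducing it: the start is shifted left by the padding and clamped at 0, the end is clamped at the input length. A standalone sketch of the window bounds for one spatial dimension (in_len 5, kernel 3, stride 2, padding 1 are made-up values):

#include <algorithm>
#include <iostream>

int main()
{
    int in_len = 5, len = 3, stride = 2, pad = 1;
    for(int o = 0; o < 3; ++o)     // output positions
    {
        int start = o * stride - pad;
        int end = std::min(start + len, in_len);
        start = std::max(start, 0);
        std::cout << "window [" << start << ", " << end << ")\n";
        // prints [0, 2)  [1, 4)  [3, 5)
    }
}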
@@ -38,18 +38,38 @@ struct prefix_scan_op : op_name<Derived>
shape normalize_compute_shape(std::vector<shape> inputs) const
{
check_shapes{inputs, *this}.has(1);
-return inputs.at(0);
+auto s = inputs.front();
+if(s.broadcasted())
+{
+return {s.type(), s.lens()};
+}
+else
+{
+return s.with_lens(s.lens());
+}
}
-argument compute(const shape&, std::vector<argument> args) const
+argument compute(const shape& output_shape, std::vector<argument> args) const
{
-argument result = args[0].copy();
-auto s = result.get_shape();
-auto slice = shape{s.type(), {s.lens()[axis]}, {s.strides()[axis]}};
-auto lens = s.lens();
-lens[axis] = 1;
-auto batch = shape{s.type(), lens, s.strides()};
-auto& self = static_cast<const Derived&>(*this);
+argument result{output_shape};
+auto s = args[0].get_shape();
+if(s == output_shape)
+{
+result = args[0].copy();
+}
+else
+{
+visit_all(result, args[0])([&](auto output, auto input) {
+par_for(output_shape.elements(),
+[&](auto i) { output[output_shape.index(i)] = input[s.index(i)]; });
+});
+s = output_shape;
+}
+auto slice = shape{s.type(), {s.lens()[axis]}, {s.strides()[axis]}};
+auto lens = s.lens();
+lens[axis] = 1;
+auto batch = shape{s.type(), lens, s.strides()};
+auto& self = static_cast<const Derived&>(*this);
result.visit([&](auto output) {
using type = decltype(output);
par_for(batch.elements(), [&](auto i) {
......
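The elided remainder of this hunk runs the scan over each batch slice along axis. A standalone sketch of the inclusive scan itself, with std::partial_sum standing in for the derived op:

#include <iostream>
#include <numeric>
#include <vector>

int main()
{
    // One row per batch; scan along axis 1.
    std::vector<std::vector<int>> data = {{1, 2, 3}, {4, 5, 6}};
    for(auto& row : data)
    {
        std::partial_sum(row.begin(), row.end(), row.begin());
        for(int v : row)
            std::cout << v << ' ';   // 1 3 6  then  4 9 15
        std::cout << '\n';
    }
}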
@@ -9,6 +9,7 @@ namespace op {
struct prelu : binary<prelu>
{
+std::string point_op() const { return "(${0} < 0) ? (${0} * ${1}) : ${0}"; }
auto apply() const
{
return [](auto x, auto slope) { return ((x < 0) ? (x * slope) : x); };
......
@@ -18,21 +18,12 @@ namespace op {
struct quant_dot
{
-int32_t alpha = 1;
-int32_t beta = 1;
-template <class Self, class F>
-static auto reflect(Self& self, F f)
-{
-return pack(f(self.alpha, "alpha"), f(self.beta, "beta"));
-}
value attributes() const { return {{"general_data_type", "dot"}}; }
std::string name() const { return "quant_dot"; }
shape compute_shape(std::vector<shape> inputs) const
{
-check_shapes{{inputs.at(0), inputs.at(1)}, *this}.same_type();
+check_shapes{{inputs.at(0), inputs.at(1)}, *this}.same_type().has(2);
const shape& a = inputs.at(0);
const shape& b = inputs.at(1);
auto t = a.type();
@@ -64,18 +55,6 @@ struct quant_dot
auto out_lens = a.lens();
out_lens[dim_1] = b.lens()[dim_1];
-if(inputs.size() == 3 && out_lens != inputs.at(2).lens())
-{
-MIGRAPHX_THROW("QUANT_DOT: dimension mismatch, operand C: {" +
-to_string_range(inputs.at(2).lens()) +
-"}, cannot add to operand A * B: {" + to_string_range(out_lens) + "}");
-}
-if(inputs.size() == 3 && inputs.at(2).type() != shape::int32_type)
-{
-MIGRAPHX_THROW("QUANT_DOT: operand C type must be int32");
-}
return {shape::int32_type, out_lens};
}
};
......
@@ -25,6 +25,7 @@ struct quantizelinear
std::string name() const { return "quantizelinear"; }
shape compute_shape(std::vector<shape> inputs) const
{
+check_shapes{inputs, *this}.same_dims();
if(inputs.size() == 3)
{
return {inputs[2].type(), inputs[0].lens(), inputs[0].strides()};
@@ -36,7 +37,7 @@
{
auto x = args.at(0);
auto y_scale = args.at(1);
-std::vector<int8_t> zeros(output_shape.elements(), 0);
+std::vector<int8_t> zeros(output_shape.bytes(), 0);
argument y_zero_point{output_shape, zeros.data()};
if(args.size() == 3)
{
......
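For reference, when only two inputs are given the zero point defaults to the zero buffer built above, and the elementwise arithmetic follows the ONNX convention q = saturate(round(x / scale) + zero_point). A standalone sketch for an int8 output with made-up values:

#include <algorithm>
#include <cmath>
#include <cstdint>
#include <iostream>

int main()
{
    float scale = 0.5f;
    int8_t zero_point = 10;
    for(float x : {-70.0f, 0.0f, 58.4f})
    {
        // Round, shift by the zero point, then saturate to the int8 range.
        int q = static_cast<int>(std::nearbyint(x / scale)) + zero_point;
        q = std::clamp(q, -128, 127);
        std::cout << q << '\n';   // -128, 10, 127
    }
}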
@@ -9,6 +9,7 @@ namespace op {
struct recip : unary<recip>
{
+std::string point_op() const { return "1 / ${0}"; }
auto apply() const
{
return [](auto x) { return 1 / x; };
......
@@ -9,6 +9,7 @@
#include <migraphx/shape_for_each.hpp>
#include <migraphx/config.hpp>
#include <migraphx/lifetime.hpp>
+#include <migraphx/value.hpp>
#include <cmath>
#include <utility>
@@ -26,6 +27,8 @@ struct reshape
return pack(f(self.dims, "dims"));
}
+value attributes() const { return {{"require_std_shape", true}}; }
std::string name() const { return "reshape"; }
shape compute_shape(std::vector<shape> inputs) const
{
@@ -72,7 +75,6 @@
return args[0].reshape(output_shape);
}
-lifetime get_lifetime() const { return lifetime::borrow; }
std::ptrdiff_t output_alias(const std::vector<shape>&) const { return 0; }
};
......
@@ -37,7 +37,7 @@ struct rnn_var_sl_shift_output
argument compute(const shape& output_shape, std::vector<argument> args) const
{
argument result{output_shape};
-int64_t max_len = static_cast<int64_t>(output_shape.lens()[0]);
+int64_t max_len = output_shape.lens()[0];
visit_all(result, args[0])([&](auto output, auto input) {
using value_type = typename decltype(output)::value_type;
args[1].visit([&](auto seq_lens) {
@@ -76,7 +76,7 @@ struct rnn_var_sl_shift_sequence
argument compute(const shape& output_shape, std::vector<argument> args) const
{
argument result{output_shape};
-int64_t max_len = static_cast<int64_t>(output_shape.lens()[0]);
+int64_t max_len = output_shape.lens()[0];
visit_all(result, args[0])([&](auto output, auto input) {
using value_type = typename decltype(output)::value_type;
args[1].visit([&](auto seq_lens) {
......