Commit 307c40cd authored by Shucai Xiao, committed by mvermeulen

Refactor reduce ops (#350)

* first version of refactoring reduce operators.

* clang format

* refactor the gpu implementation of the reduce_mean operator

* clang format

* refactor the gpu implementation of the reduce_sum operator

* fix cppcheck error

* fix cppcheck error

* fix cppcheck error

* fix review comments

* clang format

* fix a Jenkins error

* fixed review comments

* clang format

* fix review comments

* clang format

* fix review comments

* clang format
parent 87528938
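
The change replaces two near-identical operator implementations with a CRTP base class: the base owns the reduction loop, and each operator contributes only its pairwise accumulator and an optional finalizer. A minimal standalone illustration of the pattern (hypothetical names, not MIGraphX code; a sketch assuming only standard C++14):

#include <cstddef>
#include <iostream>
#include <vector>

template <class Derived>
struct reduce_base
{
    // The base drives the fold; the derived class supplies op() and may
    // override init() and output().
    double run(const std::vector<double>& xs) const
    {
        const auto& self = static_cast<const Derived&>(*this);
        double acc       = self.init();
        for(double x : xs)
            acc = self.op()(x, acc);
        return self.output(xs.size())(acc);
    }
    double init() const { return 0.0; }
    auto output(std::size_t) const
    {
        return [](double v) { return v; };
    }
};

struct mean : reduce_base<mean>
{
    auto op() const
    {
        return [](double x, double y) { return x + y; };
    }
    // Finalizer: divide the accumulated sum by the element count.
    auto output(std::size_t n) const
    {
        return [=](double v) { return v / n; };
    }
};

int main()
{
    std::cout << mean{}.run({1.0, 2.0, 3.0}) << "\n"; // prints 2
}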
#ifndef MIGRAPHX_GUARD_OPERATORS_MEAN_HPP
#define MIGRAPHX_GUARD_OPERATORS_MEAN_HPP
#include <migraphx/check_shapes.hpp>
#include <migraphx/argument.hpp>
#include <migraphx/shape_for_each.hpp>
#include <migraphx/par_for.hpp>
#include <migraphx/config.hpp>
#include <vector>
#include <migraphx/op/reduce_op.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace op {
struct reduce_mean : reduce_op<reduce_mean>
{
    reduce_mean() {}
    reduce_mean(std::vector<int64_t> ax) : reduce_op(std::move(ax)) {}

    // Accumulate with addition; the division by the element count is
    // deferred to output().
    auto op() const
    {
        return [=](auto x, auto y) { return x + y; };
    }

    // Finalizer: divide the accumulated sum by the number of reduced elements.
    auto output(const shape& s) const
    {
        return [&](auto val) { return val / s.elements(); };
    }
};
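
As a usage sketch (assuming the existing in-tree program API, which this commit does not change): reducing a {2, 3} tensor over axis 1 produces shape {2, 1}, each element being the row sum divided by 3.

// Hypothetical usage sketch; program::add_parameter/add_instruction are the
// existing graph-building API, not introduced here.
migraphx::program p;
auto x = p.add_parameter("x", migraphx::shape{migraphx::shape::float_type, {2, 3}});
p.add_instruction(migraphx::op::reduce_mean{{1}}, x);
// output shape: {2, 1}; each value is the mean over axis 1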
......
#ifndef MIGRAPHX_GUARD_OPERATORS_OP_HPP
#define MIGRAPHX_GUARD_OPERATORS_OP_HPP
#include <migraphx/op/name.hpp>
#include <migraphx/check_shapes.hpp>
#include <migraphx/argument.hpp>
#include <migraphx/shape_for_each.hpp>
#include <migraphx/par_for.hpp>
#include <migraphx/config.hpp>
#include <vector>
#include <limits>  // std::numeric_limits for the lowest/highest sentinels
#include <numeric> // std::iota in tune_axes
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace op {
// Sentinel values usable as init() for any arithmetic type: each converts
// to the corresponding limit of whatever type it is assigned to.
struct lowest
{
    template <class T>
    operator T() const
    {
        return std::numeric_limits<T>::lowest();
    }
};

struct highest
{
    template <class T>
    operator T() const
    {
        return std::numeric_limits<T>::max();
    }
};

struct zero
{
    template <class T>
    operator T() const
    {
        return T{0};
    }
};

// CRTP base class for reduction operators. Derived classes supply op()
// (the pairwise accumulator) and may override init(), input(), and output().
template <class Derived>
struct reduce_op : op_name<Derived>
{
    std::vector<std::int64_t> axes{};

    template <class Self, class F>
    static auto reflect(Self& self, F f)
    {
        return pack(f(self.axes, "axes"));
    }

    // Normalize axes: an empty list means reduce over all dimensions, and
    // negative axes count from the end.
    std::vector<int64_t> tune_axes(std::size_t n_dim) const
    {
        auto tuned_axes = axes;
        if(tuned_axes.empty())
        {
            tuned_axes.resize(n_dim);
            std::iota(tuned_axes.begin(), tuned_axes.end(), 0);
        }
        else
        {
            for(auto& axis : tuned_axes)
            {
                int64_t s_dim = static_cast<int64_t>(n_dim);
                if(axis >= s_dim or axis < -s_dim)
                {
                    MIGRAPHX_THROW("REDUCE_OP: axis out of range");
                }
                if(axis < 0)
                {
                    axis += n_dim;
                }
            }
        }
        return tuned_axes;
    }

    // Reduced dimensions collapse to 1 in the output shape.
    shape compute_shape(std::vector<shape> inputs) const
    {
        check_shapes{inputs, *this}.has(1);
        auto s          = inputs.at(0);
        auto lens       = s.lens();
        auto tuned_axes = tune_axes(lens.size());
        for(auto axis : tuned_axes)
        {
            lens[axis] = 1;
        }
        return {s.type(), lens};
    }

    template <class T>
    void tune_dims(const std::vector<int64_t>& tuned_axes,
                   const std::vector<T>& in_lens,
                   std::vector<T>& out_lens) const
    {
        for(auto axis : tuned_axes)
        {
            out_lens[axis] = in_lens[axis];
        }
    }

    // Fold every element of one reduction window into a single output element.
    template <class T>
    void reduce(tensor_view<T>& input,
                shape& batch_shape,
                std::vector<int64_t>& tuned_axes,
                std::vector<std::size_t>& out_idx,
                tensor_view<T>& output) const
    {
        auto data_idx = out_idx;
        T val         = static_cast<const Derived&>(*this).init();
        shape_for_each(batch_shape, [&](auto b_idx) {
            this->tune_dims(tuned_axes, b_idx, data_idx);
            val = static_cast<const Derived&>(*this).op()(
                static_cast<const Derived&>(*this).input()(
                    input(data_idx.begin(), data_idx.end())),
                val);
        });
        output(out_idx.begin(), out_idx.end()) =
            static_cast<const Derived&>(*this).output(batch_shape)(val);
    }

    argument compute(const shape& output_shape, std::vector<argument> args) const
    {
        argument result{output_shape};
        auto arg_lens   = args.front().get_shape().lens();
        auto tuned_axes = tune_axes(arg_lens.size());
        std::vector<std::size_t> batch_lens(output_shape.lens().size(), 1);
        tune_dims(tuned_axes, arg_lens, batch_lens);
        shape batch_shape{output_shape.type(), batch_lens};
        visit_all(result, args[0])([&](auto output, auto input) {
            par_for(output_shape.elements(), [&](auto i) {
                auto out_idx = output_shape.multi(i);
                this->reduce(input, batch_shape, tuned_axes, out_idx, output);
            });
        });
        return result;
    }

    // Customization-point defaults: start from zero, pass input elements and
    // the final value through unchanged.
    auto init() const { return zero(); }

    auto input() const
    {
        return [&](auto val) { return val; };
    }

    auto output(const shape&) const
    {
        return [&](auto val) { return val; };
    }

    reduce_op() {}
    reduce_op(std::vector<int64_t> ax) : axes(std::move(ax)) {}
};
} // namespace op
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#endif
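
With the base class in place, a new reduction needs only its accumulator. As a sketch (hypothetical operator, not part of this commit), a reduce_max would pair a max accumulator with the lowest sentinel defined above:

struct reduce_max : reduce_op<reduce_max>
{
    reduce_max() {}
    reduce_max(std::vector<int64_t> ax) : reduce_op(std::move(ax)) {}

    // Keep the larger of the accumulator and each element.
    auto op() const
    {
        return [=](auto x, auto y) { return x > y ? x : y; };
    }

    // Seed with the lowest representable value so any element wins.
    auto init() const { return lowest(); }
};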
#ifndef MIGRAPHX_GUARD_OPERATORS_SUM_HPP
#define MIGRAPHX_GUARD_OPERATORS_SUM_HPP
#include <migraphx/check_shapes.hpp>
#include <migraphx/argument.hpp>
#include <migraphx/shape_for_each.hpp>
#include <migraphx/par_for.hpp>
#include <migraphx/config.hpp>
#include <vector>
#include <migraphx/op/reduce_op.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace op {
struct reduce_sum : reduce_op<reduce_sum>
{
    reduce_sum() {}
    reduce_sum(std::vector<int64_t> ax) : reduce_op(std::move(ax)) {}

    // Accumulate with addition; the base-class defaults (zero init,
    // pass-through input/output) already give a plain sum.
    auto op() const
    {
        return [=](auto x, auto y) { return x + y; };
    }
};
......
......@@ -91,8 +91,6 @@ add_library(migraphx_gpu
adjust_allocation.cpp
pack_int8_args.cpp
clip.cpp
reduce_sum.cpp
reduce_mean.cpp
int8_gemm_pack.cpp
int8_conv_pack.cpp
)
......
#ifndef MIGRAPHX_GUARD_RTGLIB_OP_NAME_HPP
#define MIGRAPHX_GUARD_RTGLIB_OP_NAME_HPP
#include <migraphx/config.hpp>
#include <migraphx/type_name.hpp>
#include <utility>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
template <class Derived>
struct oper
{
    // Extracts the name part of an operator. For example, for an operation
    // "sin", get_type_name() returns "migraphx::version_1::gpu::hip_sin",
    // and this function returns "gpu::sin" as the operator name.
    std::string name() const
    {
        const std::string& name = get_type_name<Derived>();
        // search for the gpu namespace (::gpu::)
        auto pos_ns = name.find("::gpu::");
        if(pos_ns != std::string::npos)
        {
            // strip a leading "hip_" prefix from the type name, if present
            auto pos_name = name.find("hip_", pos_ns + std::string("::gpu::").length());
            if(pos_name != std::string::npos)
            {
                return std::string("gpu::") + name.substr(pos_name + 4);
            }
            else
            {
                return name.substr(pos_ns + 2);
            }
        }
        return "unknown_operator_name";
    }
};
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#endif
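
To make the string manipulation concrete, here is a standalone sketch of the same logic (assumes the fully qualified type name is passed in directly; not the in-tree implementation):

#include <iostream>
#include <string>

std::string op_name_from_type(const std::string& name)
{
    // search for the gpu namespace (::gpu::)
    auto pos_ns = name.find("::gpu::");
    if(pos_ns == std::string::npos)
        return "unknown_operator_name";
    // strip a leading "hip_" prefix, if present
    auto pos_name = name.find("hip_", pos_ns + std::string("::gpu::").length());
    if(pos_name != std::string::npos)
        return std::string("gpu::") + name.substr(pos_name + 4);
    return name.substr(pos_ns + 2);
}

int main()
{
    std::cout << op_name_from_type("migraphx::version_1::gpu::hip_sin") << "\n"; // gpu::sin
}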
#ifndef MIGRAPHX_GUARD_RTGLIB_UNARY_HPP
#define MIGRAPHX_GUARD_RTGLIB_UNARY_HPP
#include <migraphx/gpu/name.hpp>
#include <migraphx/gpu/hip.hpp>
#include <migraphx/gpu/context.hpp>
#include <migraphx/shape.hpp>
......@@ -14,31 +15,6 @@ namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
template <class Derived, void (*F)(hipStream_t, const argument&, const argument&)>
struct unary_device : oper<Derived>
{
......
#ifndef MIGRAPHX_GUARD_RTGLIB_REDUCE_MEAN_HPP
#define MIGRAPHX_GUARD_RTGLIB_REDUCE_MEAN_HPP
#include <migraphx/shape.hpp>
#include <migraphx/op/reduce_mean.hpp>
#include <migraphx/reflect.hpp>
#include <migraphx/gpu/reduce_op.hpp>
#include <migraphx/gpu/device/reduce_mean.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
......@@ -11,24 +11,10 @@ namespace gpu {
struct context;
struct hip_reduce_mean : reduce_op<hip_reduce_mean, op::reduce_mean, device::reduce_mean>
{
    hip_reduce_mean() {}
    hip_reduce_mean(const op::reduce_mean& op_ref) : reduce_op(op_ref) {}
};
} // namespace gpu
......
#ifndef MIGRAPHX_GUARD_RTGLIB_REDUCE_OP_HPP
#define MIGRAPHX_GUARD_RTGLIB_REDUCE_OP_HPP
#include <migraphx/gpu/name.hpp>
#include <migraphx/gpu/hip.hpp>
#include <migraphx/gpu/context.hpp>
#include <migraphx/shape.hpp>
#include <migraphx/argument.hpp>
#include <migraphx/config.hpp>
#include <migraphx/type_name.hpp>
#include <utility>
#include <iostream>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
struct context;

// CRTP wrapper shared by the GPU reduce operators: Derived is the hip_*
// operator, Op the corresponding CPU op (reused for shape computation), and
// F the device kernel launched in compute().
template <class Derived, class Op, void (*F)(hipStream_t, const argument&, const argument&)>
struct reduce_op : oper<Derived>
{
    Op op;

    template <class Self, class T>
    static auto reflect(Self& self, T f)
    {
        return migraphx::reflect(self.op, f);
    }

    // The last input is the preallocated output buffer; drop it before
    // delegating the shape computation to the wrapped CPU op.
    shape compute_shape(const std::vector<shape>& inputs) const
    {
        std::vector<shape> in_shapes{inputs};
        in_shapes.pop_back();
        return op.compute_shape(in_shapes);
    }

    argument compute(context& ctx, const shape&, const std::vector<argument>& args) const
    {
        F(ctx.get_stream().get(), args[1], args[0]);
        return args[1];
    }

    // The result aliases the output buffer passed as the last argument.
    std::ptrdiff_t output_alias(const std::vector<shape>& shapes) const
    {
        return shapes.size() - 1;
    }

    reduce_op() {}
    reduce_op(const Op& op_ref) : op(op_ref) {}
};
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#endif
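
Any kernel plugged in as F must match the function-pointer template parameter exactly. For reference (declaration sketch; the definitions live under the gpu/device sources and are unchanged by this commit):

namespace device {
// result is the preallocated output buffer, arg the input tensor.
void reduce_sum(hipStream_t stream, const argument& result, const argument& arg);
void reduce_mean(hipStream_t stream, const argument& result, const argument& arg);
} // namespace device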
#ifndef MIGRAPHX_GUARD_RTGLIB_REDUCE_SUM_HPP
#define MIGRAPHX_GUARD_RTGLIB_REDUCE_SUM_HPP
#include <migraphx/shape.hpp>
#include <migraphx/op/reduce_sum.hpp>
#include <migraphx/reflect.hpp>
#include <migraphx/gpu/reduce_op.hpp>
#include <migraphx/gpu/device/reduce_sum.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
......@@ -11,24 +11,10 @@ namespace gpu {
struct context;
struct hip_reduce_sum : reduce_op<hip_reduce_sum, op::reduce_sum, device::reduce_sum>
{
    hip_reduce_sum() {}
    hip_reduce_sum(const op::reduce_sum& op_ref) : reduce_op(op_ref) {}
};
} // namespace gpu
......
#include <migraphx/gpu/reduce_mean.hpp>
#include <migraphx/gpu/context.hpp>
#include <migraphx/gpu/device/reduce_mean.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
shape hip_reduce_mean::compute_shape(std::vector<shape> inputs) const
{
    inputs.pop_back();
    return op.compute_shape(inputs);
}

argument
hip_reduce_mean::compute(context& ctx, const shape&, const std::vector<argument>& args) const
{
    device::reduce_mean(ctx.get_stream().get(), args.back(), args.front());
    return args.back();
}
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#include <migraphx/gpu/reduce_sum.hpp>
#include <migraphx/gpu/context.hpp>
#include <migraphx/gpu/device/reduce_sum.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
shape hip_reduce_sum::compute_shape(std::vector<shape> inputs) const
{
    inputs.pop_back();
    return op.compute_shape(inputs);
}

argument
hip_reduce_sum::compute(context& ctx, const shape&, const std::vector<argument>& args) const
{
    device::reduce_sum(ctx.get_stream().get(), args.back(), args.front());
    return args.back();
}
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx