Commit 307c40cd authored by Shucai Xiao, committed by mvermeulen

Refactor reduce ops (#350)

* first version of refactoring reduce operators.

* clang format

* refactor the gpu implementation of the reduce_mean operator

* clang format

* refactor the gpu implementation of the reduce_sum operator

* fix cppcheck error

* fix cppcheck error

* fix cppcheck error

* fix review comments

* clang format

* fix a Jenkins error

* fixed review comments

* clang format

* fix review comments

* clang format

* fix review comments

* clang format
parent 87528938
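
The change replaces two near-identical operator implementations with a CRTP base class: the base owns the reduction loop, and each operator contributes only its pairwise accumulator and an optional finalizer. A minimal standalone illustration of the pattern (hypothetical names, not MIGraphX code; a sketch assuming only standard C++14):

#include <cstddef>
#include <iostream>
#include <vector>

template <class Derived>
struct reduce_base
{
    // The base drives the fold; the derived class supplies op() and may
    // override init() and output().
    double run(const std::vector<double>& xs) const
    {
        const auto& self = static_cast<const Derived&>(*this);
        double acc       = self.init();
        for(double x : xs)
            acc = self.op()(x, acc);
        return self.output(xs.size())(acc);
    }
    double init() const { return 0.0; }
    auto output(std::size_t) const
    {
        return [](double v) { return v; };
    }
};

struct mean : reduce_base<mean>
{
    auto op() const
    {
        return [](double x, double y) { return x + y; };
    }
    // Finalizer: divide the accumulated sum by the element count.
    auto output(std::size_t n) const
    {
        return [=](double v) { return v / n; };
    }
};

int main()
{
    std::cout << mean{}.run({1.0, 2.0, 3.0}) << "\n"; // prints 2
}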
#ifndef MIGRAPHX_GUARD_OPERATORS_MEAN_HPP
#define MIGRAPHX_GUARD_OPERATORS_MEAN_HPP
#include <migraphx/check_shapes.hpp>
#include <migraphx/argument.hpp>
#include <migraphx/shape_for_each.hpp>
#include <migraphx/par_for.hpp>
#include <migraphx/config.hpp>
#include <vector>
#include <migraphx/op/reduce_op.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace op {
struct reduce_mean : reduce_op<reduce_mean>
{
    reduce_mean() {}
    reduce_mean(std::vector<int64_t> ax) : reduce_op(std::move(ax)) {}

    // Accumulate with addition; the division by the element count is
    // deferred to output().
    auto op() const
    {
        return [=](auto x, auto y) { return x + y; };
    }

    // Finalizer: divide the accumulated sum by the number of reduced elements.
    auto output(const shape& s) const
    {
        return [&](auto val) { return val / s.elements(); };
    }
};
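
As a usage sketch (assuming the existing in-tree program API, which this commit does not change): reducing a {2, 3} tensor over axis 1 produces shape {2, 1}, each element being the row sum divided by 3.

// Hypothetical usage sketch; program::add_parameter/add_instruction are the
// existing graph-building API, not introduced here.
migraphx::program p;
auto x = p.add_parameter("x", migraphx::shape{migraphx::shape::float_type, {2, 3}});
p.add_instruction(migraphx::op::reduce_mean{{1}}, x);
// output shape: {2, 1}; each value is the mean over axis 1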
......
#ifndef MIGRAPHX_GUARD_OPERATORS_OP_HPP
#define MIGRAPHX_GUARD_OPERATORS_OP_HPP
#include <migraphx/op/name.hpp>
#include <migraphx/check_shapes.hpp>
#include <migraphx/argument.hpp>
#include <migraphx/shape_for_each.hpp>
#include <migraphx/par_for.hpp>
#include <migraphx/config.hpp>
#include <vector>
#include <limits>  // std::numeric_limits for the lowest/highest sentinels
#include <numeric> // std::iota in tune_axes
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace op {
// Sentinel values usable as init() for any arithmetic type: each converts
// to the corresponding limit of whatever type it is assigned to.
struct lowest
{
    template <class T>
    operator T() const
    {
        return std::numeric_limits<T>::lowest();
    }
};

struct highest
{
    template <class T>
    operator T() const
    {
        return std::numeric_limits<T>::max();
    }
};

struct zero
{
    template <class T>
    operator T() const
    {
        return T{0};
    }
};

// CRTP base class for reduction operators. Derived classes supply op()
// (the pairwise accumulator) and may override init(), input(), and output().
template <class Derived>
struct reduce_op : op_name<Derived>
{
    std::vector<std::int64_t> axes{};

    template <class Self, class F>
    static auto reflect(Self& self, F f)
    {
        return pack(f(self.axes, "axes"));
    }

    // Normalize axes: an empty list means reduce over all dimensions, and
    // negative axes count from the end.
    std::vector<int64_t> tune_axes(std::size_t n_dim) const
    {
        auto tuned_axes = axes;
        if(tuned_axes.empty())
        {
            tuned_axes.resize(n_dim);
            std::iota(tuned_axes.begin(), tuned_axes.end(), 0);
        }
        else
        {
            for(auto& axis : tuned_axes)
            {
                int64_t s_dim = static_cast<int64_t>(n_dim);
                if(axis >= s_dim or axis < -s_dim)
                {
                    MIGRAPHX_THROW("REDUCE_OP: axis out of range");
                }
                if(axis < 0)
                {
                    axis += n_dim;
                }
            }
        }
        return tuned_axes;
    }

    // Reduced dimensions collapse to 1 in the output shape.
    shape compute_shape(std::vector<shape> inputs) const
    {
        check_shapes{inputs, *this}.has(1);
        auto s          = inputs.at(0);
        auto lens       = s.lens();
        auto tuned_axes = tune_axes(lens.size());
        for(auto axis : tuned_axes)
        {
            lens[axis] = 1;
        }
        return {s.type(), lens};
    }

    template <class T>
    void tune_dims(const std::vector<int64_t>& tuned_axes,
                   const std::vector<T>& in_lens,
                   std::vector<T>& out_lens) const
    {
        for(auto axis : tuned_axes)
        {
            out_lens[axis] = in_lens[axis];
        }
    }

    // Fold every element of one reduction window into a single output element.
    template <class T>
    void reduce(tensor_view<T>& input,
                shape& batch_shape,
                std::vector<int64_t>& tuned_axes,
                std::vector<std::size_t>& out_idx,
                tensor_view<T>& output) const
    {
        auto data_idx = out_idx;
        T val         = static_cast<const Derived&>(*this).init();
        shape_for_each(batch_shape, [&](auto b_idx) {
            this->tune_dims(tuned_axes, b_idx, data_idx);
            val = static_cast<const Derived&>(*this).op()(
                static_cast<const Derived&>(*this).input()(
                    input(data_idx.begin(), data_idx.end())),
                val);
        });
        output(out_idx.begin(), out_idx.end()) =
            static_cast<const Derived&>(*this).output(batch_shape)(val);
    }

    argument compute(const shape& output_shape, std::vector<argument> args) const
    {
        argument result{output_shape};
        auto arg_lens   = args.front().get_shape().lens();
        auto tuned_axes = tune_axes(arg_lens.size());
        std::vector<std::size_t> batch_lens(output_shape.lens().size(), 1);
        tune_dims(tuned_axes, arg_lens, batch_lens);
        shape batch_shape{output_shape.type(), batch_lens};
        visit_all(result, args[0])([&](auto output, auto input) {
            par_for(output_shape.elements(), [&](auto i) {
                auto out_idx = output_shape.multi(i);
                this->reduce(input, batch_shape, tuned_axes, out_idx, output);
            });
        });
        return result;
    }

    // Customization-point defaults: start from zero, pass input elements and
    // the final value through unchanged.
    auto init() const { return zero(); }

    auto input() const
    {
        return [&](auto val) { return val; };
    }

    auto output(const shape&) const
    {
        return [&](auto val) { return val; };
    }

    reduce_op() {}
    reduce_op(std::vector<int64_t> ax) : axes(std::move(ax)) {}
};
} // namespace op
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#endif
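
With the base class in place, a new reduction needs only its accumulator. As a sketch (hypothetical operator, not part of this commit), a reduce_max would pair a max accumulator with the lowest sentinel defined above:

struct reduce_max : reduce_op<reduce_max>
{
    reduce_max() {}
    reduce_max(std::vector<int64_t> ax) : reduce_op(std::move(ax)) {}

    // Keep the larger of the accumulator and each element.
    auto op() const
    {
        return [=](auto x, auto y) { return x > y ? x : y; };
    }

    // Seed with the lowest representable value so any element wins.
    auto init() const { return lowest(); }
};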
#ifndef MIGRAPHX_GUARD_OPERATORS_SUM_HPP
#define MIGRAPHX_GUARD_OPERATORS_SUM_HPP
#include <migraphx/check_shapes.hpp>
#include <migraphx/argument.hpp>
#include <migraphx/shape_for_each.hpp>
#include <migraphx/par_for.hpp>
#include <migraphx/config.hpp>
#include <vector>
#include <migraphx/op/reduce_op.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace op {
struct reduce_sum : reduce_op<reduce_sum>
{
    reduce_sum() {}
    reduce_sum(std::vector<int64_t> ax) : reduce_op(std::move(ax)) {}

    // Accumulate with addition; the base-class defaults (zero init,
    // pass-through input/output) already give a plain sum.
    auto op() const
    {
        return [=](auto x, auto y) { return x + y; };
    }
};
......
......@@ -91,8 +91,6 @@ add_library(migraphx_gpu
adjust_allocation.cpp
pack_int8_args.cpp
clip.cpp
reduce_sum.cpp
reduce_mean.cpp
int8_gemm_pack.cpp
int8_conv_pack.cpp
)
......
#ifndef MIGRAPHX_GUARD_RTGLIB_OP_NAME_HPP
#define MIGRAPHX_GUARD_RTGLIB_OP_NAME_HPP
#include <migraphx/config.hpp>
#include <migraphx/type_name.hpp>
#include <utility>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
template <class Derived>
struct oper
{
    // Extracts the name part of an operator. For example, for an operation
    // "sin", get_type_name() returns "migraphx::version_1::gpu::hip_sin",
    // and this function returns "gpu::sin" as the operator name.
    std::string name() const
    {
        const std::string& name = get_type_name<Derived>();
        // search for the gpu namespace (::gpu::)
        auto pos_ns = name.find("::gpu::");
        if(pos_ns != std::string::npos)
        {
            // strip a leading "hip_" prefix from the type name, if present
            auto pos_name = name.find("hip_", pos_ns + std::string("::gpu::").length());
            if(pos_name != std::string::npos)
            {
                return std::string("gpu::") + name.substr(pos_name + 4);
            }
            else
            {
                return name.substr(pos_ns + 2);
            }
        }
        return "unknown_operator_name";
    }
};
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#endif
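
To make the string manipulation concrete, here is a standalone sketch of the same logic (assumes the fully qualified type name is passed in directly; not the in-tree implementation):

#include <iostream>
#include <string>

std::string op_name_from_type(const std::string& name)
{
    // search for the gpu namespace (::gpu::)
    auto pos_ns = name.find("::gpu::");
    if(pos_ns == std::string::npos)
        return "unknown_operator_name";
    // strip a leading "hip_" prefix, if present
    auto pos_name = name.find("hip_", pos_ns + std::string("::gpu::").length());
    if(pos_name != std::string::npos)
        return std::string("gpu::") + name.substr(pos_name + 4);
    return name.substr(pos_ns + 2);
}

int main()
{
    std::cout << op_name_from_type("migraphx::version_1::gpu::hip_sin") << "\n"; // gpu::sin
}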
#ifndef MIGRAPHX_GUARD_RTGLIB_UNARY_HPP
#define MIGRAPHX_GUARD_RTGLIB_UNARY_HPP
#include <migraphx/gpu/name.hpp>
#include <migraphx/gpu/hip.hpp>
#include <migraphx/gpu/context.hpp>
#include <migraphx/shape.hpp>
......@@ -14,31 +15,6 @@ namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
template <class Derived, void (*F)(hipStream_t, const argument&, const argument&)>
struct unary_device : oper<Derived>
{
......
#ifndef MIGRAPHX_GUARD_RTGLIB_REDUCE_MEAN_HPP
#define MIGRAPHX_GUARD_RTGLIB_REDUCE_MEAN_HPP
#include <migraphx/shape.hpp>
#include <migraphx/op/reduce_mean.hpp>
#include <migraphx/reflect.hpp>
#include <migraphx/gpu/reduce_op.hpp>
#include <migraphx/gpu/device/reduce_mean.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
......@@ -11,24 +11,10 @@ namespace gpu {
struct context;
struct hip_reduce_mean : reduce_op<hip_reduce_mean, op::reduce_mean, device::reduce_mean>
{
    hip_reduce_mean() {}
    hip_reduce_mean(const op::reduce_mean& op_ref) : reduce_op(op_ref) {}
};
} // namespace gpu
......
#ifndef MIGRAPHX_GUARD_RTGLIB_REDUCE_OP_HPP
#define MIGRAPHX_GUARD_RTGLIB_REDUCE_OP_HPP
#include <migraphx/gpu/name.hpp>
#include <migraphx/gpu/hip.hpp>
#include <migraphx/gpu/context.hpp>
#include <migraphx/shape.hpp>
#include <migraphx/argument.hpp>
#include <migraphx/config.hpp>
#include <migraphx/type_name.hpp>
#include <utility>
#include <iostream>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
struct context;

// CRTP wrapper shared by the GPU reduce operators: Derived is the hip_*
// operator, Op the corresponding CPU op (reused for shape computation), and
// F the device kernel launched in compute().
template <class Derived, class Op, void (*F)(hipStream_t, const argument&, const argument&)>
struct reduce_op : oper<Derived>
{
    Op op;

    template <class Self, class T>
    static auto reflect(Self& self, T f)
    {
        return migraphx::reflect(self.op, f);
    }

    // The last input is the preallocated output buffer; drop it before
    // delegating the shape computation to the wrapped CPU op.
    shape compute_shape(const std::vector<shape>& inputs) const
    {
        std::vector<shape> in_shapes{inputs};
        in_shapes.pop_back();
        return op.compute_shape(in_shapes);
    }

    argument compute(context& ctx, const shape&, const std::vector<argument>& args) const
    {
        F(ctx.get_stream().get(), args[1], args[0]);
        return args[1];
    }

    // The result aliases the output buffer passed as the last argument.
    std::ptrdiff_t output_alias(const std::vector<shape>& shapes) const
    {
        return shapes.size() - 1;
    }

    reduce_op() {}
    reduce_op(const Op& op_ref) : op(op_ref) {}
};
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#endif
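
Any kernel plugged in as F must match the function-pointer template parameter exactly. For reference (declaration sketch; the definitions live under the gpu/device sources and are unchanged by this commit):

namespace device {
// result is the preallocated output buffer, arg the input tensor.
void reduce_sum(hipStream_t stream, const argument& result, const argument& arg);
void reduce_mean(hipStream_t stream, const argument& result, const argument& arg);
} // namespace device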
#ifndef MIGRAPHX_GUARD_RTGLIB_REDUCE_SUM_HPP
#define MIGRAPHX_GUARD_RTGLIB_REDUCE_SUM_HPP
#include <migraphx/shape.hpp>
#include <migraphx/op/reduce_sum.hpp>
#include <migraphx/reflect.hpp>
#include <migraphx/gpu/reduce_op.hpp>
#include <migraphx/gpu/device/reduce_sum.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
......@@ -11,24 +11,10 @@ namespace gpu {
struct context;
struct hip_reduce_sum : reduce_op<hip_reduce_sum, op::reduce_sum, device::reduce_sum>
{
    hip_reduce_sum() {}
    hip_reduce_sum(const op::reduce_sum& op_ref) : reduce_op(op_ref) {}
};
} // namespace gpu
......
#include <migraphx/gpu/reduce_mean.hpp>
#include <migraphx/gpu/context.hpp>
#include <migraphx/gpu/device/reduce_mean.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
shape hip_reduce_mean::compute_shape(std::vector<shape> inputs) const
{
    inputs.pop_back();
    return op.compute_shape(inputs);
}

argument
hip_reduce_mean::compute(context& ctx, const shape&, const std::vector<argument>& args) const
{
    device::reduce_mean(ctx.get_stream().get(), args.back(), args.front());
    return args.back();
}
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#include <migraphx/gpu/reduce_sum.hpp>
#include <migraphx/gpu/context.hpp>
#include <migraphx/gpu/device/reduce_sum.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
shape hip_reduce_sum::compute_shape(std::vector<shape> inputs) const
{
    inputs.pop_back();
    return op.compute_shape(inputs);
}

argument
hip_reduce_sum::compute(context& ctx, const shape&, const std::vector<argument>& args) const
{
    device::reduce_sum(ctx.get_stream().get(), args.back(), args.front());
    return args.back();
}
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx