Commit 8d059502 authored by Shucai Xiao

merge develop branch

parents 026365a6 80b06ca7
#ifndef MIGRAPHX_GUARD_OPERATORS_ERF_HPP
#define MIGRAPHX_GUARD_OPERATORS_ERF_HPP

#include <migraphx/op/unary.hpp>
#include <migraphx/config.hpp>
#include <cmath>

namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace op {

struct erf : unary<erf>
{
    auto apply() const
    {
        return [](auto x) { return std::erf(x); };
    }
};

} // namespace op
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx

#endif
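For orientation: `unary<erf>` is a CRTP base, and apply() returns the callable that the base applies elementwise over the input tensor. A minimal standalone sketch of that behavior (plain C++, not the MIGraphX API; the loop stands in for the framework's elementwise visit):

#include <cmath>
#include <cstdio>
#include <vector>

int main()
{
    // The same lambda erf::apply() returns, applied over a buffer by hand.
    auto f = [](auto x) { return std::erf(x); };
    std::vector<float> in{-2.0f, 0.0f, 2.0f};
    std::vector<float> out(in.size());
    for(std::size_t i = 0; i < in.size(); ++i)
        out[i] = f(in[i]);
    std::printf("%f %f %f\n", out[0], out[1], out[2]); // ~ -0.995322 0.000000 0.995322
}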
#ifndef MIGRAPHX_GUARD_OPERATORS_MEAN_HPP
#define MIGRAPHX_GUARD_OPERATORS_MEAN_HPP

#include <migraphx/check_shapes.hpp>
#include <migraphx/argument.hpp>
#include <migraphx/shape_for_each.hpp>
#include <migraphx/par_for.hpp>
#include <migraphx/config.hpp>
#include <cstdint>
#include <numeric>
#include <vector>

namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace op {

struct reduce_mean
{
    std::vector<std::int64_t> axes{};

    template <class Self, class F>
    static auto reflect(Self& self, F f)
    {
        return pack(f(self.axes, "axes"));
    }

    std::string name() const { return "reduce_mean"; }

    // Normalize the axes: default to all dimensions when empty, map negative
    // axes into [0, n_dim), and reject out-of-range values.
    std::vector<int64_t> tune_axes(std::size_t n_dim) const
    {
        auto tuned_axes = axes;
        if(tuned_axes.empty())
        {
            tuned_axes.resize(n_dim);
            std::iota(tuned_axes.begin(), tuned_axes.end(), 0);
        }
        else
        {
            for(auto& axis : tuned_axes)
            {
                int64_t s_dim = static_cast<int64_t>(n_dim);
                if(axis >= s_dim or axis < -s_dim)
                {
                    MIGRAPHX_THROW("REDUCE_MEAN: axis out of range");
                }
                if(axis < 0)
                {
                    axis += n_dim;
                }
            }
        }
        return tuned_axes;
    }

    shape compute_shape(std::vector<shape> inputs) const
    {
        check_shapes{inputs, *this}.has(1);
        auto s          = inputs.at(0);
        auto lens       = s.lens();
        auto tuned_axes = tune_axes(lens.size());
        for(auto axis : tuned_axes)
        {
            lens[axis] = 1;
        }
        return {s.type(), lens};
    }

    // Average all input elements that map to the output element at out_idx.
    template <class T>
    void calc_mean(tensor_view<T>& input,
                   shape& batch_shape,
                   std::vector<int64_t>& tuned_axes,
                   std::vector<std::size_t>& out_idx,
                   tensor_view<T>& output) const
    {
        auto data_idx = out_idx;
        T val         = T{0};
        shape_for_each(batch_shape, [&](auto b_idx) {
            for(auto axis : tuned_axes)
            {
                data_idx[axis] = b_idx[axis];
            }
            val += input(data_idx.begin(), data_idx.end());
        });
        output(out_idx.begin(), out_idx.end()) = val / batch_shape.elements();
    }

    argument compute(const shape& output_shape, std::vector<argument> args) const
    {
        argument result{output_shape};
        auto arg_lens   = args.front().get_shape().lens();
        auto tuned_axes = tune_axes(arg_lens.size());
        // batch_shape spans only the reduced axes; every other length is 1.
        std::vector<std::size_t> batch_lens(output_shape.lens().size(), 1);
        for(auto axis : tuned_axes)
        {
            batch_lens[axis] = arg_lens[axis];
        }
        shape batch_shape{output_shape.type(), batch_lens};
        visit_all(result, args[0])([&](auto output, auto input) {
            par_for(output_shape.elements(), [&](auto i) {
                auto out_idx = output_shape.multi(i);
                this->calc_mean(input, batch_shape, tuned_axes, out_idx, output);
            });
        });
        return result;
    }
};

} // namespace op
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx

#endif
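The reduction scheme above splits the input shape in two: the output index fixes the non-reduced axes, and batch_shape spans only the reduced ones, so each output element averages batch_shape.elements() inputs. A minimal standalone sketch of the same arithmetic for a 2x3 tensor reduced over axis 1 (plain C++, not the MIGraphX API):

#include <cstdio>
#include <vector>

int main()
{
    // Row-major 2x3 input; reduce_mean over axis 1 keeps shape {2, 1}.
    std::vector<float> input{1, 2, 3, 4, 5, 6};
    std::size_t rows = 2, cols = 3;

    std::vector<float> output(rows, 0.0f); // one element per output index
    for(std::size_t r = 0; r < rows; ++r)  // out_idx walk (par_for above)
    {
        float val = 0.0f;
        for(std::size_t c = 0; c < cols; ++c) // batch_shape walk (shape_for_each)
            val += input[r * cols + c];
        output[r] = val / cols; // divide by batch_shape.elements()
    }
    std::printf("%f %f\n", output[0], output[1]); // 2.000000 5.000000
}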
@@ -4,6 +4,7 @@
 #include <migraphx/check_shapes.hpp>
 #include <migraphx/argument.hpp>
 #include <migraphx/shape_for_each.hpp>
+#include <migraphx/par_for.hpp>
 #include <migraphx/config.hpp>
 #include <vector>
@@ -13,7 +14,7 @@ namespace op {
 struct reduce_sum
 {
-    std::vector<std::size_t> axes;
+    std::vector<int64_t> axes{};
     template <class Self, class F>
     static auto reflect(Self& self, F f)
@@ -23,25 +24,82 @@ struct reduce_sum
     std::string name() const { return "reduce_sum"; }
+    std::vector<int64_t> tune_axes(std::size_t n_dim) const
+    {
+        auto tuned_axes = axes;
+        if(tuned_axes.empty())
+        {
+            tuned_axes.resize(n_dim);
+            std::iota(tuned_axes.begin(), tuned_axes.end(), 0);
+        }
+        else
+        {
+            for(auto& axis : tuned_axes)
+            {
+                int64_t s_dim = static_cast<int64_t>(n_dim);
+                if(axis >= s_dim or axis < -s_dim)
+                {
+                    MIGRAPHX_THROW("REDUCE_SUM: axis out of range");
+                }
+                if(axis < 0)
+                {
+                    axis += n_dim;
+                }
+            }
+        }
+        return tuned_axes;
+    }
     shape compute_shape(std::vector<shape> inputs) const
     {
         check_shapes{inputs, *this}.has(1);
         auto s    = inputs.at(0);
         auto lens = s.lens();
-        for(auto axis : axes)
+        auto tuned_axes = tune_axes(lens.size());
+        for(auto axis : tuned_axes)
+        {
             lens[axis] = 1;
+        }
         return {s.type(), lens};
     }
+    template <class T>
+    void calc_sum(tensor_view<T>& input,
+                  shape& batch_shape,
+                  std::vector<int64_t>& tuned_axes,
+                  std::vector<std::size_t>& out_idx,
+                  tensor_view<T>& output) const
+    {
+        auto data_idx = out_idx;
+        T val         = T{0};
+        shape_for_each(batch_shape, [&](auto b_idx) {
+            for(auto axis : tuned_axes)
+            {
+                data_idx[axis] = b_idx[axis];
+            }
+            val += input(data_idx.begin(), data_idx.end());
+        });
+        output(out_idx.begin(), out_idx.end()) = val;
+    }
     argument compute(const shape& output_shape, std::vector<argument> args) const
     {
         argument result{output_shape};
+        auto arg_lens = args.front().get_shape().lens();
+        std::vector<int64_t> tuned_axes = tune_axes(arg_lens.size());
+        std::vector<std::size_t> batch_lens(output_shape.lens().size(), 1);
+        for(auto axis : tuned_axes)
+        {
+            batch_lens[axis] = arg_lens[axis];
+        }
+        shape batch_shape{output_shape.type(), batch_lens};
         visit_all(result, args[0])([&](auto output, auto input) {
-            shape_for_each(input.get_shape(), [&](auto&& in_idx) {
-                auto out_idx = in_idx;
-                for(auto axis : axes)
-                    out_idx[axis] = 0;
-                output(out_idx.begin(), out_idx.end()) += input(in_idx.begin(), in_idx.end());
+            par_for(output_shape.elements(), [&](auto i) {
+                auto out_idx = output_shape.multi(i);
+                this->calc_sum(input, batch_shape, tuned_axes, out_idx, output);
             });
         });
...
@@ -24,6 +24,7 @@
 #include <migraphx/op/div.hpp>
 #include <migraphx/op/dot.hpp>
 #include <migraphx/op/elu.hpp>
+#include <migraphx/op/erf.hpp>
 #include <migraphx/op/exp.hpp>
 #include <migraphx/op/flatten.hpp>
 #include <migraphx/op/gather.hpp>
@@ -46,6 +47,7 @@
 #include <migraphx/op/pooling.hpp>
 #include <migraphx/op/pow.hpp>
 #include <migraphx/op/reduce_sum.hpp>
+#include <migraphx/op/reduce_mean.hpp>
 #include <migraphx/op/relu.hpp>
 #include <migraphx/op/reshape.hpp>
 #include <migraphx/op/rnn.hpp>
...
@@ -40,6 +40,7 @@ struct onnx_parser
         add_generic_op("Sigmoid", op::sigmoid{});
         add_generic_op("Abs", op::abs{});
         add_generic_op("Exp", op::exp{});
+        add_generic_op("Erf", op::erf{});
         add_generic_op("Log", op::log{});
         // disable dropout for inference
         add_generic_op("Dropout", op::identity{});
@@ -99,7 +100,8 @@ struct onnx_parser
         add_mem_op("GRU", &onnx_parser::parse_gru);
         add_mem_op("LSTM", &onnx_parser::parse_lstm);
         add_mem_op("Pad", &onnx_parser::parse_pad);
-        add_mem_op("ReduceSum", &onnx_parser::parse_reduce_sum);
+        add_mem_op("ReduceSum", &onnx_parser::parse_reduce_oper<op::reduce_sum>);
+        add_mem_op("ReduceMean", &onnx_parser::parse_reduce_oper<op::reduce_mean>);
         // init the activation function map
         init_actv_func();
@@ -1371,20 +1373,21 @@ struct onnx_parser
         return {hidden_states, last_output, last_cell_output};
     }
-    instruction_ref parse_reduce_sum(const std::string&,
-                                     attribute_map attributes,
-                                     std::vector<instruction_ref> args)
+    template <class T>
+    instruction_ref parse_reduce_oper(const std::string&,
+                                      attribute_map attributes,
+                                      std::vector<instruction_ref> args)
     {
         std::size_t n_dim = args.front()->get_shape().lens().size();
         // default to reduce over all dimensions
-        std::vector<std::size_t> axes(n_dim);
+        std::vector<int64_t> axes(n_dim);
         std::iota(axes.begin(), axes.end(), 0);
         if(contains(attributes, "axes"))
         {
             axes.clear();
             auto&& attr_axes = attributes["axes"].ints();
-            axes = std::vector<std::size_t>(attr_axes.begin(), attr_axes.end());
+            axes = std::vector<int64_t>(attr_axes.begin(), attr_axes.end());
         }
         int keep_dims = 1;
@@ -1395,13 +1398,12 @@ struct onnx_parser
         if(keep_dims == 1)
         {
-            return prog.add_instruction(op::reduce_sum{axes}, std::move(args));
+            return prog.add_instruction(T{axes}, std::move(args));
         }
         else
         {
-            auto ins = prog.add_instruction(op::reduce_sum{axes}, std::move(args));
-            std::vector<int64_t> squeeze_axes{axes.begin(), axes.end()};
-            return prog.add_instruction(op::squeeze{squeeze_axes}, ins);
+            auto ins = prog.add_instruction(T{axes}, std::move(args));
+            return prog.add_instruction(op::squeeze{axes}, ins);
         }
     }
...
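The keep_dims branch is where the parser maps ONNX semantics onto the reduce ops: keepdims=1 leaves the reduced axes in place with length 1, while keepdims=0 appends an op::squeeze over the same axes. A hedged sketch of the shape effect (plain C++; reduced_lens is a hypothetical illustration, not a parser function):

#include <algorithm>
#include <cassert>
#include <cstdint>
#include <vector>

// Hypothetical helper: reduce_* always produces length-1 reduced axes;
// squeeze then removes them when keepdims=0.
std::vector<std::size_t>
reduced_lens(std::vector<std::size_t> lens, const std::vector<int64_t>& axes, bool keep_dims)
{
    for(auto a : axes)
        lens[static_cast<std::size_t>(a)] = 1; // e.g. {2, 3, 4} -> {2, 1, 4}
    if(keep_dims)
        return lens;
    std::vector<std::size_t> out;
    for(std::size_t i = 0; i < lens.size(); ++i)
    {
        // drop axis i only if it was one of the reduced axes
        bool reduced =
            std::find(axes.begin(), axes.end(), static_cast<int64_t>(i)) != axes.end();
        if(not reduced)
            out.push_back(lens[i]);
    }
    return out; // e.g. {2, 3, 4} -> {2, 4}
}

int main()
{
    assert((reduced_lens({2, 3, 4}, {1}, true) == std::vector<std::size_t>{2, 1, 4}));
    assert((reduced_lens({2, 3, 4}, {1}, false) == std::vector<std::size_t>{2, 4}));
}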
@@ -10,8 +10,8 @@ inline namespace MIGRAPHX_INLINE_NS {
 bool skip_propogate(instruction_ref ins)
 {
-    if(ins->name() == "@literal")
-        return true;
+    if(ins->name() == "contiguous")
+        return skip_propogate(ins->inputs().front());
     auto&& s = ins->get_shape();
     if(s.broadcasted() and not s.scalar())
         return true;
@@ -33,7 +33,7 @@ void propagate_constant::apply(program& p) const
                          ins->outputs().end());
         for(auto child : children)
         {
-            if(skip_propogate(child))
+            if(child->name() == "@literal" or skip_propogate(child))
             {
                 self(child);
                 continue;
...
@@ -17,6 +17,7 @@ add_library(migraphx_device
     device/max.cpp
     device/min.cpp
    device/exp.cpp
+   device/erf.cpp
    device/log.cpp
    device/sin.cpp
    device/cos.cpp
@@ -36,9 +37,11 @@ add_library(migraphx_device
    device/pad.cpp
    device/gather.cpp
    device/sub.cpp
+   device/div.cpp
    device/clip.cpp
    device/reduce_sum.cpp
    device/pow.cpp
+   device/reduce_mean.cpp
 )
 set_target_properties(migraphx_device PROPERTIES EXPORT_NAME device)
 rocm_clang_tidy_check(migraphx_device)
@@ -77,6 +80,7 @@ add_library(migraphx_gpu
    adjust_allocation.cpp
    clip.cpp
    reduce_sum.cpp
+   reduce_mean.cpp
 )
 set_target_properties(migraphx_gpu PROPERTIES EXPORT_NAME gpu)
 rocm_clang_tidy_check(migraphx_gpu)
...
#include <migraphx/gpu/device/div.hpp>
#include <migraphx/gpu/device/nary.hpp>

namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace device {

void div(hipStream_t stream, const argument& result, const argument& arg1, const argument& arg2)
{
    nary(stream, result, arg1, arg2)([](auto x, auto y) { return x / y; });
}

} // namespace device
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#include <migraphx/gpu/device/erf.hpp>
#include <migraphx/gpu/device/nary.hpp>
#include <migraphx/gpu/device/types.hpp>

namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace device {

void erf(hipStream_t stream, const argument& result, const argument& arg)
{
    nary(stream, result, arg)([](auto x) { return ::erf(to_hip_type(x)); });
}

} // namespace device
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
@@ -28,6 +28,16 @@ struct id
     }
 };

+struct mean
+{
+    size_t item_num = 1;
+
+    template <class T>
+    MIGRAPHX_DEVICE_CONSTEXPR auto operator()(T x) const
+    {
+        return static_cast<T>(x / item_num);
+    }
+};
+
 struct max
 {
     template <class T, class U>
...
@@ -9,7 +9,7 @@ namespace device {
 void pow(hipStream_t stream, const argument& result, const argument& arg1, const argument& arg2)
 {
     nary(stream, result, arg1, arg2)(
-        [](auto e, auto b) { return ::pow(to_hip_type(b), to_hip_type(e)); });
+        [](auto b, auto e) { return ::pow(to_hip_type(b), to_hip_type(e)); });
 }
 } // namespace device
...
#include <migraphx/gpu/device/reduce_mean.hpp>
#include <migraphx/gpu/device/reduce.hpp>

namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace device {

void reduce_mean(hipStream_t stream, const argument& result, const argument& arg)
{
    // Each output element averages item_num input elements.
    std::size_t item_num = arg.get_shape().elements() / result.get_shape().elements();
    reduce(stream, result, arg, sum{}, 0, id{}, mean{item_num});
}

} // namespace device
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
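The composition reads inside-out: id{} maps each input element, sum{} folds them from the initial value 0, and mean{item_num} post-processes each folded result. A standalone sketch of that pipeline on the CPU (plain C++; the hip reduce call is assumed to follow this contract):

#include <cstdio>
#include <vector>

int main()
{
    // Shape {2, 2} input reduced to 2 outputs: item_num = 4 / 2 = 2.
    std::vector<float> input{2, 4, 6, 8};
    std::size_t out_elems = 2;
    std::size_t item_num  = input.size() / out_elems;

    std::vector<float> out(out_elems, 0.0f);     // init value 0
    for(std::size_t i = 0; i < input.size(); ++i)
        out[i / item_num] += input[i];           // fold with sum{} over id{} values
    for(auto& v : out)
        v = v / item_num;                        // final elementwise mean{item_num}
    std::printf("%f %f\n", out[0], out[1]);      // 3.000000 7.000000
}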
@@ -8,7 +8,7 @@ namespace device {
 void sub(hipStream_t stream, const argument& result, const argument& arg1, const argument& arg2)
 {
-    nary(stream, result, arg1, arg2)([](auto x, auto y) { return y - x; });
+    nary(stream, result, arg1, arg2)([](auto x, auto y) { return x - y; });
 }
 } // namespace device
...
#ifndef MIGRAPHX_GUARD_RTGLIB_DEVICE_DIV_HPP
#define MIGRAPHX_GUARD_RTGLIB_DEVICE_DIV_HPP
#include <migraphx/argument.hpp>
#include <migraphx/config.hpp>
#include <hip/hip_runtime_api.h>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace device {
void div(hipStream_t stream, const argument& result, const argument& arg1, const argument& arg2);
} // namespace device
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#endif
#ifndef MIGRAPHX_GUARD_RTGLIB_DEVICE_ERF_HPP
#define MIGRAPHX_GUARD_RTGLIB_DEVICE_ERF_HPP
#include <migraphx/argument.hpp>
#include <migraphx/config.hpp>
#include <hip/hip_runtime_api.h>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace device {
void erf(hipStream_t stream, const argument& result, const argument& arg);
} // namespace device
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#endif
#ifndef MIGRAPHX_GUARD_RTGLIB_DEVICE_REDUCE_MEAN_HPP
#define MIGRAPHX_GUARD_RTGLIB_DEVICE_REDUCE_MEAN_HPP
#include <migraphx/argument.hpp>
#include <migraphx/config.hpp>
#include <hip/hip_runtime_api.h>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace device {
void reduce_mean(hipStream_t stream, const argument& result, const argument& arg);
} // namespace device
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#endif
#ifndef MIGRAPHX_GUARD_RTGLIB_DIV_HPP
#define MIGRAPHX_GUARD_RTGLIB_DIV_HPP
#include <migraphx/gpu/oper.hpp>
#include <migraphx/gpu/device/div.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
struct hip_div : binary_device<hip_div, device::div>
{
};
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#endif
#ifndef MIGRAPHX_GUARD_RTGLIB_ERF_HPP
#define MIGRAPHX_GUARD_RTGLIB_ERF_HPP
#include <migraphx/gpu/oper.hpp>
#include <migraphx/gpu/device/erf.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
struct hip_erf : unary_device<hip_erf, device::erf>
{
};
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#endif
@@ -88,7 +88,7 @@ struct binary_device : oper<Derived>
     argument compute(context& ctx, const shape&, const std::vector<argument>& args) const
     {
-        F(ctx.get_stream().get(), args[2], args[1], args[0]);
+        F(ctx.get_stream().get(), args[2], args[0], args[1]);
         return args[2];
     }
...
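This fix belongs with the sub and pow changes above: the wrapper previously passed the inputs to the device function in reverse and each non-commutative kernel compensated inside its lambda; now both sides use the natural (result, lhs, rhs) order. A minimal sketch of why both had to change together (plain C++, hypothetical stand-ins for the wrappers):

#include <cassert>

// Stand-in for the device entry point: receives operands in (lhs, rhs) order.
template <class F>
int run_binary(F f, int lhs, int rhs)
{
    return f(lhs, rhs); // after the fix, the wrapper forwards args[0], args[1]
}

int main()
{
    auto sub = [](auto x, auto y) { return x - y; }; // the fixed sub lambda
    assert(run_binary(sub, 5, 3) == 2); // 5 - 3, as ONNX Sub expects
    // Before the fix the wrapper swapped the operands and the lambda computed
    // y - x to cancel it out; fixing only one side would have broken sub.
}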
#ifndef MIGRAPHX_GUARD_RTGLIB_REDUCE_MEAN_HPP
#define MIGRAPHX_GUARD_RTGLIB_REDUCE_MEAN_HPP

#include <migraphx/shape.hpp>
#include <migraphx/op/reduce_mean.hpp>
#include <migraphx/reflect.hpp>

namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {

struct context;

struct hip_reduce_mean
{
    op::reduce_mean op;

    template <class Self, class F>
    static auto reflect(Self& self, F f)
    {
        return migraphx::reflect(self.op, f);
    }

    std::string name() const { return "gpu::reduce_mean"; }
    shape compute_shape(std::vector<shape> inputs) const;
    argument
    compute(context& ctx, const shape& output_shape, const std::vector<argument>& args) const;
    std::ptrdiff_t output_alias(const std::vector<shape>& shapes) const
    {
        return shapes.size() - 1;
    }
};

} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx

#endif