Commit 087c205e authored by Paul

Merge from develop

parents a3a9e469 e15b8333
#include <migraphx/gpu/device/acos.hpp>
#include <migraphx/gpu/device/nary.hpp>
#include <migraphx/gpu/device/types.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace device {
void acos(hipStream_t stream, const argument& result, const argument& arg)
{
nary(stream, result, arg)([](auto x) { return ::acos(to_hip_type(x)); });
}
} // namespace device
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#include <migraphx/gpu/device/asin.hpp>
#include <migraphx/gpu/device/nary.hpp>
#include <migraphx/gpu/device/types.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace device {
void asin(hipStream_t stream, const argument& result, const argument& arg)
{
nary(stream, result, arg)([](auto x) { return ::asin(to_hip_type(x)); });
}
} // namespace device
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#include <migraphx/gpu/device/atan.hpp>
#include <migraphx/gpu/device/nary.hpp>
#include <migraphx/gpu/device/types.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace device {
void atan(hipStream_t stream, const argument& result, const argument& arg)
{
nary(stream, result, arg)([](auto x) { return ::atan(to_hip_type(x)); });
}
} // namespace device
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#include <migraphx/gpu/device/cos.hpp>
#include <migraphx/gpu/device/nary.hpp>
#include <migraphx/gpu/device/types.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace device {
void cos(hipStream_t stream, const argument& result, const argument& arg)
{
nary(stream, result, arg)([](auto x) { return ::cos(to_hip_type(x)); });
}
} // namespace device
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#include <migraphx/gpu/device/cosh.hpp>
#include <migraphx/gpu/device/nary.hpp>
#include <migraphx/gpu/device/types.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace device {
void cosh(hipStream_t stream, const argument& result, const argument& arg)
{
nary(stream, result, arg)([](auto x) { return ::cosh(to_hip_type(x)); });
}
} // namespace device
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#include <migraphx/gpu/device/exp.hpp>
#include <migraphx/gpu/device/nary.hpp>
#include <migraphx/gpu/device/types.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace device {
void exp(hipStream_t stream, const argument& result, const argument& arg)
{
nary(stream, result, arg)([](auto x) { return ::exp(to_hip_type(x)); });
}
} // namespace device
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#include <migraphx/shape.hpp>
#include <migraphx/argument.hpp>
#include <migraphx/gpu/device/gather.hpp>
#include <migraphx/gpu/device/tensor.hpp>
#include <migraphx/gpu/device/launch.hpp>
#include <migraphx/gpu/device/types.hpp>
#include <migraphx/gpu/hip.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace device {
argument gather(hipStream_t stream,
const migraphx::shape& output_shape,
std::vector<migraphx::argument> args,
int axis)
{
int axis_index = (axis < 0) ? (axis + args[0].get_shape().lens().size()) : axis;
visit_all(args.back(), args[0])([&](auto output, auto input) {
std::size_t nelements = output_shape.elements();
args[1].visit([&](auto indices) {
const auto* indices_ptr = device_cast(indices.data());
auto* out_ptr = device_cast(output.data());
const auto* in_ptr = device_cast(input.data());
auto& input_shape = args[0].get_shape();
auto lens = input_shape.lens();
lens[axis_index] = args[1].get_shape().elements();
migraphx::shape out_comp_shape{output_shape.type(), lens};
visit_tensor_size(out_comp_shape.lens().size(), [&](auto n_out_dim) {
hip_tensor_descriptor<n_out_dim> desc_input(input_shape);
hip_tensor_descriptor<n_out_dim> desc_output(out_comp_shape);
gs_launch(stream, nelements)([=](auto ii) {
auto in_idx = desc_output.multi(ii);
in_idx[axis_index] = indices_ptr[in_idx[axis_index]];
out_ptr[ii] = in_ptr[desc_input.linear(in_idx)];
});
});
});
});
return args.back();
}
} // namespace device
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
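For reference, the index mapping implemented by the gather kernel above can be sketched on the host roughly as follows. This is an illustrative stand-alone example, not part of the commit; host_gather, its stride helper, and the use of float/std::size_t element and index types are simplifications.

#include <cstddef>
#include <vector>

// Host-side reference of the gather mapping: every output element keeps its
// multi-index except along `axis`, where the coordinate is first looked up in
// `indices`; the remapped multi-index is then read from the input.
std::vector<float> host_gather(const std::vector<float>& input,
                               const std::vector<std::size_t>& dims,    // input lens
                               const std::vector<std::size_t>& indices, // gather indices
                               std::size_t axis)
{
    // row-major (packed) strides for a given set of dimensions
    auto row_major_strides = [](const std::vector<std::size_t>& d) {
        std::vector<std::size_t> s(d.size(), 1);
        for(std::size_t j = d.size(); j > 1; j--)
            s[j - 2] = s[j - 1] * d[j - 1];
        return s;
    };
    std::vector<std::size_t> out_dims = dims;
    out_dims[axis]                    = indices.size();
    auto in_strides                   = row_major_strides(dims);
    auto out_strides                  = row_major_strides(out_dims);

    std::size_t nelements = 1;
    for(auto d : out_dims)
        nelements *= d;

    std::vector<float> output(nelements);
    for(std::size_t i = 0; i < nelements; i++)
    {
        std::size_t in_linear = 0;
        for(std::size_t j = 0; j < out_dims.size(); j++)
        {
            // decompose the output linear index into a coordinate...
            std::size_t coord = (i / out_strides[j]) % out_dims[j];
            // ...and remap it through the indices along the gather axis
            if(j == axis)
                coord = indices[coord];
            in_linear += coord * in_strides[j];
        }
        output[i] = input[in_linear];
    }
    return output;
}

For example, with a 2x3 input, indices {2, 0} and axis 1, the output is 2x2 and output(i, k) = input(i, indices[k]).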
......@@ -313,6 +313,12 @@ void nary_impl(hipStream_t stream, F f, argument result, Arguments... args)
nary_nonstandard_impl(stream, f, result, args...);
}
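// Nullary overload added by this hunk: it runs f with no inputs and writes the
// returned value to every element of `result`; pad() below relies on this path
// to fill the output with the padding value.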
template <class F>
void nary_impl(hipStream_t stream, F f, argument result)
{
nary_standard_impl(stream, f, result);
}
template <class... Arguments>
auto nary_nonstandard(hipStream_t stream, argument result, Arguments... args)
{
......
#include <migraphx/gpu/device/log.hpp>
#include <migraphx/gpu/device/nary.hpp>
#include <migraphx/gpu/device/types.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace device {
void log(hipStream_t stream, const argument& result, const argument& arg)
{
nary(stream, result, arg)([](auto x) { return ::log(to_hip_type(x)); });
}
} // namespace device
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#include <migraphx/gpu/device/max.hpp>
#include <migraphx/gpu/device/nary.hpp>
#include <migraphx/gpu/device/types.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace device {
void max(hipStream_t stream, const argument& result, const argument& arg1, const argument& arg2)
{
nary(stream, result, arg1, arg2)(
[](auto x, auto y) { return std::max(to_hip_type(x), to_hip_type(y)); });
}
} // namespace device
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#include <migraphx/gpu/device/min.hpp>
#include <migraphx/gpu/device/nary.hpp>
#include <migraphx/gpu/device/types.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace device {
void min(hipStream_t stream, const argument& result, const argument& arg1, const argument& arg2)
{
nary(stream, result, arg1, arg2)(
[](auto x, auto y) { return std::min(to_hip_type(x), to_hip_type(y)); });
}
} // namespace device
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#include <migraphx/shape.hpp>
#include <migraphx/argument.hpp>
#include <migraphx/gpu/device/nary.hpp>
#include <migraphx/gpu/device/pad.hpp>
#include <migraphx/gpu/device/tensor.hpp>
#include <migraphx/gpu/device/launch.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace device {
argument
pad(hipStream_t stream, argument result, argument arg1, float value, std::vector<std::int64_t> pads)
{
std::size_t nelements = arg1.get_shape().elements();
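// fill every element of the output with the pad value before copying the input in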
nary(stream, result)([=] { return value; });
visit_all(result, arg1)([&](auto output, auto input) {
visit_tensor_size(result.get_shape().lens().size(), [&](auto ndim) {
std::size_t offsets[ndim];
std::copy(pads.begin(), pads.begin() + ndim, offsets);
auto* outptr = output.data();
const auto* inptr = input.data();
hip_tensor_descriptor<ndim> desc_input(input.get_shape());
hip_tensor_descriptor<ndim> desc_output(output.get_shape());
gs_launch(stream, nelements)([=](auto i) {
auto idx = desc_input.multi(i);
for(std::size_t j = 0; j < ndim; j++)
{
idx[j] += offsets[j];
}
outptr[desc_output.linear(idx)] = inptr[i];
});
});
});
return result;
}
} // namespace device
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
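A concrete illustration of the mapping above (not part of the commit, assuming the usual {leading pads..., trailing pads...} layout of `pads`): for a 2x3 input with pads = {1, 1, 1, 1}, the result is 4x5; the fill pass writes `value` into all 20 elements, and the copy pass places input element (i, j) at output (i + 1, j + 1), since only the first ndim entries of `pads` (the leading offsets) are used to shift the input block.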
#include <migraphx/gpu/device/sinh.hpp>
#include <migraphx/gpu/device/nary.hpp>
#include <migraphx/gpu/device/types.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace device {
void sinh(hipStream_t stream, const argument& result, const argument& arg)
{
nary(stream, result, arg)([](auto x) { return ::sinh(to_hip_type(x)); });
}
} // namespace device
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#include <migraphx/gpu/device/sub.hpp>
#include <migraphx/gpu/device/nary.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace device {
void sub(hipStream_t stream, const argument& result, const argument& arg1, const argument& arg2)
{
nary(stream, result, arg1, arg2)([](auto x, auto y) { return y - x; });
}
} // namespace device
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#include <migraphx/gpu/device/tan.hpp>
#include <migraphx/gpu/device/nary.hpp>
#include <migraphx/gpu/device/types.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace device {
void tan(hipStream_t stream, const argument& result, const argument& arg)
{
nary(stream, result, arg)([](auto x) { return ::tan(to_hip_type(x)); });
}
} // namespace device
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
......@@ -14,9 +14,6 @@ namespace gpu {
void eliminate_workspace::apply(program& p) const
{
if(!enabled(MIGRAPHX_DISABLE_MEMORY_COLORING{}))
return;
std::size_t n = 0;
std::vector<instruction_ref> allocs;
for(auto ins : iterator_for(p))
......@@ -32,11 +29,14 @@ void eliminate_workspace::apply(program& p) const
allocs.push_back(ins);
}
}
- auto ws = p.add_parameter("workspace", shape{shape::int8_type, {n}});
- for(auto&& a : allocs)
+ if(n > 0)
{
- p.replace_instruction(a, ws);
- p.remove_instruction(a);
+ auto ws = p.add_parameter("workspace", shape{shape::int8_type, {n}});
+ for(auto&& a : allocs)
+ {
+ p.replace_instruction(a, ws);
+ p.remove_instruction(a);
+ }
}
}
......
#include <migraphx/gpu/elu.hpp>
#include <migraphx/gpu/context.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
shape miopen_elu::compute_shape(const std::vector<shape>& inputs) const
{
check_shapes{inputs, *this}.has(2).not_broadcasted();
return inputs.at(1);
}
argument miopen_elu::compute(context& ctx,
const shape& output_shape,
const std::vector<argument>& args) const
{
float alpha = 1;
float beta = 0;
auto x_desc = make_tensor(args[0].get_shape());
auto y_desc = make_tensor(output_shape);
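// note: `ad` is expected to be the MIOpen activation descriptor held by miopen_elu,
// configured outside this excerpt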
miopenActivationForward(ctx.get_stream().get_miopen(),
ad.get(),
&alpha,
x_desc.get(),
args[0].implicit(),
&beta,
y_desc.get(),
args[1].implicit());
return args[1];
}
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
......@@ -3,6 +3,7 @@
#include <migraphx/gpu/miopen.hpp>
#include <migraphx/gpu/convolution.hpp>
#include <migraphx/gpu/device/add_relu.hpp>
#include <migraphx/gpu/device/add.hpp>
#include <migraphx/instruction.hpp>
namespace migraphx {
......@@ -137,6 +138,8 @@ MIGRAPHX_PRED_MATCHER(fusable_conv, instruction_ref ins)
auto wei = ins->inputs().at(1)->get_shape();
assert(wei.lens().size() == 4);
auto conv = any_cast<miopen_convolution>(ins->get_operator());
if(conv.op.group > 1)
return false;
if(wei.lens()[1] > 512 and conv.algo != miopenConvolutionFwdAlgoWinograd)
return false;
auto op = conv.op;
......@@ -265,17 +268,15 @@ struct miopen_conv_bias
argument compute(context& ctx, const shape&, const std::vector<argument>& args) const
{
auto fargs = make_fused_args();
- float alpha = 1, beta = 0;
+ float alpha = 1;
+ float beta = 0;
miopenSetOpArgsConvForward(fargs.get(), conv, &alpha, &beta, args[1].implicit());
miopenSetOpArgsBiasForward(fargs.get(), bias, &alpha, &beta, args[3].implicit());
return f.execute(ctx, fargs, args[0], args[4]);
}
- shape compile(context& ctx)
- {
- f.compile(ctx);
- return f.get_workspace(ctx);
- }
+ void finalize(context& ctx, const shape&, const std::vector<shape>&) { f.compile(ctx); }
+ shape get_workspace(context& ctx) { return f.get_workspace(ctx); }
int output_alias(const std::vector<shape>& shapes) const { return shapes.size() - 1; }
};
......@@ -308,18 +309,15 @@ struct miopen_conv_bias_relu
argument compute(context& ctx, const shape&, const std::vector<argument>& args) const
{
auto fargs = make_fused_args();
- float alpha = 1, beta = 0;
+ float alpha = 1;
+ float beta = 0;
miopenSetOpArgsConvForward(fargs.get(), conv, &alpha, &beta, args[1].implicit());
miopenSetOpArgsBiasForward(fargs.get(), bias, &alpha, &beta, args[3].implicit());
miopenSetOpArgsActivForward(fargs.get(), relu, &alpha, &beta, 0, 0, 0);
return f.execute(ctx, fargs, args[0], args[4]);
}
- shape compile(context& ctx)
- {
- f.compile(ctx);
- return f.get_workspace(ctx);
- }
+ void finalize(context& ctx, const shape&, const std::vector<shape>&) { f.compile(ctx); }
+ shape get_workspace(context& ctx) { return f.get_workspace(ctx); }
int output_alias(const std::vector<shape>& shapes) const { return shapes.size() - 1; }
};
......@@ -346,8 +344,8 @@ void apply_conv_bias(context& ctx, program& p, match::matcher_result r)
Op cb{conv_op, input_ins->get_shape(), weights_ins->get_shape(), bias_ins->get_shape()};
// TODO: Insert ws allocation
- auto ws = cb.compile(ctx);
+ auto ws = cb.get_workspace(ctx);
(void)ws;
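// workspace allocation is still a TODO (see comment above); the cast only
// silences the unused-variable warning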
p.replace_instruction(ins, cb, input_ins, weights_ins, old_ws_ins, bias_ins, alloc_ins);
}
......
#include <migraphx/gpu/gather.hpp>
#include <migraphx/gpu/context.hpp>
#include <migraphx/gpu/device/gather.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
shape hip_gather::compute_shape(std::vector<shape> inputs) const
{
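// the trailing shape is the preallocated output buffer; drop it before
// validating against the base gather op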
inputs.pop_back();
return op.compute_shape(inputs);
}
argument hip_gather::compute(context& ctx,
const shape& output_shape,
const std::vector<argument>& args) const
{
return device::gather(ctx.get_stream().get(), output_shape, args, op.axis);
}
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#include <migraphx/gpu/gemm.hpp>
#include <migraphx/operators.hpp>
#include <migraphx/manage_ptr.hpp>
#include <migraphx/gpu/miopen.hpp>
#include <utility>
#include <migraphx/gpu/context.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
......@@ -107,6 +104,7 @@ argument miopen_gemm::compute(context& ctx,
ldc);
});
return args[2];
}
......