Unverified Commit a5c1c7f6 authored by Paul Fultz II, committed by GitHub

Merge branch 'develop' into mem_color_ordering_fix

parents 462a4920 d516b099
@@ -3,7 +3,7 @@
#include <migraphx/gpu/device/nary.hpp>
namespace migraphx {
inline namespace MIGRAPH_INLINE_NS {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace device {
@@ -14,5 +14,5 @@ void contiguous(hipStream_t stream, argument result, argument arg)
} // namespace device
} // namespace gpu
} // namespace MIGRAPH_INLINE_NS
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#include <migraphx/gpu/device/cos.hpp>
#include <migraphx/gpu/device/nary.hpp>
#include <migraphx/gpu/device/types.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace device {
void cos(hipStream_t stream, const argument& result, const argument& arg)
{
nary(stream, result, arg)([](auto x) { return ::cos(to_hip_type(x)); });
}
} // namespace device
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#include <migraphx/gpu/device/cosh.hpp>
#include <migraphx/gpu/device/nary.hpp>
#include <migraphx/gpu/device/types.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace device {
void cosh(hipStream_t stream, const argument& result, const argument& arg)
{
nary(stream, result, arg)([](auto x) { return ::cosh(to_hip_type(x)); });
}
} // namespace device
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#include <migraphx/gpu/device/exp.hpp>
#include <migraphx/gpu/device/nary.hpp>
#include <migraphx/gpu/device/types.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace device {
void exp(hipStream_t stream, const argument& result, const argument& arg)
{
nary(stream, result, arg)([](auto x) { return ::exp(to_hip_type(x)); });
}
} // namespace device
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
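The cos, cosh, and exp kernels above are pure pointwise operations, so each file is a thin wrapper over the nary launcher with a one-argument lambda. As a rough illustration of that pattern only, and not part of this commit, a hypothetical unary kernel for a square-root operation would follow the same shape; the file, function name, and includes below are assumptions, while nary and to_hip_type are the helpers shown elsewhere in this diff.

#include <migraphx/argument.hpp>
#include <migraphx/gpu/device/nary.hpp>
#include <migraphx/gpu/device/types.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace device {
// Hypothetical example: apply ::sqrt to every element of arg and store it in result.
// nary(...) returns a callable that launches a single elementwise HIP kernel; the
// lambda runs once per element, with half values widened to float by to_hip_type.
void sqrt(hipStream_t stream, const argument& result, const argument& arg)
{
    nary(stream, result, arg)([](auto x) { return ::sqrt(to_hip_type(x)); });
}
} // namespace device
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx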
#include <migraphx/shape.hpp>
#include <migraphx/argument.hpp>
#include <migraphx/gpu/device/gather.hpp>
#include <migraphx/gpu/device/tensor.hpp>
#include <migraphx/gpu/device/launch.hpp>
#include <migraphx/gpu/device/types.hpp>
#include <migraphx/gpu/hip.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace device {
argument gather(hipStream_t stream,
const migraphx::shape& output_shape,
std::vector<migraphx::argument> args,
int axis)
{
int axis_index = (axis < 0) ? (axis + output_shape.lens().size()) : axis;
visit_all(args.back(), args[0])([&](auto output, auto input) {
std::size_t nelements = output_shape.elements();
args[1].visit([&](auto indices) {
visit_tensor_size(output_shape.lens().size(), [&](auto ndim) {
const auto* indices_ptr = device_cast(indices.data());
auto* outptr = device_cast(output.data());
const auto* inptr = device_cast(input.data());
hip_tensor_descriptor<ndim> desc_input(input.get_shape());
hip_tensor_descriptor<ndim> desc_output(output.get_shape());
gs_launch(stream, nelements)([=](auto i) {
auto lens = desc_output.multi(i);
lens[axis_index] = indices_ptr[lens[axis_index]];
outptr[i] = inptr[desc_input.linear(lens)];
});
});
});
});
return args.back();
}
} // namespace device
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
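To make the indexing in the gather kernel above concrete: each output coordinate is decomposed, its component along the gather axis is looked up in the indices tensor, and the resulting coordinate is used to read the input. With illustrative values not taken from this commit, a 4x3 input, indices {2, 0}, and axis 1 (or -1, which axis_index normalizes to 1) produce a 4x2 output in which every row i satisfies output[i][0] = input[i][2] and output[i][1] = input[i][0].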
#ifndef MIGRAPH_GUARD_RTGLIB_DEVICE_LAUNCH_HPP
#define MIGRAPH_GUARD_RTGLIB_DEVICE_LAUNCH_HPP
#ifndef MIGRAPHX_GUARD_RTGLIB_DEVICE_LAUNCH_HPP
#define MIGRAPHX_GUARD_RTGLIB_DEVICE_LAUNCH_HPP
#include <hip/hip_runtime.h>
#include <migraphx/config.hpp>
namespace migraphx {
inline namespace MIGRAPH_INLINE_NS {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace device {
@@ -53,14 +53,14 @@ inline auto gs_launch(hipStream_t stream, std::size_t n, std::size_t local = 102
// Workaround hcc's broken tile_static macro
#ifdef tile_static
#undef tile_static
#define MIGRAPH_DEVICE_SHARED __attribute__((tile_static))
#define MIGRAPHX_DEVICE_SHARED __attribute__((tile_static))
#else
#define MIGRAPH_DEVICE_SHARED __shared__
#define MIGRAPHX_DEVICE_SHARED __shared__
#endif
} // namespace device
} // namespace gpu
} // namespace MIGRAPH_INLINE_NS
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#endif
#ifndef MIGRAPH_GUARD_RTGLIB_DEVICE_NARY_HPP
#define MIGRAPH_GUARD_RTGLIB_DEVICE_NARY_HPP
#ifndef MIGRAPHX_GUARD_RTGLIB_DEVICE_NARY_HPP
#define MIGRAPHX_GUARD_RTGLIB_DEVICE_NARY_HPP
#include <migraphx/gpu/device/tensor.hpp>
#include <migraphx/gpu/device/launch.hpp>
@@ -9,7 +9,7 @@
#include <migraphx/config.hpp>
namespace migraphx {
inline namespace MIGRAPH_INLINE_NS {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace device {
@@ -87,7 +87,7 @@ void trinary_broadcast_vec_impl(hipStream_t stream,
const std::size_t bdim_vec_len = bdim_len / vec_size;
launch(stream, nglobal, nlocal)([=](auto idx) __device__ {
MIGRAPH_DEVICE_SHARED vec4<type> buffer[2048 / vec_size];
MIGRAPHX_DEVICE_SHARED vec4<type> buffer[2048 / vec_size];
// Load bias into LDS
for(size_t i = idx.local; i < bdim_vec_len; i += nlocal)
{
@@ -144,7 +144,7 @@ void trinary_broadcast_impl(hipStream_t stream,
const std::size_t n = output.size();
launch(stream, nglobal, nlocal)([=](auto idx) __device__ {
MIGRAPH_DEVICE_SHARED type buffer[2048];
MIGRAPHX_DEVICE_SHARED type buffer[2048];
// Load bias into LDS
for(size_t i = idx.local; i < bdim_len; i += nlocal)
{
@@ -192,7 +192,7 @@ void binary_broadcast_vec_impl(
const std::size_t bdim_vec_len = bdim_len / vec_size;
launch(stream, nglobal, nlocal)([=](auto idx) __device__ {
MIGRAPH_DEVICE_SHARED vec4<type> buffer[2048 / vec_size];
MIGRAPHX_DEVICE_SHARED vec4<type> buffer[2048 / vec_size];
// Load bias into LDS
for(size_t i = idx.local; i < bdim_vec_len; i += nlocal)
{
@@ -243,7 +243,7 @@ void binary_broadcast_impl(
const std::size_t n = output.size();
launch(stream, nglobal, nlocal)([=](auto idx) __device__ {
MIGRAPH_DEVICE_SHARED type buffer[2048];
MIGRAPHX_DEVICE_SHARED type buffer[2048];
// Load bias into LDS
for(size_t i = idx.local; i < bdim_len; i += nlocal)
{
@@ -313,6 +313,12 @@ void nary_impl(hipStream_t stream, F f, argument result, Arguments... args)
nary_nonstandard_impl(stream, f, result, args...);
}
template <class F>
void nary_impl(hipStream_t stream, F f, argument result)
{
nary_standard_impl(stream, f, result);
}
template <class... Arguments>
auto nary_nonstandard(hipStream_t stream, argument result, Arguments... args)
{
@@ -396,7 +402,7 @@ inline auto nary(hipStream_t stream,
} // namespace device
} // namespace gpu
} // namespace MIGRAPH_INLINE_NS
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#endif
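Besides the macro rename, the functional change in this header is the new zero-input nary_impl overload, which lets nary be called with only a result tensor; pad.cpp further down in this commit relies on it to fill the output with the padding value before the input is copied in. A minimal usage sketch, assuming stream is an existing hipStream_t and result is an already-allocated device argument:

// Fill every element of result with a constant via the new nullary overload;
// the lambda takes no inputs and just returns the value to store.
float value = 3.14f;
nary(stream, result)([=] { return value; });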
#ifndef MIGRAPH_GUARD_RTGLIB_DEAVICE_TENSOR_HPP
#define MIGRAPH_GUARD_RTGLIB_DEAVICE_TENSOR_HPP
#ifndef MIGRAPHX_GUARD_RTGLIB_DEAVICE_TENSOR_HPP
#define MIGRAPHX_GUARD_RTGLIB_DEAVICE_TENSOR_HPP
#include <hip/hip_runtime.h>
#include <migraphx/functional.hpp>
#include <migraphx/config.hpp>
namespace migraphx {
inline namespace MIGRAPH_INLINE_NS {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace device {
@@ -87,7 +87,7 @@ struct hip_tensor_descriptor
} // namespace device
} // namespace gpu
} // namespace MIGRAPH_INLINE_NS
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#endif
@@ -5,14 +5,14 @@
file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
==============================================================================*/
#ifndef MIGRAPH_GUARD_RTGLIB_GPU_DEVICE_TYPES_HPP
#define MIGRAPH_GUARD_RTGLIB_GPU_DEVICE_TYPES_HPP
#ifndef MIGRAPHX_GUARD_RTGLIB_GPU_DEVICE_TYPES_HPP
#define MIGRAPHX_GUARD_RTGLIB_GPU_DEVICE_TYPES_HPP
#include <migraphx/half.hpp>
#include <migraphx/config.hpp>
namespace migraphx {
inline namespace MIGRAPH_INLINE_NS {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace device {
@@ -86,7 +86,7 @@ inline float to_hip_type(gpu_half x) { return x; }
} // namespace device
} // namespace gpu
} // namespace MIGRAPH_INLINE_NS
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#endif
#include <migraphx/gpu/device/log.hpp>
#include <migraphx/gpu/device/nary.hpp>
#include <migraphx/gpu/device/types.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace device {
void log(hipStream_t stream, const argument& result, const argument& arg)
{
nary(stream, result, arg)([](auto x) { return ::log(to_hip_type(x)); });
}
} // namespace device
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#include <migraphx/gpu/device/max.hpp>
#include <migraphx/gpu/device/nary.hpp>
#include <migraphx/gpu/device/types.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace device {
void max(hipStream_t stream, const argument& result, const argument& arg1, const argument& arg2)
{
nary(stream, result, arg1, arg2)(
[](auto x, auto y) { return std::max(to_hip_type(x), to_hip_type(y)); });
}
} // namespace device
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#include <migraphx/gpu/device/min.hpp>
#include <migraphx/gpu/device/nary.hpp>
#include <migraphx/gpu/device/types.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace device {
void min(hipStream_t stream, const argument& result, const argument& arg1, const argument& arg2)
{
nary(stream, result, arg1, arg2)(
[](auto x, auto y) { return std::min(to_hip_type(x), to_hip_type(y)); });
}
} // namespace device
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
@@ -2,7 +2,7 @@
#include <migraphx/gpu/device/nary.hpp>
namespace migraphx {
inline namespace MIGRAPH_INLINE_NS {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace device {
@@ -22,5 +22,5 @@ void mul(hipStream_t stream,
} // namespace device
} // namespace gpu
} // namespace MIGRAPH_INLINE_NS
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#include <migraphx/shape.hpp>
#include <migraphx/argument.hpp>
#include <migraphx/gpu/device/nary.hpp>
#include <migraphx/gpu/device/pad.hpp>
#include <migraphx/gpu/device/tensor.hpp>
#include <migraphx/gpu/device/launch.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace device {
argument
pad(hipStream_t stream, argument result, argument arg1, float value, std::vector<std::int64_t> pads)
{
std::size_t nelements = arg1.get_shape().elements();
nary(stream, result)([=] { return value; });
visit_all(result, arg1)([&](auto output, auto input) {
visit_tensor_size(result.get_shape().lens().size(), [&](auto ndim) {
std::size_t offsets[ndim];
std::copy(pads.begin(), pads.begin() + ndim, offsets);
auto* outptr = output.data();
const auto* inptr = input.data();
hip_tensor_descriptor<ndim> desc_input(input.get_shape());
hip_tensor_descriptor<ndim> desc_output(output.get_shape());
gs_launch(stream, nelements)([=](auto i) {
auto idx = desc_input.multi(i);
for(std::size_t j = 0; j < ndim; j++)
{
idx[j] += offsets[j];
}
outptr[desc_output.linear(idx)] = inptr[i];
});
});
});
return result;
}
} // namespace device
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
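A small worked example for the pad kernel above, with values chosen for illustration rather than taken from this commit: for a 2x2 input and pads = {1, 1, 1, 1}, std::copy takes the first ndim entries {1, 1} as the per-dimension offsets, the nary call first fills the 4x4 output with value, and the gs_launch loop then writes input element (i, j) to output element (i + 1, j + 1), leaving a one-element border of value around the original data.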
@@ -3,7 +3,7 @@
#include <migraphx/gpu/device/types.hpp>
namespace migraphx {
inline namespace MIGRAPH_INLINE_NS {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace device {
@@ -14,5 +14,5 @@ void sin(hipStream_t stream, const argument& result, const argument& arg)
} // namespace device
} // namespace gpu
} // namespace MIGRAPH_INLINE_NS
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#include <migraphx/gpu/device/sinh.hpp>
#include <migraphx/gpu/device/nary.hpp>
#include <migraphx/gpu/device/types.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace device {
void sinh(hipStream_t stream, const argument& result, const argument& arg)
{
nary(stream, result, arg)([](auto x) { return ::sinh(to_hip_type(x)); });
}
} // namespace device
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#include <migraphx/gpu/device/sub.hpp>
#include <migraphx/gpu/device/nary.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace device {
void sub(hipStream_t stream, const argument& result, const argument& arg1, const argument& arg2)
{
nary(stream, result, arg1, arg2)([](auto x, auto y) { return y - x; });
}
} // namespace device
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#include <migraphx/gpu/device/tan.hpp>
#include <migraphx/gpu/device/nary.hpp>
#include <migraphx/gpu/device/types.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace device {
void tan(hipStream_t stream, const argument& result, const argument& arg)
{
nary(stream, result, arg)([](auto x) { return ::tan(to_hip_type(x)); });
}
} // namespace device
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
@@ -9,14 +9,11 @@
#include <migraphx/pass_config.hpp>
namespace migraphx {
inline namespace MIGRAPH_INLINE_NS {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
void eliminate_workspace::apply(program& p) const
{
if(!enabled(MIGRAPH_DISABLE_MEMORY_COLORING{}))
return;
std::size_t n = 0;
std::vector<instruction_ref> allocs;
for(auto ins : iterator_for(p))
@@ -32,14 +29,17 @@ void eliminate_workspace::apply(program& p) const
allocs.push_back(ins);
}
}
auto ws = p.add_parameter("workspace", shape{shape::int8_type, {n}});
for(auto&& a : allocs)
if(n > 0)
{
p.replace_instruction(a, ws);
p.remove_instruction(a);
auto ws = p.add_parameter("workspace", shape{shape::int8_type, {n}});
for(auto&& a : allocs)
{
p.replace_instruction(a, ws);
p.remove_instruction(a);
}
}
}
} // namespace gpu
} // namespace MIGRAPH_INLINE_NS
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#include <migraphx/gpu/elu.hpp>
#include <migraphx/operators.hpp>
#include <migraphx/manage_ptr.hpp>
#include <migraphx/gpu/miopen.hpp>
#include <utility>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
shape miopen_elu::compute_shape(const std::vector<shape>& inputs) const
{
check_shapes{inputs, *this}.has(2).not_broadcasted();
return inputs.at(1);
}
argument miopen_elu::compute(context& ctx,
const shape& output_shape,
const std::vector<argument>& args) const
{
float alpha = 1;
float beta = 0;
auto x_desc = make_tensor(args[0].get_shape());
auto y_desc = make_tensor(output_shape);
miopenActivationForward(ctx.get_stream().get_miopen(),
ad.get(),
&alpha,
x_desc.get(),
args[0].implicit(),
&beta,
y_desc.get(),
args[1].implicit());
return args[1];
}
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx