Merge branch 'develop' into doc2

3a848f0d · Paul · 64e8e30a · d1e945da · 3a848f0d · 3a848f0d
Commit 3a848f0d authored Mar 19, 2020 by Paul
20 changed files
--- a/src/targets/gpu/device/sign.cpp
+++ b/src/targets/gpu/device/sign.cpp
@@ -9,7 +9,7 @@ namespace device {

 void sign(hipStream_t stream, const argument& result, const argument& arg)
 {
-    nary(stream, result, arg)([](auto x) { return (x > 0 ? 1 : ((x < 0) ? -1 : 0)); });
+    nary(stream, result, arg)([](auto x) __device__ { return (x > 0 ? 1 : ((x < 0) ? -1 : 0)); });
 }

 } // namespace device

--- a/src/targets/gpu/device/sin.cpp
+++ b/src/targets/gpu/device/sin.cpp
@@ -9,7 +9,7 @@ namespace device {

 void sin(hipStream_t stream, const argument& result, const argument& arg)
 {
-    nary(stream, result, arg)([](auto x) { return ::sin(to_hip_type(x)); });
+    nary(stream, result, arg)([](auto x) __device__ { return ::sin(to_hip_type(x)); });
 }

 } // namespace device

--- a/src/targets/gpu/device/sinh.cpp
+++ b/src/targets/gpu/device/sinh.cpp
@@ -9,7 +9,7 @@ namespace device {

 void sinh(hipStream_t stream, const argument& result, const argument& arg)
 {
-    nary(stream, result, arg)([](auto x) { return ::sinh(to_hip_type(x)); });
+    nary(stream, result, arg)([](auto x) __device__ { return ::sinh(to_hip_type(x)); });
 }

 } // namespace device

--- a/src/targets/gpu/device/softmax.cpp
+++ b/src/targets/gpu/device/softmax.cpp
@@ -12,11 +12,10 @@ inline namespace MIGRAPHX_INLINE_NS {
 namespace gpu {
 namespace device {

-void softmax(hipStream_t stream, const argument& result, const argument& arg, int axis)
+void softmax(hipStream_t stream, const argument& result, const argument& arg, int64_t axis)
 {
-    auto lens                = result.get_shape().lens();
-    auto batch_lens          = lens;
-    index_int batch_item_num = lens[axis];
+    auto batch_lens          = result.get_shape().lens();
+    index_int batch_item_num = batch_lens[axis];
    batch_lens[axis]         = 1;
    migraphx::shape batch_shape{result.get_shape().type(), batch_lens};

@@ -43,7 +42,7 @@ void softmax(hipStream_t stream, const argument& result, const argument& arg, in
                    return ::exp(to_hip_type(val));
                });

-            idx.local_stride(batch_item_num, [&](auto j) {
+            idx.local_stride(batch_item_num, [&](auto j) __device__ {
                data_idx[axis]   = j;
                auto val         = input[data_idx] - batch_max;
                output[data_idx] = ::exp(to_hip_type(val)) / batch_sum;

--- a/src/targets/gpu/device/sqdiff.cpp
+++ b/src/targets/gpu/device/sqdiff.cpp
@@ -8,7 +8,7 @@ namespace device {

 void sqdiff(hipStream_t stream, const argument& result, const argument& arg1, const argument& arg2)
 {
-    nary(stream, result, arg1, arg2)([](auto x, auto y) { return (x - y) * (x - y); });
+    nary(stream, result, arg1, arg2)([](auto x, auto y) __device__ { return (x - y) * (x - y); });
 }

 } // namespace device

--- a/src/targets/gpu/device/sqrt.cpp
+++ b/src/targets/gpu/device/sqrt.cpp
@@ -9,7 +9,7 @@ namespace device {

 void sqrt(hipStream_t stream, const argument& result, const argument& arg)
 {
-    nary(stream, result, arg)([](auto x) { return ::sqrt(to_hip_type(x)); });
+    nary(stream, result, arg)([](auto x) __device__ { return ::sqrt(to_hip_type(x)); });
 }

 } // namespace device

--- a/src/targets/gpu/device/sub.cpp
+++ b/src/targets/gpu/device/sub.cpp
@@ -8,7 +8,7 @@ namespace device {

 void sub(hipStream_t stream, const argument& result, const argument& arg1, const argument& arg2)
 {
-    nary(stream, result, arg1, arg2)([](auto x, auto y) { return x - y; });
+    nary(stream, result, arg1, arg2)([](auto x, auto y) __device__ { return x - y; });
 }

 } // namespace device

--- a/src/targets/gpu/device/tan.cpp
+++ b/src/targets/gpu/device/tan.cpp
@@ -9,7 +9,7 @@ namespace device {

 void tan(hipStream_t stream, const argument& result, const argument& arg)
 {
-    nary(stream, result, arg)([](auto x) { return ::tan(to_hip_type(x)); });
+    nary(stream, result, arg)([](auto x) __device__ { return ::tan(to_hip_type(x)); });
 }

 } // namespace device

--- a/src/targets/gpu/device/tanh.cpp
+++ b/src/targets/gpu/device/tanh.cpp
@@ -9,7 +9,7 @@ namespace device {

 void tanh(hipStream_t stream, const argument& result, const argument& arg)
 {
-    nary(stream, result, arg)([](auto x) { return ::tanh(to_hip_type(x)); });
+    nary(stream, result, arg)([](auto x) __device__ { return ::tanh(to_hip_type(x)); });
 }

 } // namespace device

--- a/src/targets/gpu/fuse_ops.cpp
+++ b/src/targets/gpu/fuse_ops.cpp
@@ -148,6 +148,12 @@ MIGRAPHX_PRED_MATCHER(fusable_conv, instruction_ref ins)
        return false;
    if(wei.lens()[1] > 512 and conv.algo != miopenConvolutionFwdAlgoWinograd)
        return false;
+
+    // Do not fuse when dims 2 and 3 differ for either the input or the weights
+    auto input_lens = ins->inputs().at(0)->get_shape().lens();
+    if(input_lens[2] != input_lens[3] or wei.lens()[2] != wei.lens()[3])
+        return false;
+
    auto op = conv.op;
    // Dont fuse winograd for non-3x3s since there is no fused windograd for those configs
    if(conv.algo == miopenConvolutionFwdAlgoWinograd and wei.lens()[2] != 3 and

--- a/src/targets/gpu/include/migraphx/gpu/acosh.hpp
+++ b/src/targets/gpu/include/migraphx/gpu/acosh.hpp
+#ifndef MIGRAPHX_GUARD_RTGLIB_ACOSH_HPP
+#define MIGRAPHX_GUARD_RTGLIB_ACOSH_HPP
+
+#include <migraphx/gpu/oper.hpp>
+#include <migraphx/gpu/device/acosh.hpp>
+
+namespace migraphx {
+inline namespace MIGRAPHX_INLINE_NS {
+namespace gpu {
+
+struct hip_acosh : unary_device<hip_acosh, device::acosh>
+{
+};
+
+} // namespace gpu
+} // namespace MIGRAPHX_INLINE_NS
+} // namespace migraphx
+
+#endif
--- a/src/targets/gpu/include/migraphx/gpu/asinh.hpp
+++ b/src/targets/gpu/include/migraphx/gpu/asinh.hpp
+#ifndef MIGRAPHX_GUARD_RTGLIB_ASINH_HPP
+#define MIGRAPHX_GUARD_RTGLIB_ASINH_HPP
+
+#include <migraphx/gpu/oper.hpp>
+#include <migraphx/gpu/device/asinh.hpp>
+
+namespace migraphx {
+inline namespace MIGRAPHX_INLINE_NS {
+namespace gpu {
+
+struct hip_asinh : unary_device<hip_asinh, device::asinh>
+{
+};
+
+} // namespace gpu
+} // namespace MIGRAPHX_INLINE_NS
+} // namespace migraphx
+
+#endif
--- a/src/targets/gpu/include/migraphx/gpu/atanh.hpp
+++ b/src/targets/gpu/include/migraphx/gpu/atanh.hpp
+#ifndef MIGRAPHX_GUARD_RTGLIB_ATANH_HPP
+#define MIGRAPHX_GUARD_RTGLIB_ATANH_HPP
+
+#include <migraphx/gpu/oper.hpp>
+#include <migraphx/gpu/device/atanh.hpp>
+
+namespace migraphx {
+inline namespace MIGRAPHX_INLINE_NS {
+namespace gpu {
+
+struct hip_atanh : unary_device<hip_atanh, device::atanh>
+{
+};
+
+} // namespace gpu
+} // namespace MIGRAPHX_INLINE_NS
+} // namespace migraphx
+
+#endif
--- a/src/targets/gpu/include/migraphx/gpu/deconvolution.hpp
+++ b/src/targets/gpu/include/migraphx/gpu/deconvolution.hpp
+#ifndef MIGRAPHX_GUARD_RTGLIB_DECONVOLUTION_HPP
+#define MIGRAPHX_GUARD_RTGLIB_DECONVOLUTION_HPP
+
+#include <migraphx/shape.hpp>
+#include <migraphx/op/deconvolution.hpp>
+#include <migraphx/gpu/miopen.hpp>
+
+namespace migraphx {
+inline namespace MIGRAPHX_INLINE_NS {
+namespace gpu {
+
+struct context;
+
+struct miopen_deconvolution
+{
+    op::deconvolution op;
+    shared<convolution_descriptor> cd;
+    miopenConvFwdAlgorithm_t algo{};
+    miopenHandle_t handle = nullptr;
+
+    template <class Self, class F>
+    static auto reflect(Self& self, F f)
+    {
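+        // TODO: Also reflect the `algo` member; only the fields of `op` are reflected for now.
+        // NOTE(review): this forwards to op::convolution::reflect although `op` is an op::deconvolution - confirm this is intended.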
+        return op::convolution::reflect(self.op, f);
+    }
+
+    std::string name() const { return "gpu::deconv"; }
+    shape compute_shape(const std::vector<shape>& inputs) const;
+    argument
+    compute(context& ctx, const shape& output_shape, const std::vector<argument>& args) const;
+    shape compile(context& ctx, const shape& output_shape, std::vector<shape> inputs);
+    void finalize(context& ctx, const shape& output_shape, std::vector<shape> inputs);
+    std::ptrdiff_t output_alias(const std::vector<shape>& shapes) const
+    {
+        return shapes.size() - 1;
+    }
+};
+
+} // namespace gpu
+} // namespace MIGRAPHX_INLINE_NS
+} // namespace migraphx
+
+#endif
--- a/src/targets/gpu/include/migraphx/gpu/device/acosh.hpp
+++ b/src/targets/gpu/include/migraphx/gpu/device/acosh.hpp
+#ifndef MIGRAPHX_GUARD_RTGLIB_DEVICE_ACOSH_HPP
+#define MIGRAPHX_GUARD_RTGLIB_DEVICE_ACOSH_HPP
+
+#include <migraphx/argument.hpp>
+#include <migraphx/config.hpp>
+#include <hip/hip_runtime_api.h>
+
+namespace migraphx {
+inline namespace MIGRAPHX_INLINE_NS {
+namespace gpu {
+namespace device {
+
+void acosh(hipStream_t stream, const argument& result, const argument& arg);
+
+} // namespace device
+} // namespace gpu
+} // namespace MIGRAPHX_INLINE_NS
+} // namespace migraphx
+
+#endif
--- a/src/targets/gpu/include/migraphx/gpu/device/arg_op.hpp
+++ b/src/targets/gpu/include/migraphx/gpu/device/arg_op.hpp
@@ -72,9 +72,8 @@ template <class Op>
 void arg_op(Op op, hipStream_t stream, const argument& result, const argument& arg, int64_t axis)
 {
    auto arg_shape        = arg.get_shape();
-    auto lens             = arg_shape.lens();
-    auto batch_lens       = lens;
-    size_t batch_item_num = lens[axis];
+    auto batch_lens       = arg_shape.lens();
+    size_t batch_item_num = batch_lens[axis];
    batch_lens[axis]      = 1;
    migraphx::shape batch_shape{arg_shape.type(), batch_lens};


--- a/src/targets/gpu/include/migraphx/gpu/device/asinh.hpp
+++ b/src/targets/gpu/include/migraphx/gpu/device/asinh.hpp
+#ifndef MIGRAPHX_GUARD_RTGLIB_DEVICE_ASINH_HPP
+#define MIGRAPHX_GUARD_RTGLIB_DEVICE_ASINH_HPP
+
+#include <migraphx/argument.hpp>
+#include <migraphx/config.hpp>
+#include <hip/hip_runtime_api.h>
+
+namespace migraphx {
+inline namespace MIGRAPHX_INLINE_NS {
+namespace gpu {
+namespace device {
+
+void asinh(hipStream_t stream, const argument& result, const argument& arg);
+
+} // namespace device
+} // namespace gpu
+} // namespace MIGRAPHX_INLINE_NS
+} // namespace migraphx
+
+#endif
--- a/src/targets/gpu/include/migraphx/gpu/device/atanh.hpp
+++ b/src/targets/gpu/include/migraphx/gpu/device/atanh.hpp
+#ifndef MIGRAPHX_GUARD_RTGLIB_DEVICE_ATANH_HPP
+#define MIGRAPHX_GUARD_RTGLIB_DEVICE_ATANH_HPP
+
+#include <migraphx/argument.hpp>
+#include <migraphx/config.hpp>
+#include <hip/hip_runtime_api.h>
+
+namespace migraphx {
+inline namespace MIGRAPHX_INLINE_NS {
+namespace gpu {
+namespace device {
+
+void atanh(hipStream_t stream, const argument& result, const argument& arg);
+
+} // namespace device
+} // namespace gpu
+} // namespace MIGRAPHX_INLINE_NS
+} // namespace migraphx
+
+#endif
--- a/src/targets/gpu/include/migraphx/gpu/device/logsoftmax.hpp
+++ b/src/targets/gpu/include/migraphx/gpu/device/logsoftmax.hpp
@@ -10,7 +10,7 @@ inline namespace MIGRAPHX_INLINE_NS {
 namespace gpu {
 namespace device {

-void logsoftmax(hipStream_t stream, const argument& result, const argument& arg, int axis);
+void logsoftmax(hipStream_t stream, const argument& result, const argument& arg, int64_t axis);

 } // namespace device
 } // namespace gpu

--- a/src/targets/gpu/include/migraphx/gpu/device/prelu.hpp
+++ b/src/targets/gpu/include/migraphx/gpu/device/prelu.hpp
+
+#ifndef MIGRAPHX_GUARD_RTGLIB_DEVICE_PRELU_HPP
+#define MIGRAPHX_GUARD_RTGLIB_DEVICE_PRELU_HPP
+
+#include <migraphx/argument.hpp>
+#include <migraphx/config.hpp>
+#include <hip/hip_runtime_api.h>
+
+namespace migraphx {
+inline namespace MIGRAPHX_INLINE_NS {
+namespace gpu {
+namespace device {
+
+void prelu(hipStream_t stream, const argument& result, const argument& arg1, const argument& arg2);
+
+} // namespace device
+} // namespace gpu
+} // namespace MIGRAPHX_INLINE_NS
+} // namespace migraphx
+
+#endif