Commit 3855c6af authored by Shucai Xiao

Merge branch 'opt_log_softmax_new_device_code' into argmax_min

parents b222af2f 93eae2df
@@ -10,10 +10,7 @@ inline namespace MIGRAPHX_INLINE_NS {
 namespace gpu {
 namespace device {
-argument gather(hipStream_t stream,
-                const migraphx::shape& output_shape,
-                std::vector<migraphx::argument> args,
-                int axis);
+argument gather(hipStream_t stream, argument result, argument arg1, argument arg2, int axis);
 } // namespace device
 } // namespace gpu
...
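These header hunks (gather here, logsoftmax and softmax below) all make the same change: the device entry points now take the preallocated result buffer and the input tensors as explicit argument parameters instead of an output shape plus a std::vector<migraphx::argument>. gather keeps its argument return type, while logsoftmax and softmax switch to void since the caller already owns the result. As a hedged sketch, the matching call site for the new gather signature likely mirrors the softmax/logsoftmax compute changes shown later in this commit; hip_gather and the (data, indices) order of arg1/arg2 are assumptions, not part of this hunk:

// Hypothetical call site, mirroring the compute changes further down in this
// commit; not itself part of this diff.
argument hip_gather::compute(context& ctx, const shape&, const std::vector<argument>& args) const
{
    // Assumed order: args[0] is the data tensor, args[1] the index tensor,
    // and args.back() the preallocated output allocation.
    return device::gather(ctx.get_stream().get(), args.back(), args.at(0), args.at(1), op.axis);
}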
@@ -10,10 +10,7 @@ inline namespace MIGRAPHX_INLINE_NS {
 namespace gpu {
 namespace device {
-argument logsoftmax(hipStream_t stream,
-                    const migraphx::shape& output_shape,
-                    std::vector<migraphx::argument> args,
-                    int axis);
+void logsoftmax(hipStream_t stream, argument result, argument arg, int axis);
 } // namespace device
 } // namespace gpu
...
#ifndef MIGRAPHX_GUARD_RTGLIB_DEVICE_REDUCE_OPERS_HPP
#define MIGRAPHX_GUARD_RTGLIB_DEVICE_REDUCE_OPERS_HPP

#include <migraphx/gpu/hip.hpp>
#include <migraphx/gpu/device/types.hpp>
#include <migraphx/gpu/device/nary.hpp>

namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace device {

// Block-level tree reduction of the item_num values (item_num <= block_size)
// held at the front of data_ptr. Each pass folds the upper half of the live
// values into the lower half; thread 0 then merges the chunk result in
// data_ptr[0] into the running maximum kept one slot past the data, at
// data_ptr[block_size]. Every thread of the block must call this function,
// since it synchronizes internally.
template <class T>
inline __device__ void reduce_max(T* data_ptr, size_t block_size, size_t thr_idx, size_t item_num)
{
    while(true)
    {
        // Ceil/floor split: when item_num is odd, the middle element has no
        // partner this pass and survives into the next one.
        auto stride = (item_num + 1) / 2;
        auto size   = item_num / 2;
        for(size_t i = thr_idx; i < size; i += block_size)
        {
            data_ptr[i] = ::max(to_hip_type(data_ptr[i]), to_hip_type(data_ptr[i + stride]));
        }
        __syncthreads();

        item_num = stride;
        if(item_num == 1)
            break;
    }

    // Fold this chunk's maximum into the accumulator slot.
    if(thr_idx == 0)
    {
        data_ptr[block_size] =
            (data_ptr[0] < data_ptr[block_size]) ? data_ptr[block_size] : data_ptr[0];
    }
    __syncthreads();
}

// Same reduction pattern as reduce_max, accumulating a sum into
// data_ptr[block_size] instead of a maximum.
template <class T>
inline __device__ void reduce_sum(T* data_ptr, size_t block_size, size_t thr_idx, size_t item_num)
{
    while(true)
    {
        auto stride = (item_num + 1) / 2;
        auto size   = item_num / 2;
        for(size_t i = thr_idx; i < size; i += block_size)
        {
            data_ptr[i] += data_ptr[i + stride];
        }
        __syncthreads();

        item_num = stride;
        if(item_num == 1)
            break;
    }

    if(thr_idx == 0)
    {
        data_ptr[block_size] += data_ptr[0];
    }
    __syncthreads();
}

} // namespace device
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx

#endif
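These helpers assume the caller stages data through shared memory in chunks of at most block_size elements and keeps a running accumulator in the extra slot. A minimal usage sketch follows, assuming a fixed block size of 256 and one block per reduced row; the kernel name and layout are illustrative, not code from this commit:

#include <migraphx/gpu/device/reduce_opers.hpp>

// Hypothetical kernel: computes the maximum of each length-n row of `in`,
// one thread block per row, writing one value per row to `out`.
template <class T>
__global__ void row_max(const T* in, T* out, size_t n)
{
    constexpr size_t block_size = 256;
    __shared__ T buffer[block_size + 1]; // last slot is the running maximum
    const size_t tid = threadIdx.x;
    const T* row     = in + blockIdx.x * n;

    if(tid == 0)
        buffer[block_size] = row[0]; // seed the accumulator (harmless for max)
    __syncthreads();

    // Walk the row in chunks of at most block_size elements; reduce_max
    // folds each chunk's maximum into buffer[block_size].
    for(size_t base = 0; base < n; base += block_size)
    {
        const size_t item_num = (n - base < block_size) ? (n - base) : block_size;
        if(tid < item_num)
            buffer[tid] = row[base + tid];
        __syncthreads();
        migraphx::gpu::device::reduce_max(buffer, block_size, tid, item_num);
    }

    if(tid == 0)
        out[blockIdx.x] = buffer[block_size];
}

A softmax or logsoftmax kernel would make two such passes over the reduced axis: reduce_max to find the shift that keeps the exponentials finite, then reduce_sum for the normalizer.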
@@ -10,10 +10,7 @@ inline namespace MIGRAPHX_INLINE_NS {
 namespace gpu {
 namespace device {
-argument softmax(hipStream_t stream,
-                 const migraphx::shape& output_shape,
-                 std::vector<migraphx::argument> args,
-                 int axis);
+void softmax(hipStream_t stream, argument result, argument arg, int axis);
 } // namespace device
 } // namespace gpu
...
@@ -15,11 +15,11 @@ shape hip_logsoftmax::compute_shape(const std::vector<shape>& inputs) const
     return op.compute_shape({inputs.at(0)});
 }
-argument hip_logsoftmax::compute(context& ctx,
-                                 const shape& output_shape,
-                                 const std::vector<argument>& args) const
+argument
+hip_logsoftmax::compute(context& ctx, const shape&, const std::vector<argument>& args) const
 {
-    return device::logsoftmax(ctx.get_stream().get(), output_shape, args, op.axis);
+    device::logsoftmax(ctx.get_stream().get(), args.back(), args.front(), op.axis);
+    return args.back();
 }
 } // namespace gpu
...
@@ -37,11 +37,10 @@ shape hip_softmax::compute_shape(const std::vector<shape>& inputs) const
     return op.compute_shape({inputs.at(0)});
 }
-argument hip_softmax::compute(context& ctx,
-                              const shape& output_shape,
-                              const std::vector<argument>& args) const
+argument hip_softmax::compute(context& ctx, const shape&, const std::vector<argument>& args) const
 {
-    return device::softmax(ctx.get_stream().get(), output_shape, args, op.axis);
+    device::softmax(ctx.get_stream().get(), args.back(), args.front(), op.axis);
+    return args.back();
 }
 } // namespace gpu
...
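The two compute changes show the intent of the new device signatures: in the MIGraphX GPU target the last element of args is the output allocation provided by the lowering pass, so compute now hands args.back() and args.front() to the device function and returns the same buffer, leaving the now-unused output shape parameter unnamed. That args.back() is the preallocated output follows the convention used throughout this target rather than anything stated in the diff itself.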
@@ -598,7 +598,7 @@ struct test_softmax : verify_program<test_softmax<Axis>>
     migraphx::program create_program() const
     {
         migraphx::program p;
-        migraphx::shape s{migraphx::shape::float_type, {3, 4, 5, 6}};
+        migraphx::shape s{migraphx::shape::float_type, {2080, 4, 1026, 6}};
         auto param = p.add_parameter("0", s);
         p.add_instruction(migraphx::op::softmax{Axis}, param);
@@ -3350,7 +3350,7 @@ struct test_logsoftmax : verify_program<test_logsoftmax<Axis>>
     migraphx::program create_program() const
    {
         migraphx::program p;
-        migraphx::shape s{migraphx::shape::float_type, {3, 4, 5, 6}};
+        migraphx::shape s{migraphx::shape::float_type, {1025, 4, 1025, 6}};
         auto param = p.add_parameter("0", s);
         p.add_instruction(migraphx::op::logsoftmax{Axis}, param);
...
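The verify-test shapes grow from {3, 4, 5, 6} to {2080, 4, 1026, 6} and {1025, 4, 1025, 6}, presumably chosen so the reduced axis is longer than a single thread block and includes odd, non-power-of-two extents such as 1025, exercising the chunked accumulator path in reduce_max and reduce_sum above.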