Commit 2e7398f3 authored by Shucai Xiao

fix bugs so unit tests can pass

parent c9c08eb0
@@ -5,6 +5,7 @@
 #include <migraphx/gpu/device/tensor.hpp>
 #include <migraphx/gpu/device/launch.hpp>
 #include <migraphx/gpu/device/types.hpp>
+#include <migraphx/gpu/device/reduce_opers.hpp>
 #include <migraphx/gpu/hip.hpp>

 namespace migraphx {
@@ -12,45 +13,6 @@ inline namespace MIGRAPHX_INLINE_NS {
 namespace gpu {
 namespace device {
-template <class T>
-inline __device__ void reduce_argmax(T* data_ptr,
-                                     int64_t* index_ptr,
-                                     std::size_t block_size,
-                                     std::size_t thr_idx,
-                                     std::size_t item_num,
-                                     std::size_t max_index)
-{
-    while(true)
-    {
-        auto stride = (item_num + 1) / 2;
-        auto size   = item_num / 2;
-        for(std::size_t i = thr_idx; i < size; i += block_size)
-        {
-            if(data_ptr[i] < data_ptr[i + stride])
-            {
-                data_ptr[i]  = data_ptr[i + stride];
-                index_ptr[i] = index_ptr[i + stride];
-            }
-        }
-        __syncthreads();
-        item_num = stride;
-        if(item_num == 1)
-            break;
-    }
-    if(thr_idx == 0)
-    {
-        if(data_ptr[max_index] < data_ptr[0])
-        {
-            data_ptr[max_index]  = data_ptr[0];
-            index_ptr[max_index] = index_ptr[0];
-        }
-    }
-    __syncthreads();
-}
 void argmax(hipStream_t stream, const argument& result, const argument& arg, int axis)
 {
     auto arg_shape = arg.get_shape();
@@ -96,7 +58,8 @@ void argmax(hipStream_t stream, const argument& result, const argument& arg, int
             __syncthreads();
             auto item_num = (remaining_item_num > block_size) ? block_size : remaining_item_num;
-            reduce_argmax(lds_data, lds_index, block_size, thr_idx, item_num, max_block_size);
+            block_reduce_pair<type, pair_max_op<type, int64_t>>(lds_data, lds_index, pair_max_op<type, int64_t>{},
+                block_size, thr_idx, item_num, max_block_size);
             remaining_item_num -= block_size;
         }
......
@@ -5,6 +5,7 @@
 #include <migraphx/gpu/device/tensor.hpp>
 #include <migraphx/gpu/device/launch.hpp>
 #include <migraphx/gpu/device/types.hpp>
+#include <migraphx/gpu/device/reduce_opers.hpp>
 #include <migraphx/gpu/hip.hpp>

 namespace migraphx {
@@ -12,45 +13,6 @@ inline namespace MIGRAPHX_INLINE_NS {
 namespace gpu {
 namespace device {
-template <class T>
-inline __device__ void reduce_argmin(T* data_ptr,
-                                     int64_t* index_ptr,
-                                     std::size_t block_size,
-                                     std::size_t thr_idx,
-                                     std::size_t item_num,
-                                     std::size_t min_index)
-{
-    while(true)
-    {
-        auto stride = (item_num + 1) / 2;
-        auto size   = item_num / 2;
-        for(std::size_t i = thr_idx; i < size; i += block_size)
-        {
-            if(data_ptr[i] > data_ptr[i + stride])
-            {
-                data_ptr[i]  = data_ptr[i + stride];
-                index_ptr[i] = index_ptr[i + stride];
-            }
-        }
-        __syncthreads();
-        item_num = stride;
-        if(item_num == 1)
-            break;
-    }
-    if(thr_idx == 0)
-    {
-        if(data_ptr[min_index] > data_ptr[0])
-        {
-            data_ptr[min_index]  = data_ptr[0];
-            index_ptr[min_index] = index_ptr[0];
-        }
-    }
-    __syncthreads();
-}
 void argmin(hipStream_t stream, const argument& result, const argument& arg, int axis)
 {
     auto arg_shape = arg.get_shape();
@@ -96,7 +58,8 @@ void argmin(hipStream_t stream, const argument& result, const argument& arg, int
             __syncthreads();
             auto item_num = (remaining_item_num > block_size) ? block_size : remaining_item_num;
-            reduce_argmin(lds_data, lds_index, block_size, thr_idx, item_num, max_block_size);
+            block_reduce_pair<type, pair_min_op<type, int64_t>>(lds_data, lds_index, pair_min_op<type, int64_t>{},
+                block_size, thr_idx, item_num, max_block_size);
             remaining_item_num -= block_size;
         }
......
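Both device files make the same change: the hand-written reduce_argmax/reduce_argmin helpers are deleted and their call sites delegate to a shared block_reduce_pair routine that is parameterized by a pair-combining operator. The removed functions show the underlying pattern, a stride-halving reduction over block-sized scratch arrays holding values and their indices. As a rough illustration of that pattern, here is a minimal host-side C++ sketch; the name reduce_pairs, the lambda, and the sample data are invented for this example, and the actual block_reduce_pair in reduce_opers.hpp is a __device__ function whose implementation is not part of this diff.

// Host-side sketch of a (value, index) pair reduction; illustrative only.
#include <cstdint>
#include <iostream>
#include <utility>
#include <vector>

template <class T, class Op>
std::pair<T, int64_t> reduce_pairs(std::vector<T> data, std::vector<int64_t> index, Op op)
{
    // Stride-halving loop, mirroring the removed reduce_argmax/reduce_argmin:
    // each pass combines element i with element i + stride until one pair is left.
    std::size_t item_num = data.size();
    while(item_num > 1)
    {
        std::size_t stride = (item_num + 1) / 2;
        for(std::size_t i = 0; i < item_num / 2; ++i)
        {
            auto r   = op(std::make_pair(data[i], index[i]),
                          std::make_pair(data[i + stride], index[i + stride]));
            data[i]  = r.first;
            index[i] = r.second;
        }
        item_num = stride;
    }
    return {data[0], index[0]};
}

int main()
{
    std::vector<float> vals   = {1.0f, 7.0f, 3.0f, 7.0f, 2.0f};
    std::vector<int64_t> idxs = {0, 1, 2, 3, 4};
    auto argmax_result = reduce_pairs(vals, idxs, [](auto x, auto y) {
        if(x.first != y.first)
            return (x.first > y.first) ? x : y;
        return (x.second < y.second) ? x : y; // tie: keep the smaller index, like pair_max_op
    });
    std::cout << "max " << argmax_result.first << " at index " << argmax_result.second << "\n";
    return 0;
}

Factoring the loop into one shared helper removes the duplicated max/min variants and leaves the comparison and tie-breaking rule in a single place, the pair operators changed in the next hunk.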
@@ -64,14 +64,25 @@ template <class T, class F>
 struct pair_max_op
 {
     using type = std::pair<T, F>;
-    type operator()(type x, type y) const { return (x.first > y.first) ? x : y; }
+    // This implementation ensures that when multiple values
+    // are the max, the min index is returned.
+    type operator()(type x, type y) const {
+        if(x.first > y.first)
+            return x;
+        else if(x.first < y.first)
+            return y;
+        else
+        {
+            return (x.second < y.second) ? x : y;
+        }
+    }
 };

 template <class T, class F>
 struct pair_min_op
 {
     using type = std::pair<T, F>;
-    type operator()(type x, type y) const { return (x.first < y.first) ? x : y; }
+    type operator()(type x, type y) const { return (x < y) ? x : y; }
 };

 template <class T, class Op>
......
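Both operators now break ties toward the smaller index: pair_max_op does it explicitly when the two values compare equal, and pair_min_op gets the same behaviour from std::pair's lexicographic operator<, which only compares .second when the .first values are equal. The snippet below is a standalone sketch that copies the two operators out of this diff and asserts the tie behaviour; it is a hypothetical verification program, not part of the MIGraphX headers.

// Standalone check of the tie-breaking rule encoded by the updated operators.
#include <cassert>
#include <cstdint>
#include <utility>

template <class T, class F>
struct pair_max_op
{
    using type = std::pair<T, F>;
    type operator()(type x, type y) const
    {
        if(x.first > y.first)
            return x;
        else if(x.first < y.first)
            return y;
        else
            return (x.second < y.second) ? x : y; // equal values: keep the smaller index
    }
};

template <class T, class F>
struct pair_min_op
{
    using type = std::pair<T, F>;
    // std::pair's operator< is lexicographic: .first decides, and .second breaks ties,
    // so equal values also resolve to the smaller index here.
    type operator()(type x, type y) const { return (x < y) ? x : y; }
};

int main()
{
    pair_max_op<float, int64_t> max_op;
    pair_min_op<float, int64_t> min_op;

    // Ties must return the element with the smaller index, in either argument order.
    assert(max_op({2.0f, 5}, {2.0f, 3}).second == 3);
    assert(max_op({2.0f, 3}, {2.0f, 5}).second == 3);
    assert(min_op({4.0f, 7}, {4.0f, 1}).second == 1);
    assert(min_op({4.0f, 1}, {4.0f, 7}).second == 1);

    // Without a tie, the larger / smaller value still wins.
    assert(max_op({1.0f, 0}, {3.0f, 9}).second == 9);
    assert(min_op({1.0f, 0}, {3.0f, 9}).second == 0);
    return 0;
}

Returning the first occurrence of the extreme value matches the default ONNX ArgMax/ArgMin semantics, which is presumably what the failing unit tests checked.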
@@ -598,7 +598,7 @@ struct test_softmax : verify_program<test_softmax<Axis>>
     migraphx::program create_program() const
     {
         migraphx::program p;
-        migraphx::shape s{migraphx::shape::float_type, {2080, 4, 1026, 6}};
+        migraphx::shape s{migraphx::shape::float_type, {2, 4, 1026, 6}};
         auto param = p.add_parameter("0", s);
         p.add_instruction(migraphx::op::softmax{Axis}, param);
@@ -617,7 +617,7 @@ struct test_arg_ops : verify_program<test_arg_ops<T, Axis, KeepDims>>
     migraphx::program create_program() const
     {
         migraphx::program p;
-        migraphx::shape s{migraphx::shape::float_type, {2, 3, 4, 5}};
+        migraphx::shape s{migraphx::shape::float_type, {2, 3, 4, 1025}};
         auto param = p.add_parameter("data", s);
         p.add_instruction(T{Axis, KeepDims}, param);
@@ -3376,7 +3376,7 @@ struct test_logsoftmax : verify_program<test_logsoftmax<Axis>>
     migraphx::program create_program() const
     {
         migraphx::program p;
-        migraphx::shape s{migraphx::shape::float_type, {1025, 4, 1025, 6}};
+        migraphx::shape s{migraphx::shape::float_type, {17, 4, 1025, 6}};
         auto param = p.add_parameter("0", s);
         p.add_instruction(migraphx::op::logsoftmax{Axis}, param);
......