fix cppcheck error.

633199a1 · Shucai Xiao · 65702b8a · 633199a1 · 633199a1
Commit 633199a1 authored Jun 25, 2019 by Shucai Xiao
Showing 2 changed files with 6 additions and 6 deletions

src/targets/gpu/device/logsoftmax.cpp +4 -4

src/targets/gpu/include/migraphx/gpu/device/reduce_opers.hpp +2 -2

No files found.
--- a/src/targets/gpu/device/logsoftmax.cpp
+++ b/src/targets/gpu/device/logsoftmax.cpp
@@ -42,14 +42,14 @@ void logsoftmax(hipStream_t stream, const argument& result, const argument& arg,
                size_t blk_idx = idx.group;
                using type = device_type<std::remove_cv_t<typename decltype(output)::value_type>>;
-                MIGRAPHX_DEVICE_SHARED type lds_data[max_block_size + 2];
+                MIGRAPHX_DEVICE_SHARED type lds_data[max_block_size + 1];
                auto batch_idx = desc_batch.multi(blk_idx);
                auto data_idx  = batch_idx;
                // load data to lds and compute the batch max
                size_t remaining_item_num = batch_item_num;
-                size_t thread_num    = (batch_item_num + block_size - 1) / block_size * block_size;
+                size_t round_item_num    = (batch_item_num + block_size - 1) / block_size * block_size;
                lds_data[block_size] = input_ptr[0];
-                for(size_t i = thr_idx; i < thread_num; i += block_size)
+                for(size_t i = thr_idx; i < round_item_num; i += block_size)
                {
                    if(i < batch_item_num)
                    {
@@ -70,7 +70,7 @@ void logsoftmax(hipStream_t stream, const argument& result, const argument& arg,
                lds_data[block_size] = 0;
                remaining_item_num   = batch_item_num;
-                for(size_t i = thr_idx; i < thread_num; i += block_size)
+                for(size_t i = thr_idx; i < round_item_num; i += block_size)
                {
                    if(i < batch_item_num)
                    {

--- a/src/targets/gpu/include/migraphx/gpu/device/reduce_opers.hpp
+++ b/src/targets/gpu/include/migraphx/gpu/device/reduce_opers.hpp
@@ -11,7 +11,7 @@ namespace gpu {
 namespace device {
 template <class T>
-__device__ void reduce_max(T* data_ptr, size_t block_size, size_t thr_idx, size_t item_num)
+inline __device__ void reduce_max(T* data_ptr, size_t block_size, size_t thr_idx, size_t item_num)
 {
    auto stride = (item_num + 1) / 2;
    while(true)
@@ -39,7 +39,7 @@ __device__ void reduce_max(T* data_ptr, size_t block_size, size_t thr_idx, size_
 }
 template <class T>
-__device__ void reduce_sum(T* data_ptr, size_t block_size, size_t thr_idx, size_t item_num)
+inline __device__ void reduce_sum(T* data_ptr, size_t block_size, size_t thr_idx, size_t item_num)
 {
    auto stride = (item_num + 1) / 2;
    while(true)