Qualify size_t with the std namespace (use std::size_t)

5a9bb616 · Shucai Xiao · f8fa90bd · 22500e6c · 5a9bb616 · 5a9bb616
Commit 5a9bb616 authored Jun 25, 2019 by Shucai Xiao
4 changed files
--- a/src/targets/cpu/lowering.cpp
+++ b/src/targets/cpu/lowering.cpp
@@ -533,7 +533,7 @@ struct cpu_softmax
    {
        argument result{output_shape};
        auto batch_lens     = output_shape.lens();
-        size_t n_dims       = batch_lens[op.axis];
+        std::size_t n_dims  = batch_lens[op.axis];
        batch_lens[op.axis] = 1;
        shape batch_shape{shape::int32_type, batch_lens};

@@ -544,26 +544,26 @@ struct cpu_softmax
            std::vector<value_type> batch_sum(batch_shape.elements(), value_type(0));
            par_for(batch_shape.elements(), [&](auto i) {
                auto idx = batch_shape.multi(i);
-                for(size_t j = 0; j < n_dims; ++j)
+                for(std::size_t j = 0; j < n_dims; ++j)
                {
                    idx[op.axis] = j;
                    batch_max[i] = std::max(batch_max[i], input(idx.begin(), idx.end()));
                }

-                for(size_t j = 0; j < n_dims; ++j)
+                for(std::size_t j = 0; j < n_dims; ++j)
                {
-                    idx[op.axis]  = j;
-                    size_t index  = output_shape.index(idx);
-                    output[index] = std::exp(input[index] - batch_max[i]);
+                    idx[op.axis]      = j;
+                    std::size_t index = output_shape.index(idx);
+                    output[index]     = std::exp(input[index] - batch_max[i]);
                }

-                for(size_t j = 0; j < n_dims; ++j)
+                for(std::size_t j = 0; j < n_dims; ++j)
                {
                    idx[op.axis] = j;
                    batch_sum[i] += output(idx.begin(), idx.end());
                }

-                for(size_t j = 0; j < n_dims; ++j)
+                for(std::size_t j = 0; j < n_dims; ++j)
                {
                    idx[op.axis] = j;
                    output(idx.begin(), idx.end()) /= batch_sum[i];
@@ -591,7 +591,7 @@ struct cpu_logsoftmax
    {
        argument result{output_shape};
        auto batch_lens     = output_shape.lens();
-        size_t n_dims       = batch_lens[op.axis];
+        std::size_t n_dims  = batch_lens[op.axis];
        batch_lens[op.axis] = 1;
        shape batch_shape{shape::int32_type, batch_lens};

@@ -605,20 +605,20 @@ struct cpu_logsoftmax

            par_for(batch_shape.elements(), [&](auto i) {
                auto idx = batch_shape.multi(i);
-                for(size_t j = 0; j < n_dims; ++j)
+                for(std::size_t j = 0; j < n_dims; ++j)
                {
                    idx[op.axis] = j;
                    batch_max[i] = std::max(batch_max[i], input(idx.begin(), idx.end()));
                }

-                for(size_t j = 0; j < n_dims; ++j)
+                for(std::size_t j = 0; j < n_dims; ++j)
                {
-                    idx[op.axis]  = j;
-                    size_t index  = output_shape.index(idx);
-                    output[index] = input[index] - batch_max[i];
+                    idx[op.axis]      = j;
+                    std::size_t index = output_shape.index(idx);
+                    output[index]     = input[index] - batch_max[i];
                }

-                for(size_t j = 0; j < n_dims; ++j)
+                for(std::size_t j = 0; j < n_dims; ++j)
                {
                    idx[op.axis] = j;
                    batch_sum[i] += std::exp(output(idx.begin(), idx.end()));
@@ -626,7 +626,7 @@ struct cpu_logsoftmax

                batch_sum[i] = std::log(batch_sum[i]);

-                for(size_t j = 0; j < n_dims; ++j)
+                for(std::size_t j = 0; j < n_dims; ++j)
                {
                    idx[op.axis] = j;
                    output(idx.begin(), idx.end()) -= batch_sum[i];
@@ -655,7 +655,7 @@ struct cpu_argmax
    {
        argument result{output_shape};
        auto batch_lens       = args.front().get_shape().lens();
-        size_t batch_item_num = batch_lens[op.axis];
+        std::size_t batch_item_num = batch_lens[op.axis];
        batch_lens[op.axis]   = 1;
        shape batch_shape{shape::int32_type, batch_lens};

@@ -665,7 +665,7 @@ struct cpu_argmax
                    auto data_idx     = batch_shape.multi(i);
                    auto max_val      = input[i];
                    int64_t max_index = 0;
-                    for(size_t j = 1; j < batch_item_num; ++j)
+                    for(std::size_t j = 1; j < batch_item_num; ++j)
                    {
                        data_idx[op.axis] = j;
                        if(max_val < input(data_idx.begin(), data_idx.end()))
@@ -701,7 +701,7 @@ struct cpu_argmin
    {
        argument result{output_shape};
        auto batch_lens       = args.front().get_shape().lens();
-        size_t batch_item_num = batch_lens[op.axis];
+        std::size_t batch_item_num = batch_lens[op.axis];
        batch_lens[op.axis]   = 1;
        shape batch_shape{shape::int32_type, batch_lens};

@@ -711,7 +711,7 @@ struct cpu_argmin
                    auto data_idx     = batch_shape.multi(i);
                    auto min_val      = input[i];
                    int64_t min_index = 0;
-                    for(size_t j = 1; j < batch_item_num; ++j)
+                    for(std::size_t j = 1; j < batch_item_num; ++j)
                    {
                        data_idx[op.axis] = j;
                        if(min_val > input(data_idx.begin(), data_idx.end()))

--- a/src/targets/gpu/device/logsoftmax.cpp
+++ b/src/targets/gpu/device/logsoftmax.cpp
@@ -23,26 +23,27 @@ void logsoftmax(hipStream_t stream, const argument& result, const argument& arg,

    hip_visit_all(result, arg, batch_shape)([&](auto output, auto input, auto batch) {
        // use one block for items in one batch.
-        const size_t max_block_size = 1024;
-        size_t block_size           = 1;
+        const std::size_t max_block_size = 1024;
+        std::size_t block_size           = 1;
        while(block_size < max_block_size and block_size < batch_item_num)
        {
            block_size *= 2;
        }

        launch(stream, batch_shape.elements() * block_size, block_size)([=](auto idx) __device__ {
-            size_t thr_idx = idx.local;
-            size_t blk_idx = idx.group;
-            using type     = device_type<std::remove_cv_t<typename decltype(output)::value_type>>;
+            std::size_t thr_idx = idx.local;
+            std::size_t blk_idx = idx.group;
+            using type = device_type<std::remove_cv_t<typename decltype(output)::value_type>>;

            MIGRAPHX_DEVICE_SHARED type lds_data[max_block_size + 1];
            auto batch_idx = batch.multi(blk_idx);
            auto data_idx  = batch_idx;
            // load data to lds and compute the batch max
-            size_t remaining_item_num = batch_item_num;
-            size_t round_item_num     = (batch_item_num + block_size - 1) / block_size * block_size;
-            lds_data[max_block_size]  = input[0];
-            for(size_t i = thr_idx; i < round_item_num; i += block_size)
+            std::size_t remaining_item_num = batch_item_num;
+            std::size_t round_item_num =
+                (batch_item_num + block_size - 1) / block_size * block_size;
+            lds_data[max_block_size] = input[0];
+            for(std::size_t i = thr_idx; i < round_item_num; i += block_size)
            {
                if(i < batch_item_num)
                {
@@ -62,7 +63,7 @@ void logsoftmax(hipStream_t stream, const argument& result, const argument& arg,

            lds_data[max_block_size] = 0;
            remaining_item_num       = batch_item_num;
-            for(size_t i = thr_idx; i < round_item_num; i += block_size)
+            for(std::size_t i = thr_idx; i < round_item_num; i += block_size)
            {
                if(i < batch_item_num)
                {
@@ -81,7 +82,7 @@ void logsoftmax(hipStream_t stream, const argument& result, const argument& arg,

            auto log_batch_sum = ::log(to_hip_type(lds_data[max_block_size])) + batch_max;

-            for(size_t i = thr_idx; i < batch_item_num; i += block_size)
+            for(std::size_t i = thr_idx; i < batch_item_num; i += block_size)
            {
                data_idx[axis]   = i;
                output[data_idx] = input[data_idx] - log_batch_sum;

--- a/src/targets/gpu/device/softmax.cpp
+++ b/src/targets/gpu/device/softmax.cpp
@@ -15,34 +15,35 @@ namespace device {

 void softmax(hipStream_t stream, const argument& result, const argument& arg, int axis)
 {
-    auto lens             = result.get_shape().lens();
-    auto batch_lens       = lens;
-    size_t batch_item_num = lens[axis];
-    batch_lens[axis]      = 1;
+    auto lens                  = result.get_shape().lens();
+    auto batch_lens            = lens;
+    std::size_t batch_item_num = lens[axis];
+    batch_lens[axis]           = 1;
    migraphx::shape batch_shape{result.get_shape().type(), batch_lens};

    hip_visit_all(result, arg, batch_shape)([&](auto output, auto input, auto batch) {
        // use one block for items in one batch.
-        const size_t max_block_size = 1024;
-        size_t block_size           = 1;
+        const std::size_t max_block_size = 1024;
+        std::size_t block_size           = 1;
        while(block_size < max_block_size and block_size < batch_item_num)
        {
            block_size *= 2;
        }

        launch(stream, batch_shape.elements() * block_size, block_size)([=](auto idx) __device__ {
-            size_t thr_idx = idx.local;
-            size_t blk_idx = idx.group;
-            using type     = device_type<std::remove_cv_t<typename decltype(output)::value_type>>;
+            std::size_t thr_idx = idx.local;
+            std::size_t blk_idx = idx.group;
+            using type = device_type<std::remove_cv_t<typename decltype(output)::value_type>>;

            MIGRAPHX_DEVICE_SHARED type lds_data[max_block_size + 1];
            auto batch_idx = batch.multi(blk_idx);
            auto data_idx  = batch_idx;
            // load data to lds and compute the batch max
-            size_t remaining_item_num = batch_item_num;
-            size_t round_item_num     = (batch_item_num + block_size - 1) / block_size * block_size;
-            lds_data[max_block_size]  = input[0];
-            for(size_t i = thr_idx; i < round_item_num; i += block_size)
+            std::size_t remaining_item_num = batch_item_num;
+            std::size_t round_item_num =
+                (batch_item_num + block_size - 1) / block_size * block_size;
+            lds_data[max_block_size] = input[0];
+            for(std::size_t i = thr_idx; i < round_item_num; i += block_size)
            {
                if(i < batch_item_num)
                {
@@ -63,7 +64,7 @@ void softmax(hipStream_t stream, const argument& result, const argument& arg, in

            lds_data[max_block_size] = 0;
            remaining_item_num       = batch_item_num;
-            for(size_t i = thr_idx; i < round_item_num; i += block_size)
+            for(std::size_t i = thr_idx; i < round_item_num; i += block_size)
            {
                if(i < batch_item_num)
                {
@@ -81,7 +82,7 @@ void softmax(hipStream_t stream, const argument& result, const argument& arg, in
            }
            auto batch_sum = lds_data[max_block_size];

-            for(size_t i = thr_idx; i < batch_item_num; i += block_size)
+            for(std::size_t i = thr_idx; i < batch_item_num; i += block_size)
            {
                data_idx[axis]   = i;
                auto val         = input[data_idx] - batch_max;

--- a/src/targets/gpu/include/migraphx/gpu/device/reduce_opers.hpp
+++ b/src/targets/gpu/include/migraphx/gpu/device/reduce_opers.hpp
@@ -11,14 +11,17 @@ namespace gpu {
 namespace device {

 template <class T>
-inline __device__ void
-reduce_max(T* data_ptr, size_t block_size, size_t thr_idx, size_t item_num, size_t max_index)
+inline __device__ void reduce_max(T* data_ptr,
+                                  std::size_t block_size,
+                                  std::size_t thr_idx,
+                                  std::size_t item_num,
+                                  std::size_t max_index)
 {
    while(true)
    {
        auto stride = (item_num + 1) / 2;
        auto size   = item_num / 2;
-        for(size_t i = thr_idx; i < size; i += block_size)
+        for(std::size_t i = thr_idx; i < size; i += block_size)
        {
            data_ptr[i] = ::max(to_hip_type(data_ptr[i]), to_hip_type(data_ptr[i + stride]));
        }
@@ -39,14 +42,17 @@ reduce_max(T* data_ptr, size_t block_size, size_t thr_idx, size_t item_num, size
 }

 template <class T>
-inline __device__ void
-reduce_min(T* data_ptr, size_t block_size, size_t thr_idx, size_t item_num, size_t min_index)
+inline __device__ void reduce_min(T* data_ptr,
+                                  std::size_t block_size,
+                                  std::size_t thr_idx,
+                                  std::size_t item_num,
+                                  std::size_t min_index)
 {
    while(true)
    {
        auto stride = (item_num + 1) / 2;
        auto size   = item_num / 2;
-        for(size_t i = thr_idx; i < size; i += block_size)
+        for(std::size_t i = thr_idx; i < size; i += block_size)
        {
            data_ptr[i] = ::min(to_hip_type(data_ptr[i]), to_hip_type(data_ptr[i + stride]));
        }
@@ -69,16 +75,16 @@ reduce_min(T* data_ptr, size_t block_size, size_t thr_idx, size_t item_num, size
 template <class T>
 inline __device__ void reduce_argmax(T* data_ptr,
                                     int64_t* index_ptr,
-                                     size_t block_size,
-                                     size_t thr_idx,
-                                     size_t item_num,
-                                     size_t max_index)
+                                     std::size_t block_size,
+                                     std::size_t thr_idx,
+                                     std::size_t item_num,
+                                     std::size_t max_index)
 {
    while(true)
    {
        auto stride = (item_num + 1) / 2;
        auto size   = item_num / 2;
-        for(size_t i = thr_idx; i < size; i += block_size)
+        for(std::size_t i = thr_idx; i < size; i += block_size)
        {
            if(data_ptr[i] < data_ptr[i + stride])
            {
@@ -108,16 +114,16 @@ inline __device__ void reduce_argmax(T* data_ptr,
 template <class T>
 inline __device__ void reduce_argmin(T* data_ptr,
                                     int64_t* index_ptr,
-                                     size_t block_size,
-                                     size_t thr_idx,
-                                     size_t item_num,
-                                     size_t min_index)
+                                     std::size_t block_size,
+                                     std::size_t thr_idx,
+                                     std::size_t item_num,
+                                     std::size_t min_index)
 {
    while(true)
    {
        auto stride = (item_num + 1) / 2;
        auto size   = item_num / 2;
-        for(size_t i = thr_idx; i < size; i += block_size)
+        for(std::size_t i = thr_idx; i < size; i += block_size)
        {
            if(data_ptr[i] > data_ptr[i + stride])
            {
@@ -145,14 +151,17 @@ inline __device__ void reduce_argmin(T* data_ptr,
 }

 template <class T>
-inline __device__ void
-reduce_sum(T* data_ptr, size_t block_size, size_t thr_idx, size_t item_num, size_t sum_index)
+inline __device__ void reduce_sum(T* data_ptr,
+                                  std::size_t block_size,
+                                  std::size_t thr_idx,
+                                  std::size_t item_num,
+                                  std::size_t sum_index)
 {
    while(true)
    {
        auto stride = (item_num + 1) / 2;
        auto size   = item_num / 2;
-        for(size_t i = thr_idx; i < size; i += block_size)
+        for(std::size_t i = thr_idx; i < size; i += block_size)
        {
            data_ptr[i] += data_ptr[i + stride];
        }