Commit 22500e6c authored by Shucai Xiao

clang format

parent ea932b63
@@ -533,7 +533,7 @@ struct cpu_softmax
     {
         argument result{output_shape};
         auto batch_lens = output_shape.lens();
         std::size_t n_dims = batch_lens[op.axis];
         batch_lens[op.axis] = 1;
         shape batch_shape{shape::int32_type, batch_lens};
@@ -552,9 +552,9 @@ struct cpu_softmax
             for(std::size_t j = 0; j < n_dims; ++j)
             {
                 idx[op.axis] = j;
                 std::size_t index = output_shape.index(idx);
                 output[index] = std::exp(input[index] - batch_max[i]);
             }
             for(std::size_t j = 0; j < n_dims; ++j)
@@ -591,7 +591,7 @@ struct cpu_logsoftmax
     {
         argument result{output_shape};
         auto batch_lens = output_shape.lens();
         std::size_t n_dims = batch_lens[op.axis];
         batch_lens[op.axis] = 1;
         shape batch_shape{shape::int32_type, batch_lens};
@@ -613,9 +613,9 @@ struct cpu_logsoftmax
             for(std::size_t j = 0; j < n_dims; ++j)
             {
                 idx[op.axis] = j;
                 std::size_t index = output_shape.index(idx);
                 output[index] = input[index] - batch_max[i];
             }
             for(std::size_t j = 0; j < n_dims; ++j)
...
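Both CPU operators above subtract the per-batch maximum (batch_max) before exponentiating, the standard trick for numerical stability: shifting by the max makes every exponent non-positive, so std::exp cannot overflow. A minimal single-vector sketch of the same idea (the name softmax_1d and the std::vector interface are illustrative, not part of the MIGraphX source):

    #include <algorithm>
    #include <cmath>
    #include <cstddef>
    #include <vector>

    // Numerically stable softmax over one vector: shifting by the max makes
    // every exponent <= 0, so std::exp never overflows and the largest term is 1.
    std::vector<double> softmax_1d(const std::vector<double>& x)
    {
        const double m = *std::max_element(x.begin(), x.end());
        std::vector<double> y(x.size());
        double sum = 0.0;
        for(std::size_t i = 0; i < x.size(); ++i)
        {
            y[i] = std::exp(x[i] - m);
            sum += y[i];
        }
        for(auto& v : y)
            v /= sum;
        return y;
    }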
@@ -33,15 +33,16 @@ void logsoftmax(hipStream_t stream, const argument& result, const argument& arg,
         launch(stream, batch_shape.elements() * block_size, block_size)([=](auto idx) __device__ {
             std::size_t thr_idx = idx.local;
             std::size_t blk_idx = idx.group;
             using type = device_type<std::remove_cv_t<typename decltype(output)::value_type>>;
             MIGRAPHX_DEVICE_SHARED type lds_data[max_block_size + 1];
             auto batch_idx = batch.multi(blk_idx);
             auto data_idx = batch_idx;
             // load data to lds and compute the batch max
             std::size_t remaining_item_num = batch_item_num;
-            std::size_t round_item_num = (batch_item_num + block_size - 1) / block_size * block_size;
+            std::size_t round_item_num =
+                (batch_item_num + block_size - 1) / block_size * block_size;
             lds_data[max_block_size] = input[0];
             for(std::size_t i = thr_idx; i < round_item_num; i += block_size)
             {
                 if(i < batch_item_num)
...
@@ -15,10 +15,10 @@ namespace device {
 void softmax(hipStream_t stream, const argument& result, const argument& arg, int axis)
 {
     auto lens = result.get_shape().lens();
     auto batch_lens = lens;
     std::size_t batch_item_num = lens[axis];
     batch_lens[axis] = 1;
     migraphx::shape batch_shape{result.get_shape().type(), batch_lens};
     hip_visit_all(result, arg, batch_shape)([&](auto output, auto input, auto batch) {
@@ -33,15 +33,16 @@ void softmax(hipStream_t stream, const argument& result, const argument& arg, in
         launch(stream, batch_shape.elements() * block_size, block_size)([=](auto idx) __device__ {
             std::size_t thr_idx = idx.local;
             std::size_t blk_idx = idx.group;
             using type = device_type<std::remove_cv_t<typename decltype(output)::value_type>>;
             MIGRAPHX_DEVICE_SHARED type lds_data[max_block_size + 1];
             auto batch_idx = batch.multi(blk_idx);
             auto data_idx = batch_idx;
             // load data to lds and compute the batch max
             std::size_t remaining_item_num = batch_item_num;
-            std::size_t round_item_num = (batch_item_num + block_size - 1) / block_size * block_size;
+            std::size_t round_item_num =
+                (batch_item_num + block_size - 1) / block_size * block_size;
             lds_data[max_block_size] = input[0];
             for(std::size_t i = thr_idx; i < round_item_num; i += block_size)
             {
                 if(i < batch_item_num)
...
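In both GPU kernels above, the only line clang-format re-wrapped is the round-up: round_item_num rounds batch_item_num up to the next multiple of block_size, so every thread in the block executes the same number of loop iterations even when its index runs past the real data (guarded by the `if(i < batch_item_num)` check); that keeps all threads reaching any block-wide synchronization inside the loop. A small worked check of the round-up arithmetic (the values are hypothetical):

    #include <cassert>
    #include <cstddef>

    int main()
    {
        const std::size_t block_size     = 256;  // hypothetical launch width
        const std::size_t batch_item_num = 1000; // hypothetical axis length
        // (n + b - 1) / b is ceil(n / b); multiplying back by b rounds n up
        // to the next multiple of b: ceil(1000 / 256) = 4, and 4 * 256 = 1024.
        const std::size_t round_item_num =
            (batch_item_num + block_size - 1) / block_size * block_size;
        assert(round_item_num == 1024);
        return 0;
    }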
@@ -11,8 +11,11 @@ namespace gpu {
 namespace device {
 template <class T>
-inline __device__ void
-reduce_max(T* data_ptr, std::size_t block_size, std::size_t thr_idx, std::size_t item_num, std::size_t max_index)
+inline __device__ void reduce_max(T* data_ptr,
+                                  std::size_t block_size,
+                                  std::size_t thr_idx,
+                                  std::size_t item_num,
+                                  std::size_t max_index)
 {
     while(true)
     {
@@ -39,8 +42,11 @@ reduce_max(T* data_ptr, std::size_t block_size, std::size_t thr_idx, std::size_t
 }
 template <class T>
-inline __device__ void
-reduce_min(T* data_ptr, std::size_t block_size, std::size_t thr_idx, std::size_t item_num, std::size_t min_index)
+inline __device__ void reduce_min(T* data_ptr,
+                                  std::size_t block_size,
+                                  std::size_t thr_idx,
+                                  std::size_t item_num,
+                                  std::size_t min_index)
 {
     while(true)
     {
@@ -67,8 +73,11 @@ reduce_min(T* data_ptr, std::size_t block_size, std::size_t thr_idx, std::size_t
 }
 template <class T>
-inline __device__ void
-reduce_sum(T* data_ptr, std::size_t block_size, std::size_t thr_idx, std::size_t item_num, std::size_t sum_index)
+inline __device__ void reduce_sum(T* data_ptr,
+                                  std::size_t block_size,
+                                  std::size_t thr_idx,
+                                  std::size_t item_num,
+                                  std::size_t sum_index)
 {
     while(true)
     {
...
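The bodies of reduce_max, reduce_min, and reduce_sum are collapsed in this diff; only the reformatted signatures are visible. A block-level reduction with this shape of signature is typically a tree reduction that halves the active range each pass and synchronizes between passes. The sketch below is an assumption about that pattern, not the actual MIGraphX body, and it drops the block_size and sum_index parameters for brevity:

    // Hypothetical sketch of a block-level tree reduction (sum flavor).
    template <class T>
    inline __device__ void reduce_sum_sketch(T* data_ptr, std::size_t thr_idx, std::size_t item_num)
    {
        while(item_num > 1)
        {
            // Thread i folds element i + stride into element i; stride is the
            // upper half of the active range, rounded up to handle odd sizes.
            std::size_t stride = (item_num + 1) / 2;
            if(thr_idx < item_num / 2)
                data_ptr[thr_idx] += data_ptr[thr_idx + stride];
            __syncthreads(); // every thread finishes a pass before the next begins
            item_num = stride;
        }
        // data_ptr[0] now holds the sum of the original item_num elements.
    }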