Unverified Commit bd3fe59e authored by Hongzhi (Steve), Chen, committed by GitHub

[Misc] ClangFormat auto fix. (#4685)



* Auto fix c++.

* reformat
Co-authored-by: Steve <ubuntu@ip-172-31-34-29.ap-northeast-1.compute.internal>
parent 89a4cc4d
@@ -4,6 +4,7 @@
  * \brief Array index select GPU implementation
  */
 #include <dgl/array.h>
+
 #include "../../runtime/cuda/cuda_common.h"
 #include "./array_index_select.cuh"
 #include "./utils.h"
@@ -13,7 +14,7 @@ using runtime::NDArray;
 namespace aten {
 namespace impl {
 
-template<DGLDeviceType XPU, typename DType, typename IdType>
+template <DGLDeviceType XPU, typename DType, typename IdType>
 NDArray IndexSelect(NDArray array, IdArray index) {
   cudaStream_t stream = runtime::getCurrentCUDAStream();
   const DType* array_data = static_cast<DType*>(array->data);
@@ -29,24 +30,25 @@ NDArray IndexSelect(NDArray array, IdArray index) {
   // use index->ctx for pinned array
   NDArray ret = NDArray::Empty(shape, array->dtype, index->ctx);
-  if (len == 0)
-    return ret;
+  if (len == 0) return ret;
   DType* ret_data = static_cast<DType*>(ret->data);
 
   if (num_feat == 1) {
     const int nt = cuda::FindNumThreads(len);
     const int nb = (len + nt - 1) / nt;
-    CUDA_KERNEL_CALL(IndexSelectSingleKernel, nb, nt, 0, stream,
-                     array_data, idx_data, len, arr_len, ret_data);
+    CUDA_KERNEL_CALL(
+        IndexSelectSingleKernel, nb, nt, 0, stream, array_data, idx_data, len,
+        arr_len, ret_data);
   } else {
     dim3 block(256, 1);
-    while (static_cast<int64_t>(block.x) >= 2*num_feat) {
+    while (static_cast<int64_t>(block.x) >= 2 * num_feat) {
       block.x /= 2;
       block.y *= 2;
     }
-    const dim3 grid((len+block.y-1)/block.y);
-    CUDA_KERNEL_CALL(IndexSelectMultiKernel, grid, block, 0, stream,
-                     array_data, num_feat, idx_data, len, arr_len, ret_data);
+    const dim3 grid((len + block.y - 1) / block.y);
+    CUDA_KERNEL_CALL(
+        IndexSelectMultiKernel, grid, block, 0, stream, array_data, num_feat,
+        idx_data, len, arr_len, ret_data);
   }
   return ret;
 }
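
Before launching IndexSelectMultiKernel, the else branch above shapes a 2-D block: starting from 256x1 threads, it repeatedly halves blockDim.x and doubles blockDim.y while blockDim.x is still at least twice num_feat, so the x dimension roughly matches the feature width and the spare threads cover more output rows per block. A minimal standalone sketch of that heuristic (PickBlockShape and the driver below are illustrative names, not DGL code):

#include <cstdint>
#include <cstdio>

struct BlockShape {
  unsigned x, y;
};

// Trade x-threads for y-rows until x is within 2x of the feature width.
BlockShape PickBlockShape(int64_t num_feat) {
  BlockShape block{256, 1};
  while (static_cast<int64_t>(block.x) >= 2 * num_feat) {
    block.x /= 2;  // fewer threads along the feature axis
    block.y *= 2;  // more output rows handled per block
  }
  return block;
}

int main() {
  for (int64_t f : {2, 3, 16, 300}) {
    BlockShape b = PickBlockShape(f);
    std::printf("num_feat=%lld -> block=(%u, %u)\n",
                static_cast<long long>(f), b.x, b.y);
  }
  return 0;
}

For num_feat = 3 this yields a 4x64 block: 4 threads stride across the 3 features while 64 rows are processed per 256-thread block.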
@@ -78,8 +80,9 @@ DType IndexSelect(NDArray array, int64_t index) {
   DType ret = 0;
 #endif
   device->CopyDataFromTo(
-      static_cast<DType*>(array->data) + index, 0, reinterpret_cast<DType*>(&ret), 0,
-      sizeof(DType), array->ctx, DGLContext{kDGLCPU, 0}, array->dtype);
+      static_cast<DType*>(array->data) + index, 0,
+      reinterpret_cast<DType*>(&ret), 0, sizeof(DType), array->ctx,
+      DGLContext{kDGLCPU, 0}, array->dtype);
   return reinterpret_cast<DType&>(ret);
 }
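
CopyDataFromTo above moves a single element from the array's device context into host memory before reinterpreting it. For intuition only, outside DGL's device abstraction the same read-back could be written against the plain CUDA runtime (a sketch; ReadDeviceScalar is a hypothetical helper and error handling is omitted):

#include <cstdint>
#include <cuda_runtime.h>

// Copy the one element at d_array[index] from device memory back to the host.
template <typename DType>
DType ReadDeviceScalar(const DType* d_array, int64_t index) {
  DType ret{};
  cudaMemcpy(&ret, d_array + index, sizeof(DType), cudaMemcpyDeviceToHost);
  return ret;
}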
...
@@ -12,11 +12,9 @@ namespace aten {
 namespace impl {
 
 template <typename DType, typename IdType>
-__global__ void IndexSelectSingleKernel(const DType* array,
-                                        const IdType* index,
-                                        const int64_t length,
-                                        const int64_t arr_len,
-                                        DType* out) {
+__global__ void IndexSelectSingleKernel(
+    const DType* array, const IdType* index, const int64_t length,
+    const int64_t arr_len, DType* out) {
   int tx = blockIdx.x * blockDim.x + threadIdx.x;
   int stride_x = gridDim.x * blockDim.x;
   while (tx < length) {
@@ -28,22 +26,18 @@ __global__ void IndexSelectSingleKernel(const DType* array,
 template <typename DType, typename IdType>
 __global__ void IndexSelectMultiKernel(
-    const DType* const array,
-    const int64_t num_feat,
-    const IdType* const index,
-    const int64_t length,
-    const int64_t arr_len,
-    DType* const out) {
-  int64_t out_row = blockIdx.x*blockDim.y+threadIdx.y;
+    const DType* const array, const int64_t num_feat, const IdType* const index,
+    const int64_t length, const int64_t arr_len, DType* const out) {
+  int64_t out_row = blockIdx.x * blockDim.y + threadIdx.y;
 
-  const int64_t stride = blockDim.y*gridDim.x;
+  const int64_t stride = blockDim.y * gridDim.x;
 
   while (out_row < length) {
     int64_t col = threadIdx.x;
     const int64_t in_row = index[out_row];
     assert(in_row >= 0 && in_row < arr_len);
     while (col < num_feat) {
-      out[out_row*num_feat+col] = array[in_row*num_feat+col];
+      out[out_row * num_feat + col] = array[in_row * num_feat + col];
       col += blockDim.x;
     }
     out_row += stride;
@@ -51,11 +45,9 @@ __global__ void IndexSelectMultiKernel(
 }
 
 template <typename DType, typename IdType>
-__global__ void IndexScatterSingleKernel(const DType* array,
-                                         const IdType* index,
-                                         const int64_t length,
-                                         const int64_t arr_len,
-                                         DType* out) {
+__global__ void IndexScatterSingleKernel(
+    const DType* array, const IdType* index, const int64_t length,
+    const int64_t arr_len, DType* out) {
   int tx = blockIdx.x * blockDim.x + threadIdx.x;
   int stride_x = gridDim.x * blockDim.x;
   while (tx < length) {
@@ -67,22 +59,18 @@ __global__ void IndexScatterSingleKernel(const DType* array,
 template <typename DType, typename IdType>
 __global__ void IndexScatterMultiKernel(
-    const DType* const array,
-    const int64_t num_feat,
-    const IdType* const index,
-    const int64_t length,
-    const int64_t arr_len,
-    DType* const out) {
-  int64_t in_row = blockIdx.x*blockDim.y+threadIdx.y;
+    const DType* const array, const int64_t num_feat, const IdType* const index,
+    const int64_t length, const int64_t arr_len, DType* const out) {
+  int64_t in_row = blockIdx.x * blockDim.y + threadIdx.y;
 
-  const int64_t stride = blockDim.y*gridDim.x;
+  const int64_t stride = blockDim.y * gridDim.x;
 
   while (in_row < length) {
     int64_t col = threadIdx.x;
     const int64_t out_row = index[in_row];
     assert(out_row >= 0 && out_row < arr_len);
     while (col < num_feat) {
-      out[out_row*num_feat+col] = array[in_row*num_feat+col];
+      out[out_row * num_feat + col] = array[in_row * num_feat + col];
       col += blockDim.x;
     }
     in_row += stride;
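
Both Multi kernels share one access pattern: threadIdx.y plus blockIdx.x enumerate rows in a grid-stride loop, while threadIdx.x strides across the num_feat columns of each row. A self-contained CUDA sketch of the same gather pattern, simplified to float data and toy sizes (not DGL code):

#include <cassert>
#include <cstdint>
#include <cstdio>
#include <cuda_runtime.h>

__global__ void GatherRows(
    const float* array, int64_t num_feat, const int64_t* index, int64_t length,
    int64_t arr_len, float* out) {
  int64_t out_row = blockIdx.x * blockDim.y + threadIdx.y;  // row per y-thread
  const int64_t stride = blockDim.y * gridDim.x;            // rows per sweep
  while (out_row < length) {
    const int64_t in_row = index[out_row];
    assert(in_row >= 0 && in_row < arr_len);
    for (int64_t col = threadIdx.x; col < num_feat; col += blockDim.x)
      out[out_row * num_feat + col] = array[in_row * num_feat + col];
    out_row += stride;
  }
}

int main() {
  const int64_t arr_len = 4, num_feat = 3, length = 2;
  float h_array[arr_len * num_feat];
  for (int i = 0; i < arr_len * num_feat; ++i) h_array[i] = float(i);
  int64_t h_index[length] = {2, 0};  // gather rows 2 and 0

  float *d_array, *d_out;
  int64_t* d_index;
  cudaMalloc(&d_array, sizeof(h_array));
  cudaMalloc(&d_index, sizeof(h_index));
  cudaMalloc(&d_out, length * num_feat * sizeof(float));
  cudaMemcpy(d_array, h_array, sizeof(h_array), cudaMemcpyHostToDevice);
  cudaMemcpy(d_index, h_index, sizeof(h_index), cudaMemcpyHostToDevice);

  dim3 block(4, 64);  // the shape the heuristic above picks for num_feat = 3
  dim3 grid((length + block.y - 1) / block.y);
  GatherRows<<<grid, block>>>(d_array, num_feat, d_index, length, arr_len,
                              d_out);

  float h_out[length * num_feat];
  cudaMemcpy(h_out, d_out, sizeof(h_out), cudaMemcpyDeviceToHost);
  for (int64_t r = 0; r < length; ++r)
    for (int64_t c = 0; c < num_feat; ++c)
      std::printf("out[%lld][%lld] = %g\n", static_cast<long long>(r),
                  static_cast<long long>(c), h_out[r * num_feat + c]);
  cudaFree(d_array);
  cudaFree(d_index);
  cudaFree(d_out);
  return 0;
}

The scatter kernels invert the indexing (index[in_row] selects the destination row) but keep the identical thread layout.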
...
@@ -5,6 +5,7 @@
  */
 #include <dgl/bcast.h>
 #include <dmlc/logging.h>
+
 #include <algorithm>
 
 namespace dgl {
@@ -17,11 +18,9 @@ namespace {
 bool UseBcast(const std::string& op, NDArray lhs, NDArray rhs) {
   if (op == "copy_lhs" || op == "copy_rhs")
     return false;  // broadcasting is not required for copy_u/copy_e
-  if (lhs->ndim != rhs->ndim)
-    return true;
+  if (lhs->ndim != rhs->ndim) return true;
   for (int i = 1; i < lhs->ndim; ++i) {
-    if (lhs->shape[i] != rhs->shape[i])
-      return true;
+    if (lhs->shape[i] != rhs->shape[i]) return true;
   }
   return false;
 }
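
UseBcast only inspects the trailing feature axes: the loop starts at i = 1 because axis 0 is the node/edge axis and is never broadcast. A standalone restatement of the rule over plain shape vectors (UseBcastShapes is an illustrative helper, not a DGL function):

#include <cstdint>
#include <string>
#include <vector>

bool UseBcastShapes(const std::string& op, const std::vector<int64_t>& lhs,
                    const std::vector<int64_t>& rhs) {
  if (op == "copy_lhs" || op == "copy_rhs") return false;  // no rhs/lhs needed
  if (lhs.size() != rhs.size()) return true;   // differing ranks => broadcast
  for (size_t i = 1; i < lhs.size(); ++i)      // skip axis 0 (nodes/edges)
    if (lhs[i] != rhs[i]) return true;
  return false;
}

// For some edge count E:
//   UseBcastShapes("add", {E, 3, 4}, {E, 3, 4}) -> false (shapes match)
//   UseBcastShapes("add", {E, 3, 4}, {E, 1, 4}) -> true  (axis 1 differs)
//   UseBcastShapes("add", {E, 3, 4}, {E, 4})    -> true  (ndim differs)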
@@ -38,10 +37,8 @@ BcastOff CalcBcastOff(const std::string& op, NDArray lhs, NDArray rhs) {
   BcastOff rst;
   rst.lhs_len = 1;
   rst.rhs_len = 1;
-  for (int i = 1; i < lhs->ndim; ++i)
-    rst.lhs_len *= lhs->shape[i];
-  for (int i = 1; i < rhs->ndim; ++i)
-    rst.rhs_len *= rhs->shape[i];
+  for (int i = 1; i < lhs->ndim; ++i) rst.lhs_len *= lhs->shape[i];
+  for (int i = 1; i < rhs->ndim; ++i) rst.rhs_len *= rhs->shape[i];
   rst.use_bcast = UseBcast(op, lhs, rhs);
   rst.reduce_size = 1;  // defaults to 1, except for the case op == 'dot'.
   if (rst.use_bcast) {
@@ -49,15 +46,19 @@ BcastOff CalcBcastOff(const std::string& op, NDArray lhs, NDArray rhs) {
     int out_len = 1, j = 0;
     if (op == "dot") {
       rst.reduce_size = lhs->shape[lhs->ndim - 1];  // set reduce_size for dot.
-      ++j;  // do not consider reduce axis in computing lhs_offset and rhs_offset.
+      ++j;  // do not consider reduce axis in computing lhs_offset and
+            // rhs_offset.
     }
     int stride_l = 1, stride_r = 1;
     rst.lhs_offset.push_back(0);  // lhs_offset[0] is always 0
     rst.rhs_offset.push_back(0);  // rhs_offset[0] is always 0
     for (; j < max_ndim; ++j) {  // iterate the axis from back to front.
-      // dl refers to the size of lhs array in the current axis, likewise for dr.
-      const int dl = (lhs->ndim - 1 - j < 1) ? 1 : lhs->shape[lhs->ndim - 1 - j];
-      const int dr = (rhs->ndim - 1 - j < 1) ? 1 : rhs->shape[rhs->ndim - 1 - j];
+      // dl refers to the size of lhs array in the current axis, likewise for
+      // dr.
+      const int dl =
+          (lhs->ndim - 1 - j < 1) ? 1 : lhs->shape[lhs->ndim - 1 - j];
+      const int dr =
+          (rhs->ndim - 1 - j < 1) ? 1 : rhs->shape[rhs->ndim - 1 - j];
       for (int i = 1; i < std::max(dl, dr); ++i) {
         for (int k = 0; k < out_len; ++k) {
           /* Explaination:
@@ -79,8 +80,10 @@ BcastOff CalcBcastOff(const std::string& op, NDArray lhs, NDArray rhs) {
   } else {
     rst.out_len = (op == "copy_rhs") ? rst.rhs_len : rst.lhs_len;
     if (op == "dot") {
-      rst.reduce_size = lhs->shape[lhs->ndim - 1];  // set reduce_size for dot.
-      rst.out_len /= rst.reduce_size;  // out_len is divied by reduce_size in dot.
+      // set reduce_size for dot.
+      rst.reduce_size = lhs->shape[lhs->ndim - 1];
+      // out_len is divied by reduce_size in dot.
+      rst.out_len /= rst.reduce_size;
     }
   }
   return rst;
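
To make the offset computation concrete, here is a worked trace of CalcBcastOff (values derived by following the algorithm above; the inner loop body is elided in this diff, so treat the numbers as illustrative). Output element k reads lhs element lhs_offset[k] and rhs element rhs_offset[k] within the current row's feature block:

// op = "add", lhs feature shape (3, 1), rhs feature shape (1, 4):
//   lhs_len = 3, rhs_len = 4, out_len = 12, reduce_size = 1
//   lhs_offset = {0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2}  // row repeats per col
//   rhs_offset = {0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3}  // col cycles per row
// op = "dot" with a trailing axis of size 4: reduce_size = 4 and out_len is
// divided by 4, since the last axis is reduced rather than broadcast.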
...