"tests/git@developer.sourcefind.cn:OpenDAS/dgl.git" did not exist on "56614f320f3a8924de2967f2b3628666aa68b970"
Unverified commit d6d517bb authored by Minjie Wang, committed by GitHub

[Kernel] CUDA CSR2COO COOSort COO2CSR (#1620)



* add cuda source

* moving codes from kernel2 branch

* operator overloading

* Better error message for unsupported device

* fix c tests

* coo sort using cusparse

* move test_rpc to distributed

* lint

* address comments and add utests
Co-authored-by: Jinjing Zhou <VoVAllen@users.noreply.github.com>
Co-authored-by: Chao Ma <mctt90@gmail.com>
Co-authored-by: xiang song(charlie.song) <classicxsong@gmail.com>
parent 61f007c4
@@ -890,15 +890,45 @@ IdArray VecToIdArray(const std::vector<T>& vec,
 * DeviceSpecificImplementation<XPU>(...);
 * });
 */
#define ATEN_XPU_SWITCH(val, XPU, op, ...) do {                   \
  if ((val) == kDLCPU) {                                          \
    constexpr auto XPU = kDLCPU;                                  \
    {__VA_ARGS__}                                                 \
  } else {                                                        \
    LOG(FATAL) << "Operator " << (op) << " does not support "     \
               << dgl::runtime::DeviceTypeCode2Str(val)           \
               << " device.";                                     \
  }                                                               \
} while (0)
/*
 * Dispatch according to device (CPU or CUDA):
 *
 * XXX(minjie): temporary macro for operators that have a CUDA implementation.
 *
 * ATEN_XPU_SWITCH_CUDA(array->ctx.device_type, XPU, "OpName", {
 *   // Now XPU is a placeholder for array->ctx.device_type
 *   DeviceSpecificImplementation<XPU>(...);
 * });
 */
#ifdef DGL_USE_CUDA
#define ATEN_XPU_SWITCH_CUDA(val, XPU, op, ...) do {              \
  if ((val) == kDLCPU) {                                          \
    constexpr auto XPU = kDLCPU;                                  \
    {__VA_ARGS__}                                                 \
  } else if ((val) == kDLGPU) {                                   \
    constexpr auto XPU = kDLGPU;                                  \
    {__VA_ARGS__}                                                 \
  } else {                                                        \
    LOG(FATAL) << "Operator " << (op) << " does not support "     \
               << dgl::runtime::DeviceTypeCode2Str(val)           \
               << " device.";                                     \
  }                                                               \
} while (0)
#else   // DGL_USE_CUDA
#define ATEN_XPU_SWITCH_CUDA ATEN_XPU_SWITCH
#endif  // DGL_USE_CUDA
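// Illustrative sketch (assumption, not part of this commit): how an operator
// would use the dispatch macro above. The op-name string only feeds the error
// message; `Negate` and `NegateImpl` are hypothetical names used for
// illustration only.
template <DLDeviceType XPU>
IdArray NegateImpl(IdArray arr);

inline IdArray Negate(IdArray arr) {
  IdArray ret;
  ATEN_XPU_SWITCH_CUDA(arr->ctx.device_type, XPU, "Negate", {
    ret = NegateImpl<XPU>(arr);
  });
  return ret;
}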
/*
 * Dispatch according to integral type (either int32 or int64):
 *
@@ -1011,17 +1041,17 @@ IdArray VecToIdArray(const std::vector<T>& vec,
} while (0)
// Macro to dispatch according to device context and index type.
#define ATEN_CSR_SWITCH(csr, XPU, IdType, op, ...)           \
  ATEN_XPU_SWITCH((csr).indptr->ctx.device_type, XPU, op, {  \
    ATEN_ID_TYPE_SWITCH((csr).indptr->dtype, IdType, {       \
      {__VA_ARGS__}                                          \
    });                                                      \
  });

// Macro to dispatch according to device context and index type.
#define ATEN_COO_SWITCH(coo, XPU, IdType, op, ...)           \
  ATEN_XPU_SWITCH((coo).row->ctx.device_type, XPU, op, {     \
    ATEN_ID_TYPE_SWITCH((coo).row->dtype, IdType, {          \
      {__VA_ARGS__}                                          \
    });                                                      \
  });
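// Usage sketch (assumption, not part of this commit): the combined macros
// collapse device and index-type dispatch into one call; `CSRNNZ` and
// `CSRNNZImpl` are hypothetical names.
template <DLDeviceType XPU, typename IdType>
int64_t CSRNNZImpl(const CSRMatrix& csr);

inline int64_t CSRNNZ(const CSRMatrix& csr) {
  int64_t ret = 0;
  ATEN_CSR_SWITCH(csr, XPU, IdType, "CSRNNZ", {
    ret = CSRNNZImpl<XPU, IdType>(csr);
  });
  return ret;
}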
...
@@ -17,15 +17,8 @@
#include "serializer.h"
#include "shared_mem.h"

// forward declaration
inline std::ostream& operator << (std::ostream& os, DGLType t);

namespace dgl {
@@ -210,6 +203,12 @@ class NDArray {
 * \brief Get the size of the array in the number of bytes.
 */
size_t GetSize() const;
/*!
* \brief Get the number of elements in this array.
*/
int64_t NumElements() const;
/*!
 * \brief Create a NDArray backed by a dlpack tensor.
 *
@@ -464,6 +463,110 @@ inline bool SaveDLTensor(dmlc::Stream* strm,
  return true;
}
/*!
* \brief Convert type code to its name
 * \param type_code The type code.
* \return The name of type code.
*/
inline const char* TypeCode2Str(int type_code) {
switch (type_code) {
case kDLInt: return "int";
case kDLUInt: return "uint";
case kDLFloat: return "float";
case kStr: return "str";
case kBytes: return "bytes";
case kHandle: return "handle";
case kNull: return "NULL";
case kObjectHandle: return "ObjectHandle";
case kArrayHandle: return "ArrayHandle";
case kDGLType: return "DGLType";
case kDGLContext: return "DGLContext";
case kFuncHandle: return "FunctionHandle";
case kModuleHandle: return "ModuleHandle";
case kNDArrayContainer: return "NDArrayContainer";
default: LOG(FATAL) << "unknown type_code="
<< static_cast<int>(type_code); return "";
}
}
/*!
* \brief Convert device type code to its name
* \param device_type The device type code.
* \return The name of the device.
*/
inline const char* DeviceTypeCode2Str(DLDeviceType device_type) {
switch (device_type) {
case kDLCPU: return "cpu";
case kDLGPU: return "cuda";
case kDLCPUPinned: return "cpu_pinned";
case kDLOpenCL: return "opencl";
case kDLVulkan: return "vulkan";
case kDLMetal: return "metal";
case kDLVPI: return "vpi";
case kDLROCM: return "rocm";
default: LOG(FATAL) << "Unknown device type code="
<< static_cast<int>(device_type); return "";
}
}
/*!
* \brief convert a string to DGL type.
* \param s The string to be converted.
* \return The corresponding dgl type.
*/
inline DGLType String2DGLType(std::string s) {
DGLType t;
t.bits = 32; t.lanes = 1;
const char* scan;
if (s.substr(0, 3) == "int") {
t.code = kDLInt; scan = s.c_str() + 3;
} else if (s.substr(0, 4) == "uint") {
t.code = kDLUInt; scan = s.c_str() + 4;
} else if (s.substr(0, 5) == "float") {
t.code = kDLFloat; scan = s.c_str() + 5;
} else if (s.substr(0, 6) == "handle") {
t.code = kHandle;
t.bits = 64; // handle uses 64 bit by default.
scan = s.c_str() + 6;
} else {
scan = s.c_str();
LOG(FATAL) << "unknown type " << s;
}
char* xdelim; // emulate sscanf("%ux%u", bits, lanes)
uint8_t bits = static_cast<uint8_t>(strtoul(scan, &xdelim, 10));
if (bits != 0) t.bits = bits;
if (*xdelim == 'x') {
t.lanes = static_cast<uint16_t>(strtoul(xdelim + 1, nullptr, 10));
}
return t;
}
/*!
* \brief convert a DGL type to string.
* \param t The type to be converted.
* \return The corresponding dgl type in string.
*/
inline std::string DGLType2String(DGLType t) {
#ifndef _LIBCPP_SGX_NO_IOSTREAMS
std::ostringstream os;
os << t;
return os.str();
#else
std::string repr = "";
repr += TypeCode2Str(t.code);
if (t.code == kHandle) return repr;
repr += std::to_string(static_cast<int>(t.bits));
if (t.lanes != 1) {
repr += "x" + std::to_string(static_cast<int>(t.lanes));
}
return repr;
#endif
}
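// Sketch (assumption, not part of this commit): round-tripping a type string
// through the two helpers above; `TypeStringExample` is a hypothetical name.
// "int32" parses to {kDLInt, 32, 1} and "float16x4" to {kDLFloat, 16, 4}.
inline void TypeStringExample() {
  DGLType t = String2DGLType("float16x4");
  CHECK_EQ(static_cast<int>(t.code), kDLFloat);
  CHECK_EQ(t.bits, 16);
  CHECK_EQ(t.lanes, 4);
  CHECK_EQ(DGLType2String(t), "float16x4");  // the round trip is lossless
}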
// macro to check type code.
#define DGL_CHECK_TYPE_CODE(CODE, T)                             \
  CHECK_EQ(CODE, T) << " expected "                              \
                    << TypeCode2Str(T) << " but get " << TypeCode2Str(CODE)
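// Usage sketch (assumption, not part of this commit): guard an argument's
// type code before interpreting it; on mismatch this fails with a message
// such as " expected int but get float". `CheckIntArgExample` is hypothetical.
inline void CheckIntArgExample(int type_code) {
  DGL_CHECK_TYPE_CODE(type_code, kDLInt);
}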
}  // namespace runtime
}  // namespace dgl

@@ -472,4 +575,46 @@ namespace dmlc {
DMLC_DECLARE_TRAITS(has_saveload, dgl::runtime::NDArray, true);
}  // namespace dmlc
///////////////// Operator overloading for DLDataType /////////////////
/*! \brief Check whether two data types are the same.*/
inline bool operator == (const DLDataType& ty1, const DLDataType& ty2) {
return ty1.code == ty2.code && ty1.bits == ty2.bits && ty1.lanes == ty2.lanes;
}
/*! \brief Check whether two data types are different.*/
inline bool operator != (const DLDataType& ty1, const DLDataType& ty2) {
return !(ty1 == ty2);
}
#ifndef _LIBCPP_SGX_NO_IOSTREAMS
inline std::ostream& operator << (std::ostream& os, DGLType t) {
os << dgl::runtime::TypeCode2Str(t.code);
if (t.code == kHandle) return os;
os << static_cast<int>(t.bits);
if (t.lanes != 1) {
os << 'x' << static_cast<int>(t.lanes);
}
return os;
}
#endif
///////////////// Operator overloading for DLContext /////////////////
/*! \brief Check whether two device contexts are the same.*/
inline bool operator == (const DLContext& ctx1, const DLContext& ctx2) {
return ctx1.device_type == ctx2.device_type && ctx1.device_id == ctx2.device_id;
}
/*! \brief Check whether two device contexts are different.*/
inline bool operator != (const DLContext& ctx1, const DLContext& ctx2) {
return !(ctx1 == ctx2);
}
#ifndef _LIBCPP_SGX_NO_IOSTREAMS
inline std::ostream& operator << (std::ostream& os, const DLContext& ctx) {
return os << dgl::runtime::DeviceTypeCode2Str(ctx.device_type) << ":" << ctx.device_id;
}
#endif
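// Sketch (assumption, not part of this commit): the overloads above make
// device contexts comparable and printable, e.g. when composing error
// messages; assumes iostreams are available (non-SGX builds).
inline std::string ContextExampleString() {
  DLContext cpu{kDLCPU, 0};
  DLContext gpu{kDLGPU, 0};
  std::ostringstream os;
  if (cpu != gpu)
    os << cpu << " vs " << gpu;  // yields "cpu:0 vs cuda:0"
  return os.str();
}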
#endif  // DGL_RUNTIME_NDARRAY_H_
@@ -295,32 +295,6 @@ class DGLArgs {
  inline DGLArgValue operator[](int i) const;
};
/*!
* \brief Convert type code to its name
* \param type_code The type code .
* \return The name of type code.
*/
inline const char* TypeCode2Str(int type_code);
/*!
* \brief convert a string to DGL type.
* \param s The string to be converted.
* \return The corresponding dgl type.
*/
inline DGLType String2DGLType(std::string s);
/*!
* \brief convert a DGL type to string.
* \param t The type to be converted.
* \return The corresponding dgl type in string.
*/
inline std::string DGLType2String(DGLType t);
// macro to check type code.
#define DGL_CHECK_TYPE_CODE(CODE, T) \
CHECK_EQ(CODE, T) << " expected " \
<< TypeCode2Str(T) << " but get " << TypeCode2Str(CODE)
/*!
 * \brief Type traits to mark if a class is dgl extension type.
 *
@@ -826,83 +800,6 @@ class DGLRetValue : public DGLPODValue_ {
};

// implementation details
inline const char* TypeCode2Str(int type_code) {
switch (type_code) {
case kDLInt: return "int";
case kDLUInt: return "uint";
case kDLFloat: return "float";
case kStr: return "str";
case kBytes: return "bytes";
case kHandle: return "handle";
case kNull: return "NULL";
case kObjectHandle: return "ObjectHandle";
case kArrayHandle: return "ArrayHandle";
case kDGLType: return "DGLType";
case kDGLContext: return "DGLContext";
case kFuncHandle: return "FunctionHandle";
case kModuleHandle: return "ModuleHandle";
case kNDArrayContainer: return "NDArrayContainer";
default: LOG(FATAL) << "unknown type_code="
<< static_cast<int>(type_code); return "";
}
}
#ifndef _LIBCPP_SGX_NO_IOSTREAMS
inline std::ostream& operator<<(std::ostream& os, DGLType t) { // NOLINT(*)
os << TypeCode2Str(t.code);
if (t.code == kHandle) return os;
os << static_cast<int>(t.bits);
if (t.lanes != 1) {
os << 'x' << static_cast<int>(t.lanes);
}
return os;
}
#endif
inline std::string DGLType2String(DGLType t) {
#ifndef _LIBCPP_SGX_NO_IOSTREAMS
std::ostringstream os;
os << t;
return os.str();
#else
std::string repr = "";
repr += TypeCode2Str(t.code);
if (t.code == kHandle) return repr;
repr += std::to_string(static_cast<int>(t.bits));
if (t.lanes != 1) {
repr += "x" + std::to_string(static_cast<int>(t.lanes));
}
return repr;
#endif
}
inline DGLType String2DGLType(std::string s) {
DGLType t;
t.bits = 32; t.lanes = 1;
const char* scan;
if (s.substr(0, 3) == "int") {
t.code = kDLInt; scan = s.c_str() + 3;
} else if (s.substr(0, 4) == "uint") {
t.code = kDLUInt; scan = s.c_str() + 4;
} else if (s.substr(0, 5) == "float") {
t.code = kDLFloat; scan = s.c_str() + 5;
} else if (s.substr(0, 6) == "handle") {
t.code = kHandle;
t.bits = 64; // handle uses 64 bit by default.
scan = s.c_str() + 6;
} else {
scan = s.c_str();
LOG(FATAL) << "unknown type " << s;
}
char* xdelim; // emulate sscanf("%ux%u", bits, lanes)
uint8_t bits = static_cast<uint8_t>(strtoul(scan, &xdelim, 10));
if (bits != 0) t.bits = bits;
if (*xdelim == 'x') {
t.lanes = static_cast<uint16_t>(strtoul(xdelim + 1, nullptr, 10));
}
return t;
}
inline DGLArgValue DGLArgs::operator[](int i) const {
  CHECK_LT(i, num_args)
      << "not enough argument passed, "
...
@@ -900,7 +900,7 @@ class HeteroGraphIndex(ObjectBase):
        HeteroGraphIndex
        """
        g = self.get_relation_graph(etype)
        return g.copy_to(ctx).asbits(self.bits_needed(etype or 0))

    def get_csr_shuffle_order(self, etype):
        """Return the edge shuffling order when a coo graph is converted to csr format
...
/*!
* Copyright (c) 2019 by Contributors
* \file array/cuda/array_op_impl.cu
* \brief Array operator GPU implementation
*/
#include <dgl/array.h>
#include "../../runtime/cuda/cuda_common.h"
namespace dgl {
using runtime::NDArray;
namespace aten {
namespace impl {
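// Pick the CUDA thread-block size for a 1-D launch: start from the hardware
// maximum (max_nthrs, typically 1024) and halve it until it no longer exceeds
// `dim`, so short arrays do not launch mostly idle threads.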
int FindNumThreads(int dim, int max_nthrs) {
int ret = max_nthrs;
while (ret > dim) {
ret = ret >> 1;
}
return ret;
}
///////////////////////////// Range /////////////////////////////
template <typename IdType>
__global__ void _RangeKernel(IdType* out, IdType low, IdType length) {
int tx = blockIdx.x * blockDim.x + threadIdx.x;
int stride_x = gridDim.x * blockDim.x;
while (tx < length) {
out[tx] = low + tx;
tx += stride_x;
}
}
template <DLDeviceType XPU, typename IdType>
IdArray Range(IdType low, IdType high, DLContext ctx) {
CHECK(high >= low) << "high must be no less than low";
const IdType length = high - low;
IdArray ret = NewIdArray(length, ctx, sizeof(IdType) * 8);
if (length == 0)
return ret;
IdType* ret_data = static_cast<IdType*>(ret->data);
auto* thr_entry = runtime::CUDAThreadEntry::ThreadLocal();
int nt = FindNumThreads(length, 1024);
int nb = (length + nt - 1) / nt;
_RangeKernel<IdType><<<nb, nt, 0, thr_entry->stream>>>(ret_data, low, length);
return ret;
}
template IdArray Range<kDLGPU, int32_t>(int32_t, int32_t, DLContext);
template IdArray Range<kDLGPU, int64_t>(int64_t, int64_t, DLContext);
///////////////////////////// AsNumBits /////////////////////////////
template <typename InType, typename OutType>
__global__ void _CastKernel(const InType* in, OutType* out, size_t length) {
int tx = blockIdx.x * blockDim.x + threadIdx.x;
int stride_x = gridDim.x * blockDim.x;
while (tx < length) {
out[tx] = in[tx];
tx += stride_x;
}
}
template <DLDeviceType XPU, typename IdType>
IdArray AsNumBits(IdArray arr, uint8_t bits) {
const std::vector<int64_t> shape(arr->shape, arr->shape + arr->ndim);
IdArray ret = IdArray::Empty(shape, DLDataType{kDLInt, bits, 1}, arr->ctx);
const int64_t length = ret.NumElements();
auto* thr_entry = runtime::CUDAThreadEntry::ThreadLocal();
int nt = FindNumThreads(length, 1024);
int nb = (length + nt - 1) / nt;
if (bits == 32) {
_CastKernel<IdType, int32_t><<<nb, nt, 0, thr_entry->stream>>>(
static_cast<IdType*>(arr->data), static_cast<int32_t*>(ret->data), length);
} else {
_CastKernel<IdType, int64_t><<<nb, nt, 0, thr_entry->stream>>>(
static_cast<IdType*>(arr->data), static_cast<int64_t*>(ret->data), length);
}
return ret;
}
template IdArray AsNumBits<kDLGPU, int32_t>(IdArray arr, uint8_t bits);
template IdArray AsNumBits<kDLGPU, int64_t>(IdArray arr, uint8_t bits);
} // namespace impl
} // namespace aten
} // namespace dgl
/*!
* Copyright (c) 2020 by Contributors
* \file array/cuda/coo2csr.cc
* \brief COO2CSR
*/
#include <dgl/array.h>
#include "../../runtime/cuda/cuda_common.h"
namespace dgl {
using runtime::NDArray;
namespace aten {
namespace impl {
template <DLDeviceType XPU, typename IdType>
CSRMatrix COOToCSR(COOMatrix coo) {
CHECK(sizeof(IdType) == 4) << "CUDA COOToCSR does not support int64.";
auto* thr_entry = runtime::CUDAThreadEntry::ThreadLocal();
auto device = runtime::DeviceAPI::Get(coo.row->ctx);
// allocate cusparse handle if needed
if (!thr_entry->cusparse_handle) {
CUSPARSE_CALL(cusparseCreate(&(thr_entry->cusparse_handle)));
}
CUSPARSE_CALL(cusparseSetStream(thr_entry->cusparse_handle, thr_entry->stream));
NDArray row = coo.row, col = coo.col, data = coo.data;
int32_t* row_ptr = static_cast<int32_t*>(row->data);
int32_t* col_ptr = static_cast<int32_t*>(col->data);
int32_t* data_ptr = aten::IsNullArray(data) ? nullptr : static_cast<int32_t*>(data->data);
if (!coo.row_sorted) {
// make a copy of row and col because sort is done in-place
row = row.CopyTo(row->ctx);
col = col.CopyTo(col->ctx);
row_ptr = static_cast<int32_t*>(row->data);
col_ptr = static_cast<int32_t*>(col->data);
if (aten::IsNullArray(data)) {
// create the index array
data = aten::Range(0, row->shape[0], row->dtype.bits, row->ctx);
data_ptr = static_cast<int32_t*>(data->data);
}
// sort row
size_t workspace_size = 0;
CUSPARSE_CALL(cusparseXcoosort_bufferSizeExt(
thr_entry->cusparse_handle,
coo.num_rows, coo.num_cols,
row->shape[0],
row_ptr,
col_ptr,
&workspace_size));
void* workspace = device->AllocWorkspace(row->ctx, workspace_size);
CUSPARSE_CALL(cusparseXcoosortByRow(
thr_entry->cusparse_handle,
coo.num_rows, coo.num_cols,
row->shape[0],
row_ptr,
col_ptr,
data_ptr,
workspace));
device->FreeWorkspace(row->ctx, workspace);
}
NDArray indptr = aten::NewIdArray(coo.num_rows + 1, row->ctx, row->dtype.bits);
int32_t* indptr_ptr = static_cast<int32_t*>(indptr->data);
CUSPARSE_CALL(cusparseXcoo2csr(
thr_entry->cusparse_handle,
row_ptr,
row->shape[0],
coo.num_rows,
indptr_ptr,
CUSPARSE_INDEX_BASE_ZERO));
return CSRMatrix(coo.num_rows, coo.num_cols,
indptr, col, data, false);
}
template CSRMatrix COOToCSR<kDLGPU, int32_t>(COOMatrix coo);
template CSRMatrix COOToCSR<kDLGPU, int64_t>(COOMatrix coo);
} // namespace impl
} // namespace aten
} // namespace dgl
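// Usage sketch (assumption, not part of this commit): build a small COO
// matrix directly on the GPU and convert it. This cuSPARSE path only handles
// 32-bit indices, so 64-bit graphs must be narrowed first (e.g. with
// aten::AsNumBits); the context {kDLGPU, 0} and the values are illustrative.
inline dgl::aten::CSRMatrix COOToCSRExample() {
  const DLContext gpu{kDLGPU, 0};
  dgl::aten::COOMatrix coo(
      4, 5,
      dgl::aten::VecToIdArray(std::vector<int32_t>({0, 0, 1, 2, 2}), 32, gpu),
      dgl::aten::VecToIdArray(std::vector<int32_t>({1, 2, 0, 2, 3}), 32, gpu));
  // Dispatches through the ATEN switch macros to the kernel defined above.
  return dgl::aten::COOToCSR(coo);
}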
/*!
* Copyright (c) 2020 by Contributors
* \file array/cuda/coo_sort.cc
* \brief Sort COO index
*/
#include <dgl/array.h>
#include "../../runtime/cuda/cuda_common.h"
namespace dgl {
using runtime::NDArray;
namespace aten {
namespace impl {
template <DLDeviceType XPU, typename IdType>
COOMatrix COOSort(COOMatrix coo, bool sort_column) {
CHECK(sizeof(IdType) == 4) << "CUDA COOSort does not support int64.";
auto* thr_entry = runtime::CUDAThreadEntry::ThreadLocal();
auto device = runtime::DeviceAPI::Get(coo.row->ctx);
// allocate cusparse handle if needed
if (!thr_entry->cusparse_handle) {
CUSPARSE_CALL(cusparseCreate(&(thr_entry->cusparse_handle)));
}
CUSPARSE_CALL(cusparseSetStream(thr_entry->cusparse_handle, thr_entry->stream));
NDArray row = coo.row.CopyTo(coo.row->ctx);
NDArray col = coo.col.CopyTo(coo.col->ctx);
NDArray data;
if (aten::IsNullArray(coo.data)) {
// create the index array
data = aten::Range(0, row->shape[0], row->dtype.bits, row->ctx);
} else {
data = coo.data.CopyTo(coo.data->ctx);
}
int32_t* row_ptr = static_cast<int32_t*>(row->data);
int32_t* col_ptr = static_cast<int32_t*>(col->data);
int32_t* data_ptr = static_cast<int32_t*>(data->data);
// sort row
size_t workspace_size = 0;
CUSPARSE_CALL(cusparseXcoosort_bufferSizeExt(
thr_entry->cusparse_handle,
coo.num_rows, coo.num_cols,
row->shape[0],
row_ptr,
col_ptr,
&workspace_size));
void* workspace = device->AllocWorkspace(row->ctx, workspace_size);
CUSPARSE_CALL(cusparseXcoosortByRow(
thr_entry->cusparse_handle,
coo.num_rows, coo.num_cols,
row->shape[0],
row_ptr,
col_ptr,
data_ptr,
workspace));
device->FreeWorkspace(row->ctx, workspace);
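  // When column order is also requested: build a temporary CSR indptr from
  // the now row-sorted COO, then reuse cusparseXcsrsort, which sorts the
  // column indices within each row segment and applies the same permutation
  // to the data array.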
if (sort_column) {
// First create a row indptr array and then call csrsort
int32_t* indptr = static_cast<int32_t*>(
device->AllocWorkspace(row->ctx, (coo.num_rows + 1) * sizeof(IdType)));
CUSPARSE_CALL(cusparseXcoo2csr(
thr_entry->cusparse_handle,
row_ptr,
row->shape[0],
coo.num_rows,
indptr,
CUSPARSE_INDEX_BASE_ZERO));
CUSPARSE_CALL(cusparseXcsrsort_bufferSizeExt(
thr_entry->cusparse_handle,
coo.num_rows,
coo.num_cols,
row->shape[0],
indptr,
col_ptr,
&workspace_size));
void* workspace = device->AllocWorkspace(row->ctx, workspace_size);
cusparseMatDescr_t descr;
CUSPARSE_CALL(cusparseCreateMatDescr(&descr));
CUSPARSE_CALL(cusparseXcsrsort(
thr_entry->cusparse_handle,
coo.num_rows,
coo.num_cols,
row->shape[0],
descr,
indptr,
col_ptr,
data_ptr,
workspace));
CUSPARSE_CALL(cusparseDestroyMatDescr(descr));
device->FreeWorkspace(row->ctx, workspace);
device->FreeWorkspace(row->ctx, indptr);
}
return COOMatrix(coo.num_rows, coo.num_cols,
row, col, data, true, sort_column);
}
template COOMatrix COOSort<kDLGPU, int32_t>(COOMatrix coo, bool sort_column);
template COOMatrix COOSort<kDLGPU, int64_t>(COOMatrix coo, bool sort_column);
} // namespace impl
} // namespace aten
} // namespace dgl
/*!
* Copyright (c) 2020 by Contributors
* \file array/cuda/csr2coo.cc
* \brief CSR2COO
*/
#include <dgl/array.h>
#include "../../runtime/cuda/cuda_common.h"
namespace dgl {
using runtime::NDArray;
namespace aten {
namespace impl {
template <DLDeviceType XPU, typename IdType>
COOMatrix CSRToCOO(CSRMatrix csr) {
CHECK(sizeof(IdType) == 4) << "CUDA CSRToCOO does not support int64.";
auto* thr_entry = runtime::CUDAThreadEntry::ThreadLocal();
// allocate cusparse handle if needed
if (!thr_entry->cusparse_handle) {
CUSPARSE_CALL(cusparseCreate(&(thr_entry->cusparse_handle)));
}
CUSPARSE_CALL(cusparseSetStream(thr_entry->cusparse_handle, thr_entry->stream));
NDArray indptr = csr.indptr, indices = csr.indices, data = csr.data;
const int32_t* indptr_ptr = static_cast<int32_t*>(indptr->data);
NDArray row = aten::NewIdArray(indices->shape[0], indptr->ctx, indptr->dtype.bits);
int32_t* row_ptr = static_cast<int32_t*>(row->data);
CUSPARSE_CALL(cusparseXcsr2coo(
thr_entry->cusparse_handle,
indptr_ptr,
indices->shape[0],
csr.num_rows,
row_ptr,
CUSPARSE_INDEX_BASE_ZERO));
return COOMatrix(csr.num_rows, csr.num_cols,
row, indices, data,
true, csr.sorted);
}
template COOMatrix CSRToCOO<kDLGPU, int32_t>(CSRMatrix csr);
template COOMatrix CSRToCOO<kDLGPU, int64_t>(CSRMatrix csr);
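// Reorder the COO so its tuples follow the data (edge ID) array:
// cusparseXcoosortByRow is invoked with `data` in the row-key slot and `row`
// as the secondary key, so the (data, row, col) triples end up sorted by edge
// ID with row and col permuted alongside.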
template <DLDeviceType XPU, typename IdType>
COOMatrix CSRToCOODataAsOrder(CSRMatrix csr) {
COOMatrix coo = CSRToCOO<XPU, IdType>(csr);
if (aten::IsNullArray(coo.data))
return coo;
auto* thr_entry = runtime::CUDAThreadEntry::ThreadLocal();
auto device = runtime::DeviceAPI::Get(coo.row->ctx);
// allocate cusparse handle if needed
if (!thr_entry->cusparse_handle) {
CUSPARSE_CALL(cusparseCreate(&(thr_entry->cusparse_handle)));
}
CUSPARSE_CALL(cusparseSetStream(thr_entry->cusparse_handle, thr_entry->stream));
NDArray row = coo.row, col = coo.col, data = coo.data;
int32_t* row_ptr = static_cast<int32_t*>(row->data);
int32_t* col_ptr = static_cast<int32_t*>(col->data);
int32_t* data_ptr = static_cast<int32_t*>(data->data);
size_t workspace_size = 0;
CUSPARSE_CALL(cusparseXcoosort_bufferSizeExt(
thr_entry->cusparse_handle,
coo.num_rows, coo.num_cols,
row->shape[0],
data_ptr,
row_ptr,
&workspace_size));
void* workspace = device->AllocWorkspace(row->ctx, workspace_size);
CUSPARSE_CALL(cusparseXcoosortByRow(
thr_entry->cusparse_handle,
coo.num_rows, coo.num_cols,
row->shape[0],
data_ptr,
row_ptr,
col_ptr,
workspace));
device->FreeWorkspace(row->ctx, workspace);
return coo;
}
template COOMatrix CSRToCOODataAsOrder<kDLGPU, int32_t>(CSRMatrix csr);
template COOMatrix CSRToCOODataAsOrder<kDLGPU, int64_t>(CSRMatrix csr);
} // namespace impl
} // namespace aten
} // namespace dgl
/*!
* Copyright (c) 2020 by Contributors
* \file array/cuda/csr_transpose.cc
* \brief CSR transpose (convert to CSC)
*/
#include <dgl/array.h>
#include "../../runtime/cuda/cuda_common.h"
namespace dgl {
using runtime::NDArray;
namespace aten {
namespace impl {
template <DLDeviceType XPU, typename IdType>
CSRMatrix CSRTranspose(CSRMatrix csr) {
CHECK(sizeof(IdType) == 4) << "CUDA CSR2CSC does not support int64.";
auto* thr_entry = runtime::CUDAThreadEntry::ThreadLocal();
// allocate cusparse handle if needed
if (!thr_entry->cusparse_handle) {
CUSPARSE_CALL(cusparseCreate(&(thr_entry->cusparse_handle)));
}
CUSPARSE_CALL(cusparseSetStream(thr_entry->cusparse_handle, thr_entry->stream));
NDArray indptr = csr.indptr, indices = csr.indices, data = csr.data;
const int64_t nnz = indices->shape[0];
const auto& ctx = indptr->ctx;
const auto bits = indptr->dtype.bits;
if (aten::IsNullArray(data))
data = aten::Range(0, nnz, bits, ctx);
const int32_t* indptr_ptr = static_cast<int32_t*>(indptr->data);
const int32_t* indices_ptr = static_cast<int32_t*>(indices->data);
const void* data_ptr = data->data;
NDArray t_indptr = aten::NewIdArray(csr.num_cols + 1, ctx, bits);
NDArray t_indices = aten::NewIdArray(nnz, ctx, bits);
NDArray t_data = aten::NewIdArray(nnz, ctx, bits);
int32_t* t_indptr_ptr = static_cast<int32_t*>(t_indptr->data);
int32_t* t_indices_ptr = static_cast<int32_t*>(t_indices->data);
void* t_data_ptr = t_data->data;
#if __CUDA_API_VERSION >= 10010
auto device = runtime::DeviceAPI::Get(csr.indptr->ctx);
// workspace
size_t workspace_size;
CUSPARSE_CALL(cusparseCsr2cscEx2_bufferSize(
thr_entry->cusparse_handle,
csr.num_rows, csr.num_cols, nnz,
data_ptr, indptr_ptr, indices_ptr,
t_data_ptr, t_indptr_ptr, t_indices_ptr,
CUDA_R_32F,
CUSPARSE_ACTION_NUMERIC,
CUSPARSE_INDEX_BASE_ZERO,
CUSPARSE_CSR2CSC_ALG1, // see cusparse doc for reference
&workspace_size));
void* workspace = device->AllocWorkspace(ctx, workspace_size);
CUSPARSE_CALL(cusparseCsr2cscEx2(
thr_entry->cusparse_handle,
csr.num_rows, csr.num_cols, nnz,
data_ptr, indptr_ptr, indices_ptr,
t_data_ptr, t_indptr_ptr, t_indices_ptr,
CUDA_R_32F,
CUSPARSE_ACTION_NUMERIC,
CUSPARSE_INDEX_BASE_ZERO,
CUSPARSE_CSR2CSC_ALG1, // see cusparse doc for reference
workspace));
device->FreeWorkspace(ctx, workspace);
#else
CUSPARSE_CALL(cusparseScsr2csc(
thr_entry->cusparse_handle,
csr.num_rows, csr.num_cols, nnz,
static_cast<const float*>(data_ptr), indptr_ptr, indices_ptr,
static_cast<float*>(t_data_ptr), t_indices_ptr, t_indptr_ptr,
CUSPARSE_ACTION_NUMERIC,
CUSPARSE_INDEX_BASE_ZERO));
#endif
return CSRMatrix(csr.num_cols, csr.num_rows,
t_indptr, t_indices, t_data,
false);
}
template CSRMatrix CSRTranspose<kDLGPU, int32_t>(CSRMatrix csr);
template CSRMatrix CSRTranspose<kDLGPU, int64_t>(CSRMatrix csr);
} // namespace impl
} // namespace aten
} // namespace dgl
@@ -15,24 +15,6 @@
#include <vector>
#include <string>
using dgl::runtime::operator<<;
/*! \brief Output the string representation of device context.*/
inline std::ostream& operator<<(std::ostream& os, const DLContext& ctx) {
std::string device_name;
switch (ctx.device_type) {
case kDLCPU:
device_name = "CPU";
break;
case kDLGPU:
device_name = "GPU";
break;
default:
device_name = "Unknown device";
}
return os << device_name << ":" << ctx.device_id;
}
namespace dgl {
// Communicator handler type
...
@@ -275,7 +275,7 @@ FlattenedHeteroGraphPtr HeteroGraph::Flatten(
  const int64_t bits = NumBits();
  if (bits == 32) {
    return FlattenImpl<int32_t>(etypes);
  } else {
    return FlattenImpl<int64_t>(etypes);
  }
}
...
@@ -51,7 +51,7 @@ std::pair<IdArray, TypeArray> RandomWalk(
  TypeArray vtypes;
  IdArray vids;
  ATEN_XPU_SWITCH(hg->Context().device_type, XPU, "RandomWalk", {
    ATEN_ID_TYPE_SWITCH(seeds->dtype, IdxType, {
      vtypes = impl::GetNodeTypesFromMetapath<XPU, IdxType>(hg, metapath);
      vids = impl::RandomWalk<XPU, IdxType>(hg, seeds, metapath, prob);
@@ -72,7 +72,7 @@ std::pair<IdArray, TypeArray> RandomWalkWithRestart(
  TypeArray vtypes;
  IdArray vids;
  ATEN_XPU_SWITCH(hg->Context().device_type, XPU, "RandomWalkWithRestart", {
    ATEN_ID_TYPE_SWITCH(seeds->dtype, IdxType, {
      vtypes = impl::GetNodeTypesFromMetapath<XPU, IdxType>(hg, metapath);
      vids = impl::RandomWalkWithRestart<XPU, IdxType>(hg, seeds, metapath, prob, restart_prob);
@@ -93,7 +93,7 @@ std::pair<IdArray, TypeArray> RandomWalkWithStepwiseRestart(
  TypeArray vtypes;
  IdArray vids;
  ATEN_XPU_SWITCH(hg->Context().device_type, XPU, "RandomWalkWithStepwiseRestart", {
    ATEN_ID_TYPE_SWITCH(seeds->dtype, IdxType, {
      vtypes = impl::GetNodeTypesFromMetapath<XPU, IdxType>(hg, metapath);
      vids = impl::RandomWalkWithStepwiseRestart<XPU, IdxType>(
...
@@ -123,6 +123,14 @@ size_t NDArray::GetSize() const {
  return GetDataSize(data_->dl_tensor);
}
int64_t NDArray::NumElements() const {
int64_t size = 1;
for (int i = 0; i < data_->dl_tensor.ndim; ++i) {
size *= data_->dl_tensor.shape[i];
}
return size;
}
bool NDArray::IsContiguous() const {
  CHECK(data_ != nullptr);
  if (data_->dl_tensor.strides == nullptr)
...
@@ -3,6 +3,12 @@
#include <dgl/runtime/ndarray.h>
static constexpr DLContext CTX = DLContext{kDLCPU, 0};
static constexpr DLContext CPU = DLContext{kDLCPU, 0};
#ifdef DGL_USE_CUDA
static constexpr DLContext GPU = DLContext{kDLGPU, 0};
#endif
template <typename T>
inline T* Ptr(dgl::runtime::NDArray nd) {
  return static_cast<T*>(nd->data);
@@ -29,6 +35,9 @@ inline bool ArrayEQ(dgl::runtime::NDArray a1, dgl::runtime::NDArray a2) {
      return false;
    num *= a1->shape[i];
  }
if (a1->ctx != a2->ctx) return false;
a1 = a1.CopyTo(CPU);
a2 = a2.CopyTo(CPU);
  for (int64_t i = 0; i < num; ++i)
    if (static_cast<T*>(a1->data)[i] != static_cast<T*>(a2->data)[i])
      return false;
@@ -46,6 +55,4 @@ inline bool IsInArray(dgl::runtime::NDArray a, T x) {
  return false;
}

#endif  // TEST_COMMON_H_
@@ -25,14 +25,22 @@ TEST(ArrayTest, TestCreate) {
  ASSERT_EQ(Len(a), 0);
};
void _TestRange(DLContext ctx) {
  IdArray a = aten::Range(10, 10, 64, ctx);
  ASSERT_EQ(Len(a), 0);
  a = aten::Range(10, 20, 32, ctx);
  ASSERT_EQ(Len(a), 10);
  ASSERT_EQ(a->dtype.bits, 32);
  a = a.CopyTo(CPU);
  for (int i = 0; i < 10; ++i)
    ASSERT_EQ(Ptr<int32_t>(a)[i], i + 10);
}

TEST(ArrayTest, TestRange) {
  _TestRange(CPU);
#ifdef DGL_USE_CUDA
  _TestRange(GPU);
#endif
};
TEST(ArrayTest, TestFull) {
@@ -61,12 +69,20 @@ TEST(ArrayTest, TestClone) {
  }
};
void _TestNumBits(DLContext ctx) {
  IdArray a = aten::Range(0, 10, 32, ctx);
  a = aten::AsNumBits(a, 64);
  ASSERT_EQ(a->dtype.bits, 64);
  a = a.CopyTo(CPU);
  for (int i = 0; i < 10; ++i)
    ASSERT_EQ(PI64(a)[i], i);
}

TEST(ArrayTest, TestAsNumBits) {
  _TestNumBits(CPU);
#ifdef DGL_USE_CUDA
  _TestNumBits(GPU);
#endif
};

template <typename IDX>
...
@@ -8,7 +8,7 @@ using namespace dgl::runtime;
namespace {

template <typename IDX>
aten::CSRMatrix CSR1(DLContext ctx = CTX) {
  // [[0, 1, 1, 0, 0],
  // [1, 0, 0, 0, 0],
  // [0, 0, 1, 1, 0],
@@ -16,14 +16,14 @@ aten::CSRMatrix CSR1() {
  // data: [0, 2, 3, 1, 4]
  return aten::CSRMatrix(
      4, 5,
      aten::VecToIdArray(std::vector<IDX>({0, 2, 3, 5, 5}), sizeof(IDX)*8, ctx),
      aten::VecToIdArray(std::vector<IDX>({1, 2, 0, 2, 3}), sizeof(IDX)*8, ctx),
      aten::VecToIdArray(std::vector<IDX>({0, 2, 3, 1, 4}), sizeof(IDX)*8, ctx),
      false);
}

template <typename IDX>
aten::CSRMatrix CSR2(DLContext ctx = CTX) {
  // has duplicate entries
  // [[0, 1, 2, 0, 0],
  // [1, 0, 0, 0, 0],
@@ -32,14 +32,14 @@ aten::CSRMatrix CSR2() {
  // data: [0, 2, 5, 3, 1, 4]
  return aten::CSRMatrix(
      4, 5,
      aten::VecToIdArray(std::vector<IDX>({0, 3, 4, 6, 6}), sizeof(IDX)*8, ctx),
      aten::VecToIdArray(std::vector<IDX>({1, 2, 2, 0, 2, 3}), sizeof(IDX)*8, ctx),
      aten::VecToIdArray(std::vector<IDX>({0, 2, 5, 3, 1, 4}), sizeof(IDX)*8, ctx),
      false);
}

template <typename IDX>
aten::COOMatrix COO1(DLContext ctx = CTX) {
  // [[0, 1, 1, 0, 0],
  // [1, 0, 0, 0, 0],
  // [0, 0, 1, 1, 0],
@@ -49,12 +49,12 @@ aten::COOMatrix COO1() {
  // col : [1, 2, 2, 0, 3]
  return aten::COOMatrix(
      4, 5,
      aten::VecToIdArray(std::vector<IDX>({0, 2, 0, 1, 2}), sizeof(IDX)*8, ctx),
      aten::VecToIdArray(std::vector<IDX>({1, 2, 2, 0, 3}), sizeof(IDX)*8, ctx));
}

template <typename IDX>
aten::COOMatrix COO2(DLContext ctx = CTX) {
  // has duplicate entries
  // [[0, 1, 2, 0, 0],
  // [1, 0, 0, 0, 0],
@@ -65,40 +65,40 @@ aten::COOMatrix COO2() {
  // col : [1, 2, 2, 0, 3, 2]
  return aten::COOMatrix(
      4, 5,
      aten::VecToIdArray(std::vector<IDX>({0, 2, 0, 1, 2, 0}), sizeof(IDX)*8, ctx),
      aten::VecToIdArray(std::vector<IDX>({1, 2, 2, 0, 3, 2}), sizeof(IDX)*8, ctx));
}
template <typename IDX>
aten::CSRMatrix SR_CSR3(DLContext ctx) {
  // [[0, 1, 2, 0, 0],
  // [1, 0, 0, 0, 0],
  // [0, 0, 1, 1, 0],
  // [0, 0, 0, 0, 0]]
  return aten::CSRMatrix(
      4, 5,
      aten::VecToIdArray(std::vector<IDX>({0, 3, 4, 6, 6}), sizeof(IDX)*8, ctx),
      aten::VecToIdArray(std::vector<IDX>({2, 1, 2, 0, 2, 3}), sizeof(IDX)*8, ctx),
      aten::VecToIdArray(std::vector<IDX>({0, 2, 5, 3, 1, 4}), sizeof(IDX)*8, ctx),
      false);
}

template <typename IDX>
aten::CSRMatrix SRC_CSR3(DLContext ctx) {
  // [[0, 1, 2, 0, 0],
  // [1, 0, 0, 0, 0],
  // [0, 0, 1, 1, 0],
  // [0, 0, 0, 0, 0]]
  return aten::CSRMatrix(
      4, 5,
      aten::VecToIdArray(std::vector<IDX>({0, 3, 4, 6, 6}), sizeof(IDX)*8, ctx),
      aten::VecToIdArray(std::vector<IDX>({1, 2, 2, 0, 2, 3}), sizeof(IDX)*8, ctx),
      aten::VecToIdArray(std::vector<IDX>({2, 0, 5, 3, 1, 4}), sizeof(IDX)*8, ctx),
      false);
}

template <typename IDX>
aten::COOMatrix COO3(DLContext ctx) {
  // has duplicate entries
  // [[0, 1, 2, 0, 0],
  // [1, 0, 0, 0, 0],
@@ -108,11 +108,11 @@ aten::COOMatrix COO3() {
  // col : [2, 2, 1, 0, 3, 2]
  return aten::COOMatrix(
      4, 5,
      aten::VecToIdArray(std::vector<IDX>({0, 2, 0, 1, 2, 0}), sizeof(IDX)*8, ctx),
      aten::VecToIdArray(std::vector<IDX>({2, 2, 1, 0, 3, 2}), sizeof(IDX)*8, ctx));
}

}  // namespace

template <typename IDX>
void _TestCSRIsNonZero() {
@@ -227,8 +227,8 @@ TEST(SpmatTest, TestCSRGetDataAndIndices) {
}

template <typename IDX>
void _TestCSRTranspose(DLContext ctx) {
  auto csr = CSR2<IDX>(ctx);
  auto csr_t = aten::CSRTranspose(csr);
  // [[0, 1, 0, 0],
  // [1, 0, 0, 0],
@@ -238,29 +238,32 @@ void _TestCSRTranspose() {
  // data: [3, 0, 2, 5, 1, 4]
  ASSERT_EQ(csr_t.num_rows, 5);
  ASSERT_EQ(csr_t.num_cols, 4);
  auto tp = aten::VecToIdArray(std::vector<IDX>({0, 1, 2, 5, 6, 6}), sizeof(IDX)*8, ctx);
  auto ti = aten::VecToIdArray(std::vector<IDX>({1, 0, 0, 0, 2, 2}), sizeof(IDX)*8, ctx);
  auto td = aten::VecToIdArray(std::vector<IDX>({3, 0, 2, 5, 1, 4}), sizeof(IDX)*8, ctx);
  ASSERT_TRUE(ArrayEQ<IDX>(csr_t.indptr, tp));
  ASSERT_TRUE(ArrayEQ<IDX>(csr_t.indices, ti));
  ASSERT_TRUE(ArrayEQ<IDX>(csr_t.data, td));
}

TEST(SpmatTest, TestCSRTranspose) {
  _TestCSRTranspose<int32_t>(CPU);
  _TestCSRTranspose<int64_t>(CPU);
#ifdef DGL_USE_CUDA
  _TestCSRTranspose<int32_t>(GPU);
#endif
}
template <typename IDX>
void _TestCSRToCOO(DLContext ctx) {
  auto csr = CSR2<IDX>(ctx);
  {
    auto coo = CSRToCOO(csr, false);
    ASSERT_EQ(coo.num_rows, 4);
    ASSERT_EQ(coo.num_cols, 5);
    auto tr = aten::VecToIdArray(std::vector<IDX>({0, 0, 0, 1, 2, 2}), sizeof(IDX)*8, ctx);
    auto tc = aten::VecToIdArray(std::vector<IDX>({1, 2, 2, 0, 2, 3}), sizeof(IDX)*8, ctx);
    auto td = aten::VecToIdArray(std::vector<IDX>({0, 2, 5, 3, 1, 4}), sizeof(IDX)*8, ctx);
    ASSERT_TRUE(ArrayEQ<IDX>(coo.row, tr));
    ASSERT_TRUE(ArrayEQ<IDX>(coo.col, tc));
    ASSERT_TRUE(ArrayEQ<IDX>(coo.data, td));
@@ -269,15 +272,18 @@ void _TestCSRToCOO() {
    auto coo = CSRToCOO(csr, true);
    ASSERT_EQ(coo.num_rows, 4);
    ASSERT_EQ(coo.num_cols, 5);
    auto tcoo = COO2<IDX>(ctx);
    ASSERT_TRUE(ArrayEQ<IDX>(coo.row, tcoo.row));
    ASSERT_TRUE(ArrayEQ<IDX>(coo.col, tcoo.col));
  }
}

TEST(SpmatTest, TestCSRToCOO) {
  _TestCSRToCOO<int32_t>(CPU);
  _TestCSRToCOO<int64_t>(CPU);
#ifdef DGL_USE_CUDA
  _TestCSRToCOO<int32_t>(GPU);
#endif
}

template <typename IDX>
@@ -355,48 +361,40 @@ TEST(SpmatTest, TestCSRHasDuplicate) {
}

template <typename IDX>
void _TestCOOToCSR(DLContext ctx) {
  auto coo = COO1<IDX>(ctx);
  auto csr = CSR1<IDX>(ctx);
  auto tcsr = aten::COOToCSR(coo);
  ASSERT_EQ(coo.num_rows, csr.num_rows);
  ASSERT_EQ(coo.num_cols, csr.num_cols);
  ASSERT_TRUE(ArrayEQ<IDX>(csr.indptr, tcsr.indptr));
  coo = COO2<IDX>(ctx);
  csr = CSR2<IDX>(ctx);
  tcsr = aten::COOToCSR(coo);
  ASSERT_EQ(coo.num_rows, csr.num_rows);
  ASSERT_EQ(coo.num_cols, csr.num_cols);
  ASSERT_TRUE(ArrayEQ<IDX>(csr.indptr, tcsr.indptr));

  coo = COO1<IDX>(ctx);
  auto rs_coo = aten::COOSort(coo, false);
  auto rs_csr = CSR1<IDX>(ctx);
  auto rs_tcsr = aten::COOToCSR(rs_coo);
  ASSERT_EQ(coo.num_rows, rs_tcsr.num_rows);
  ASSERT_EQ(coo.num_cols, rs_tcsr.num_cols);
  ASSERT_TRUE(ArrayEQ<IDX>(rs_csr.indptr, rs_tcsr.indptr));

  coo = COO3<IDX>(ctx);
  rs_coo = aten::COOSort(coo, false);
  rs_csr = SR_CSR3<IDX>(ctx);
  rs_tcsr = aten::COOToCSR(rs_coo);
  ASSERT_EQ(coo.num_rows, rs_tcsr.num_rows);
  ASSERT_EQ(coo.num_cols, rs_tcsr.num_cols);
  ASSERT_TRUE(ArrayEQ<IDX>(rs_csr.indptr, rs_tcsr.indptr));

  coo = COO1<IDX>(ctx);
  auto src_coo = aten::COOSort(coo, true);
  auto src_csr = CSR1<IDX>(ctx);
  auto src_tcsr = aten::COOToCSR(src_coo);
  ASSERT_EQ(coo.num_rows, src_tcsr.num_rows);
  ASSERT_EQ(coo.num_cols, src_tcsr.num_cols);
@@ -404,9 +402,9 @@ void _TestCOOToCSR() {
  ASSERT_TRUE(ArrayEQ<IDX>(src_csr.indices, src_tcsr.indices));
  ASSERT_TRUE(ArrayEQ<IDX>(src_csr.data, src_tcsr.data));

  coo = COO3<IDX>(ctx);
  src_coo = aten::COOSort(coo, true);
  src_csr = SRC_CSR3<IDX>(ctx);
  src_tcsr = aten::COOToCSR(src_coo);
  ASSERT_EQ(coo.num_rows, src_tcsr.num_rows);
  ASSERT_EQ(coo.num_cols, src_tcsr.num_cols);
@@ -416,8 +414,11 @@ void _TestCOOToCSR() {
}

TEST(SpmatTest, TestCOOToCSR) {
  _TestCOOToCSR<int32_t>(CPU);
  _TestCOOToCSR<int64_t>(CPU);
#ifdef DGL_USE_CUDA
  _TestCOOToCSR<int32_t>(GPU);
#endif
}

template <typename IDX>
@@ -434,8 +435,8 @@ TEST(SpmatTest, TestCOOHasDuplicate) {
}

template <typename IDX>
void _TestCOOSort(DLContext ctx) {
  auto coo = COO3<IDX>(ctx);
  auto sr_coo = COOSort(coo, false);
  ASSERT_EQ(coo.num_rows, sr_coo.num_rows);
  ASSERT_EQ(coo.num_cols, sr_coo.num_cols);
@@ -460,25 +461,22 @@ void _TestCOOSort() {
  // row : [0, 0, 0, 1, 2, 2]
  // col : [1, 2, 2, 0, 2, 3]
  auto sort_row = aten::VecToIdArray(
    std::vector<IDX>({0, 0, 0, 1, 2, 2}), sizeof(IDX)*8, ctx);
  auto sort_col = aten::VecToIdArray(
    std::vector<IDX>({1, 2, 2, 0, 2, 3}), sizeof(IDX)*8, ctx);
  auto sort_col_data = aten::VecToIdArray(
    std::vector<IDX>({2, 0, 5, 3, 1, 4}), sizeof(IDX)*8, ctx);
  ASSERT_TRUE(ArrayEQ<IDX>(sr_coo.row, sort_row));
  ASSERT_TRUE(ArrayEQ<IDX>(src_coo.row, sort_row));
  ASSERT_TRUE(ArrayEQ<IDX>(src_coo.col, sort_col));
  ASSERT_TRUE(ArrayEQ<IDX>(src_coo.data, sort_col_data));
}

TEST(SpmatTest, TestCOOSort) {
  _TestCOOSort<int32_t>(CPU);
  _TestCOOSort<int64_t>(CPU);
#ifdef DGL_USE_CUDA
  _TestCOOSort<int32_t>(GPU);
#endif
}