Unverified Commit 06438d70 authored by czkkkkkk, committed by GitHub

[Sparse] Link to DGL (#4877)

parent 6e1cc7da
@@ -251,6 +251,9 @@ if(NOT MSVC)
target_include_directories(dgl PRIVATE third_party/tensorpipe)
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wall")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall")
# Avoid exposing third-party symbols when using DGL as a library.
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wl,--exclude-libs,ALL")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wl,--exclude-libs,ALL")
endif(NOT MSVC)
# Compile TVM Runtime and Featgraph
@@ -315,24 +318,6 @@ if(BUILD_TORCH)
add_dependencies(dgl tensoradapter_pytorch)
endif(BUILD_TORCH)
if(BUILD_SPARSE)
file(TO_NATIVE_PATH ${CMAKE_CURRENT_BINARY_DIR} BINDIR)
file(TO_NATIVE_PATH ${CMAKE_COMMAND} CMAKE_CMD)
# TODO(zhenkun): MSVC support?
file(TO_NATIVE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/dgl_sparse/build.sh BUILD_SCRIPT)
add_custom_target(
dgl_sparse
${CMAKE_COMMAND} -E env
CMAKE_COMMAND=${CMAKE_CMD}
CUDA_TOOLKIT_ROOT_DIR=${CUDA_TOOLKIT_ROOT_DIR}
USE_CUDA=${USE_CUDA}
BINDIR=${CMAKE_CURRENT_BINARY_DIR}
bash ${BUILD_SCRIPT} ${TORCH_PYTHON_INTERPS}
DEPENDS ${BUILD_SCRIPT}
WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}/dgl_sparse)
add_dependencies(dgl dgl_sparse)
endif(BUILD_SPARSE)
# Installation rules
install(TARGETS dgl DESTINATION lib${LIB_SUFFIX})
@@ -364,3 +349,8 @@ if(BUILD_CPP_TEST)
target_link_libraries(rpc_server dgl)
endif(NOT MSVC)
endif(BUILD_CPP_TEST)
if(BUILD_SPARSE)
set(DGL_INCLUDE "${CMAKE_CURRENT_SOURCE_DIR}/include")
add_subdirectory(dgl_sparse)
endif(BUILD_SPARSE)
@@ -260,7 +260,7 @@ macro(dgl_config_cuda out_variable)
if(MSVC AND NOT USE_MSVC_MT)
string(CONCAT CXX_HOST_FLAGS ${CXX_HOST_FLAGS} ",/MD")
endif()
list(APPEND CUDA_NVCC_FLAGS "-Xcompiler ,${CXX_HOST_FLAGS}")
list(APPEND CUDA_NVCC_FLAGS "-Xcompiler" "${CXX_HOST_FLAGS}")
# 1. Add arch flags
dgl_select_nvcc_arch_flags(NVCC_FLAGS_ARCH)
......
cmake_minimum_required(VERSION 3.5)
project(dgl_sparse C CXX)
# Find PyTorch cmake files and PyTorch versions with the python interpreter $PYTHON_INTERP
# Find PyTorch cmake files and PyTorch versions with the python interpreter $TORCH_PYTHON_INTERPS
# ("python3" or "python" if empty)
if(NOT PYTHON_INTERP)
find_program(PYTHON_INTERP NAMES python3 python)
if(NOT TORCH_PYTHON_INTERPS)
find_program(TORCH_PYTHON_INTERPS NAMES python3 python)
endif()
message(STATUS "Using Python interpreter: ${PYTHON_INTERP}")
message(STATUS "Using Python interpreter: ${TORCH_PYTHON_INTERPS}")
file(TO_NATIVE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/find_cmake.py FIND_CMAKE_PY)
execute_process(
COMMAND ${PYTHON_INTERP} ${FIND_CMAKE_PY}
COMMAND ${TORCH_PYTHON_INTERPS} ${FIND_CMAKE_PY}
OUTPUT_VARIABLE TORCH_PREFIX_VER
OUTPUT_STRIP_TRAILING_WHITESPACE)
message(STATUS "find_cmake.py output: ${TORCH_PREFIX_VER}")
@@ -27,8 +24,8 @@ find_package(Torch REQUIRED)
set(LIB_DGL_SPARSE_NAME "dgl_sparse_pytorch_${TORCH_VER}")
set(SPARSE_DIR "${CMAKE_SOURCE_DIR}/src")
set(SPARSE_INCLUDE "${CMAKE_SOURCE_DIR}/include")
set(SPARSE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/src")
set(SPARSE_INCLUDE "${CMAKE_CURRENT_SOURCE_DIR}/include")
file(GLOB SPARSE_HEADERS ${SPARSE_INCLUDE})
file(GLOB SPARSE_SRC
${SPARSE_DIR}/*.cc
@@ -38,7 +35,10 @@ target_include_directories(
${LIB_DGL_SPARSE_NAME} PRIVATE ${SPARSE_DIR} ${SPARSE_HEADERS})
target_link_libraries(${LIB_DGL_SPARSE_NAME} "${TORCH_LIBRARIES}")
add_subdirectory("${CMAKE_SOURCE_DIR}/../third_party/dmlc-core" "${CMAKE_SOURCE_DIR}/build/third_party/dmlc-core")
target_include_directories(${LIB_DGL_SPARSE_NAME} PRIVATE "${CMAKE_SOURCE_DIR}/../third_party/dmlc-core/include")
target_include_directories(${LIB_DGL_SPARSE_NAME} PRIVATE "${CMAKE_SOURCE_DIR}/third_party/dmlc-core/include")
target_link_libraries(${LIB_DGL_SPARSE_NAME} dmlc)
set(GOOGLE_TEST 0) # Turn off dmlc-core test
target_include_directories(${LIB_DGL_SPARSE_NAME} PRIVATE ${DGL_INCLUDE})
target_link_libraries(${LIB_DGL_SPARSE_NAME} dgl)
/**
* Copyright (c) 2022 by Contributors
* @file dgl_headers.h
* @brief DGL headers used in the sparse library. This is a workaround to
* avoid macro name conflicts between dmlc/logging.h and the PyTorch logger:
* the file includes every DGL header used in the sparse library and then
* undefines the logging macros defined in dmlc/logging.h. Two rules govern
* its use: (1) all DGL headers used in the sparse library must be
* registered in this file, and only in this file; (2) this file must be
* included before any PyTorch headers.
*/
#ifndef SPARSE_DGL_HEADERS_H_
#define SPARSE_DGL_HEADERS_H_
#include <dgl/aten/coo.h>
#include <dgl/aten/csr.h>
#include <dgl/runtime/dlpack_convert.h>
#include <dmlc/logging.h>
#undef CHECK
#undef CHECK_OP
#undef CHECK_EQ
#undef CHECK_NE
#undef CHECK_LE
#undef CHECK_LT
#undef CHECK_GE
#undef CHECK_GT
#undef CHECK_NOTNULL
#undef DCHECK
#undef DCHECK_EQ
#undef DCHECK_NE
#undef DCHECK_LE
#undef DCHECK_LT
#undef DCHECK_GE
#undef DCHECK_GT
#undef DCHECK_NOTNULL
#undef VLOG
#undef LOG
#undef DLOG
#undef LOG_IF
#endif // SPARSE_DGL_HEADERS_H_
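A minimal sketch (not part of the commit) of rule (2) above: every translation unit in the sparse library that touches PyTorch includes this header first, so the dmlc logging macros are already undefined by the time the PyTorch headers define their own. The .cc files in this commit follow this pattern, wrapping the include in clang-format off/on so the formatter cannot reorder it:
// clang-format off
#include <sparse/dgl_headers.h>  // first: pulls in DGL/dmlc, undefines CHECK/LOG
// clang-format on
#include <torch/script.h>        // safe now: no macro collision with dmlc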
/**
* Copyright (c) 2022 by Contributors
* @file sparse/elementwise_op.h
* @brief DGL C++ sparse elementwise operators
* @brief DGL C++ sparse elementwise operators.
*/
#ifndef SPARSE_ELEMENTWISE_OP_H_
#define SPARSE_ELEMENTWISE_OP_H_
#include <sparse/sparse_matrix.h>
#include <torch/custom_class.h>
namespace dgl {
namespace sparse {
......
/**
* Copyright (c) 2022 by Contributors
* @file sparse/sparse_format.h
* @brief DGL C++ sparse format header.
*/
#ifndef SPARSE_SPARSE_FORMAT_H_
#define SPARSE_SPARSE_FORMAT_H_
// clang-format off
#include <sparse/dgl_headers.h>
// clang-format on
#include <torch/custom_class.h>
#include <torch/script.h>
#include <memory>
namespace dgl {
namespace sparse {
/** @brief SparseFormat enumeration */
enum SparseFormat { kCOO, kCSR, kCSC };
/** @brief CSR sparse structure */
struct CSR {
// CSR format index pointer array of the matrix
torch::Tensor indptr;
// CSR format index array of the matrix
torch::Tensor indices;
// The element order of the sparse format. The SparseMatrix stores data
// (`value_`) for each non-zero element, but the order of non-zeros in
// `value_` may differ from the order of non-zero entries in this CSR. So we
// store `value_indices` in the CSR to map its non-zero order to that of the
// SparseMatrix: the values in CSR order can be retrieved as
// `value_[value_indices]`. If `value_indices` is not defined, the CSR
// follows the same non-zero order as the SparseMatrix.
torch::optional<torch::Tensor> value_indices;
};
/** @brief COO sparse structure */
struct COO {
// COO format row array of the matrix
torch::Tensor row;
// COO format column array of the matrix
torch::Tensor col;
};
/**
* @brief Convert a CSR format to a COO format.
* @param num_rows Number of rows of the sparse format.
* @param num_cols Number of columns of the sparse format.
* @param csr CSR sparse format.
* @return COO sparse format.
*/
std::shared_ptr<COO> CSRToCOO(
int64_t num_rows, int64_t num_cols, const std::shared_ptr<CSR> csr);
} // namespace sparse
} // namespace dgl
#endif // SPARSE_SPARSE_FORMAT_H_
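A short illustration (not part of the commit) of the `value_indices` contract described above, written against plain libtorch; `matrix_values` stands in for the SparseMatrix's `value_` storage:
// Non-zero values as stored on the SparseMatrix, in the matrix's own order.
torch::Tensor matrix_values = torch::tensor({10, 20, 30});
// This CSR's non-zeros appear in a different order, recorded by value_indices.
torch::optional<torch::Tensor> value_indices = torch::tensor({2, 0, 1});
// Values in CSR order, i.e., value_[value_indices] -> {30, 10, 20}.
torch::Tensor csr_values = value_indices.has_value()
    ? matrix_values.index_select(0, *value_indices)
    : matrix_values;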
/**
* Copyright (c) 2022 by Contributors
* @file sparse/sparse_matrix.h
* @brief DGL C++ sparse matrix header
* @brief DGL C++ sparse matrix header.
*/
#ifndef SPARSE_SPARSE_MATRIX_H_
#define SPARSE_SPARSE_MATRIX_H_
// clang-format off
#include <sparse/dgl_headers.h>
// clang-format on
#include <sparse/sparse_format.h>
#include <torch/custom_class.h>
#include <torch/script.h>
@@ -15,33 +20,6 @@
namespace dgl {
namespace sparse {
/** @brief SparseFormat enumeration */
enum SparseFormat { kCOO, kCSR, kCSC };
/** @brief CSR sparse structure */
struct CSR {
// CSR format index pointer array of the matrix
torch::Tensor indptr;
// CSR format index array of the matrix
torch::Tensor indices;
// The element order of the sparse format. In the SparseMatrix, we have data
// (value_) for each non-zero value. The order of non-zero values in (value_)
// may differ from the order of non-zero entries in CSR. So we store
// `value_indices` in CSR to indicate its relative non-zero value order to the
// SparseMatrix. With `value_indices`, we can retrieve the correct value for
// CSR, i.e., `value_[value_indices]`. If `value_indices` is not defined, this
// CSR follows the same non-zero value order as the SparseMatrix.
torch::optional<torch::Tensor> value_indices;
};
/** @brief COO sparse structure */
struct COO {
// COO format row array of the matrix
torch::Tensor row;
// COO format column array of the matrix
torch::Tensor col;
};
/** @brief SparseMatrix bound to Python */
class SparseMatrix : public torch::CustomClassHolder {
public:
......
/**
* Copyright (c) 2022 by Contributors
* @file elementwise_op.cc
* @brief DGL C++ sparse elementwise operator implementation
* @brief DGL C++ sparse elementwise operator implementation.
*/
#include <dmlc/logging.h>
// clang-format off
#include <sparse/dgl_headers.h>
// clang-format on
#include <sparse/elementwise_op.h>
#include <sparse/sparse_matrix.h>
#include <torch/custom_class.h>
#include <torch/script.h>
#include <memory>
......
/**
* Copyright (c) 2022 by Contributors
* @file python_binding.cc
* @brief DGL sparse library Python binding
* @brief DGL sparse library Python binding.
*/
// clang-format off
#include <sparse/dgl_headers.h>
// clang-format on
#include <sparse/elementwise_op.h>
#include <sparse/sparse_matrix.h>
#include <torch/custom_class.h>
......
/**
* Copyright (c) 2022 by Contributors
* @file sparse_format.cc
* @brief DGL C++ sparse format implementations.
*/
// clang-format off
#include <sparse/dgl_headers.h>
// clang-format on
#include <sparse/sparse_format.h>
#include "./utils.h"
namespace dgl {
namespace sparse {
std::shared_ptr<COO> CSRToCOO(
int64_t num_rows, int64_t num_cols, const std::shared_ptr<CSR> csr) {
auto indptr = TorchTensorToDGLArray(csr->indptr);
auto indices = TorchTensorToDGLArray(csr->indices);
bool data_as_order = false;
runtime::NDArray data = aten::NullArray();
if (csr->value_indices.has_value()) {
data_as_order = true;
data = TorchTensorToDGLArray(csr->value_indices.value());
}
auto dgl_csr = aten::CSRMatrix(num_rows, num_cols, indptr, indices, data);
auto dgl_coo = aten::CSRToCOO(dgl_csr, data_as_order);
auto row = DGLArrayToTorchTensor(dgl_coo.row);
auto col = DGLArrayToTorchTensor(dgl_coo.col);
return std::make_shared<COO>(COO{row, col});
}
} // namespace sparse
} // namespace dgl
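A minimal usage sketch (not part of the commit) for the conversion above, assuming a libtorch program linked against the sparse library:
// A 3x3 matrix with non-zeros at (0,2), (2,0), and (2,1); no value reordering.
auto indptr = torch::tensor({0, 1, 1, 3});
auto indices = torch::tensor({2, 0, 1});
auto csr = std::make_shared<dgl::sparse::CSR>(
    dgl::sparse::CSR{indptr, indices, torch::nullopt});
auto coo = dgl::sparse::CSRToCOO(/*num_rows=*/3, /*num_cols=*/3, csr);
// coo->row is {0, 2, 2} and coo->col is {2, 0, 1}.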
/**
* Copyright (c) 2022 by Contributors
* @file sparse_matrix.cc
* @brief DGL C++ sparse matrix implementations
* @brief DGL C++ sparse matrix implementations.
*/
#include <dmlc/logging.h>
// clang-format off
#include <sparse/dgl_headers.h>
// clang-format on
#include <c10/util/Logging.h>
#include <sparse/elementwise_op.h>
#include <sparse/sparse_matrix.h>
#include <torch/script.h>
namespace dgl {
namespace sparse {
@@ -114,8 +119,19 @@ std::vector<torch::Tensor> SparseMatrix::CSCTensors() {
void SparseMatrix::SetValue(torch::Tensor value) { value_ = value; }
// TODO(zhenkun): format conversion
void SparseMatrix::_CreateCOO() {}
void SparseMatrix::_CreateCOO() {
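  // Lazily materialize the COO format: return early if it already exists,
  // otherwise derive it from whichever format the matrix currently holds.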
if (HasCOO()) {
return;
}
if (HasCSR()) {
coo_ = CSRToCOO(shape_[0], shape_[1], csr_);
} else if (HasCSC()) {
// TODO(zhenkun)
} else {
LOG(FATAL) << "SparseMatrix does not have any sparse format";
}
}
void SparseMatrix::_CreateCSR() {}
void SparseMatrix::_CreateCSC() {}
......
@@ -6,8 +6,14 @@
#ifndef DGL_SPARSE_UTILS_H_
#define DGL_SPARSE_UTILS_H_
#include <dmlc/logging.h>
// clang-format off
#include <sparse/dgl_headers.h>
// clang-format on
#include <ATen/DLConvertor.h>
#include <sparse/sparse_matrix.h>
#include <torch/custom_class.h>
#include <torch/script.h>
namespace dgl {
namespace sparse {
@@ -45,6 +51,16 @@ inline static void ElementwiseOpSanityCheck(
<< ", " << B->shape()[1] << "])";
}
/** @brief Convert a Torch tensor to a DGL array. */
inline static runtime::NDArray TorchTensorToDGLArray(torch::Tensor tensor) {
return runtime::DLPackConvert::FromDLPack(at::toDLPack(tensor));
}
/** @brief Convert a DGL array to a Torch tensor. */
inline static torch::Tensor DGLArrayToTorchTensor(runtime::NDArray array) {
return at::fromDLPack(runtime::DLPackConvert::ToDLPack(array));
}
} // namespace sparse
} // namespace dgl
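A round-trip sketch (not part of the commit) of the two helpers above. Both directions go through DLPack, which hands over the existing buffer rather than copying it:
torch::Tensor t = torch::arange(6);
dgl::runtime::NDArray array = dgl::sparse::TorchTensorToDGLArray(t);
torch::Tensor t2 = dgl::sparse::DGLArrayToTorchTensor(array);
// t2 aliases t's storage; a write through one is visible through the other.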
......
@@ -122,3 +122,33 @@ def test_set_val():
new_val = torch.zeros(nnz).to(ctx)
A.val = new_val
assert torch.allclose(new_val, A.val)
@pytest.mark.parametrize("dense_dim", [None, 4])
@pytest.mark.parametrize("indptr", [[0, 0, 1, 4], (0, 1, 2, 4)])
@pytest.mark.parametrize("indices", [(0, 1, 2, 3), (1, 4, 3, 2)])
@pytest.mark.parametrize("shape", [None, (3, 5)])
def test_csr_to_coo(dense_dim, indptr, indices, shape):
ctx = F.ctx()
val_shape = (len(indices),)
if dense_dim is not None:
val_shape += (dense_dim,)
val = torch.randn(val_shape).to(ctx)
indptr = torch.tensor(indptr).to(ctx)
indices = torch.tensor(indices).to(ctx)
mat = create_from_csr(indptr, indices, val, shape)
if shape is None:
shape = (indptr.numel() - 1, torch.max(indices).item() + 1)
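    # Expected COO row ids: repeat each row index once per non-zero in that
    # row; torch.diff(indptr) gives the per-row non-zero counts.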
row = torch.arange(0, indptr.shape[0] - 1).to(ctx).repeat_interleave(torch.diff(indptr))
col = indices
mat_row, mat_col, mat_val = mat.coo()
assert mat.shape == shape
assert mat.nnz == row.numel()
assert mat.device == row.device
assert mat.dtype == val.dtype
assert torch.allclose(mat_val, val)
assert torch.allclose(mat_row, row)
assert torch.allclose(mat_col, col)