Unverified Commit ab2bd1f1 authored by Israt Nisa's avatar Israt Nisa Committed by GitHub
Browse files

[Feature] Add cuda support for Sparse Matrix multiplication, summation and masking (#2782)



* init cuda support

* cuSPARSE err

* passed unittest for csr_mm/SpGEMM. int64 not supported

* Debugging cuSPARSE error 3

* csrgeam only supports int32?

* disabling int64 for cuda

* refactor and add CSRMask

* lint

* oops

* remove todo

* rewrite CSRMask with CSRGetData

* lint

* fix test

* address comments

* lint

* fix

* addresses comments and rename BUG_ON
Co-authored-by: Israt Nisa <nisisrat@amazon.com>
Co-authored-by: Ubuntu <ubuntu@ip-172-31-30-71.ec2.internal>
Co-authored-by: Quan Gan <coin2028@hotmail.com>
Co-authored-by: Jinjing Zhou <VoVAllen@users.noreply.github.com>
Co-authored-by: Minjie Wang <wmjlyjemaine@gmail.com>
parent e18c2ab4
......@@ -96,7 +96,14 @@ void BackwardSegmentCmp(NDArray feat,
/*!
* \brief Sparse-sparse matrix multiplication
*
* \note B is transposed (i.e. in CSC format).
* \param A The left operand.
* \param A_weights The weights of matrix A as a 1D tensor.
* \param B The right operand.
* \param B_weights The weights of matrix B as a 1D tensor.
*
* \note GPU implementation will cast the indices to 32 bit.
* \note The zero entries in the result are not removed.
* \note The CSR matrix should not have duplicate entries.
*/
template <int XPU, typename IdType, typename DType>
std::pair<CSRMatrix, NDArray> CSRMM(
......@@ -107,18 +114,19 @@ std::pair<CSRMatrix, NDArray> CSRMM(
/*!
* \brief Sparse-sparse matrix summation.
*
* \param A The sparse matrices with the same size.
* \param A_weights The weights of each sparse matrix as a 1D tensor.
*        NOTE(review): presumably A_weights[i] holds the weights of A[i], so both
*        vectors are expected to have the same length -- confirm against the implementation.
*
* \return A CSR matrix paired with an NDArray (per the return type).
*         NOTE(review): presumably the structure of the sum and the weights of its
*         entries -- confirm against the implementation.
*
* \note GPU implementation will cast the indices to 32 bit.
* \note The zero entries in the result are not removed.
* \note The CSR matrix should not have duplicate entries.
*/
template <int XPU, typename IdType, typename DType>
std::pair<CSRMatrix, NDArray> CSRSum(
const std::vector<CSRMatrix>& A,
const std::vector<NDArray>& A_weights);
/*!
* \brief Return a sparse matrix with the values of A but nonzero entry locations of B.
*
* \param A The matrix whose values are looked up.
* \param A_weights The weights of matrix A.
*        NOTE(review): presumably a 1D tensor as for CSRMM/CSRSum -- confirm.
* \param B The matrix whose nonzero entry locations select the output entries.
*
* \return An NDArray (per the return type), not a CSRMatrix.
*         NOTE(review): presumably the weight array aligned with the nonzero
*         entries of B, the structure being that of B itself -- confirm.
*/
template <int XPU, typename IdType, typename DType>
NDArray CSRMask(const CSRMatrix& A, NDArray A_weights, const CSRMatrix& B);
} // namespace aten
} // namespace dgl
......
......@@ -737,12 +737,12 @@ IdArray MapIds(IdArray ids, IdArray range_starts, IdArray range_ends, IdArray ty
IdType id = ids_data[i];
auto it = std::lower_bound(range_end_data, range_end_data + num_ranges, id);
// The range must exist.
BUG_ON(it != range_end_data + num_ranges);
BUG_IF_FAIL(it != range_end_data + num_ranges);
size_t range_id = it - range_end_data;
int type_id = range_id % num_types;
types_data[i] = type_id;
int part_id = range_id / num_types;
BUG_ON(part_id < num_parts);
BUG_IF_FAIL(part_id < num_parts);
if (part_id == 0) {
per_type_ids_data[i] = id - range_start_data[range_id];
} else {
......
......@@ -327,7 +327,7 @@ std::pair<dgl_id_t, dgl_id_t> COO::FindEdge(dgl_id_t eid) const {
EdgeArray COO::FindEdges(IdArray eids) const {
CHECK(aten::IsValidIdArray(eids)) << "Invalid edge id array";
BUG_ON(aten::IsNullArray(adj_.data)) <<
BUG_IF_FAIL(aten::IsNullArray(adj_.data)) <<
"FindEdges requires the internal COO matrix not having EIDs.";
return EdgeArray{aten::IndexSelect(adj_.row, eids),
aten::IndexSelect(adj_.col, eids),
......
......@@ -235,7 +235,7 @@ class UnitGraph::COO : public BaseHeteroGraph {
EdgeArray FindEdges(dgl_type_t etype, IdArray eids) const override {
CHECK(aten::IsValidIdArray(eids)) << "Invalid edge id array";
BUG_ON(aten::IsNullArray(adj_.data)) <<
BUG_IF_FAIL(aten::IsNullArray(adj_.data)) <<
"FindEdges requires the internal COO matrix not having EIDs.";
return EdgeArray{aten::IndexSelect(adj_.row, eids),
aten::IndexSelect(adj_.col, eids),
......
......@@ -17,6 +17,14 @@
extern "C" void NDArrayDLPackDeleter(DLManagedTensor* tensor);
namespace dgl {
// Namespace-scope definitions of the static constexpr `dtype` member for each
// DLDataTypeTraits specialization listed below.
// NOTE(review): presumably required so that odr-uses of `dtype` link when the
// project is built with a pre-C++17 standard, where static constexpr data
// members are not implicitly inline -- confirm against the build flags.
constexpr DLDataType DLDataTypeTraits<int32_t>::dtype;
constexpr DLDataType DLDataTypeTraits<int64_t>::dtype;
constexpr DLDataType DLDataTypeTraits<uint32_t>::dtype;
constexpr DLDataType DLDataTypeTraits<uint64_t>::dtype;
constexpr DLDataType DLDataTypeTraits<float>::dtype;
constexpr DLDataType DLDataTypeTraits<double>::dtype;
namespace runtime {
inline void VerifyDataType(DLDataType dtype) {
......@@ -251,7 +259,7 @@ template<typename T>
NDArray NDArray::FromVector(const std::vector<T>& vec, DLContext ctx) {
const DLDataType dtype = DLDataTypeTraits<T>::dtype;
int64_t size = static_cast<int64_t>(vec.size());
NDArray ret = NDArray::Empty({size}, dtype, DLContext{kDLCPU, 0});
NDArray ret = NDArray::Empty({size}, dtype, ctx);
DeviceAPI::Get(ctx)->CopyDataFromTo(
vec.data(),
0,
......
import numpy as np
import scipy.sparse as ssp
import dgl
from utils import parametrize_dtype
import backend as F
def _random_simple_graph(idtype, dtype, ctx, M, N, max_nnz, srctype, dsttype, etype):
    """Create a random weighted graph without duplicate edges, plus its SciPy twin.

    ``max_nnz`` (row, col) pairs are drawn at random with rows in ``[0, M)`` and
    columns in ``[0, N)``.  Pairs drawn more than once are merged (their weights
    are summed), so the result can contain fewer than ``max_nnz`` edges.

    Parameters
    ----------
    idtype
        Dtype passed to ``F.tensor`` for the edge endpoint tensors.
    dtype
        Dtype passed to ``F.tensor`` for the edge weights.
    ctx
        Device context the tensors are copied to.
    M, N : int
        Number of source / destination nodes (shape of the matrix).
    max_nnz : int
        Number of random entries drawn before duplicates are merged.
    srctype, dsttype, etype : str
        Source node type, destination node type and edge type of the single
        relation stored in the returned graph.

    Returns
    -------
    (a, A)
        ``a`` is the ``scipy.sparse`` COO matrix of shape ``(M, N)``; ``A`` is the
        DGL heterograph built from ``a.row`` / ``a.col`` whose ``edata['w']``
        holds ``a.data``.
    """
    # Keep the draw order (rows, columns, values) stable so seeded runs reproduce.
    src = np.random.randint(0, M, (max_nnz,))
    dst = np.random.randint(0, N, (max_nnz,))
    val = np.random.randn(max_nnz)

    a = ssp.csr_matrix((val, (src, dst)), shape=(M, N))
    a.sum_duplicates()
    a = a.tocoo()

    # Honour the requested type names instead of a hard-coded ('A', 'AB', 'B').
    # If source and destination share a type, that type must cover both axes.
    num_nodes = {srctype: a.shape[0]}
    num_nodes[dsttype] = max(num_nodes.get(dsttype, 0), a.shape[1])

    A = dgl.heterograph(
        {(srctype, etype, dsttype): (
            F.copy_to(F.tensor(a.row, dtype=idtype), ctx),
            F.copy_to(F.tensor(a.col, dtype=idtype), ctx))},
        num_nodes_dict=num_nodes)
    A.edata['w'] = F.copy_to(F.tensor(a.data, dtype=dtype), ctx)
    return a, A
@parametrize_dtype
def test_csrmm(idtype):
    """Compare ``dgl.sparse.csrmm`` with SciPy's sparse product for both float widths."""
    for weight_dtype in (F.float32, F.float64):
        lhs_sp, lhs_graph = _random_simple_graph(
            idtype, weight_dtype, F.ctx(), 500, 600, 9000, 'A', 'B', 'AB')
        rhs_sp, rhs_graph = _random_simple_graph(
            idtype, weight_dtype, F.ctx(), 600, 700, 9000, 'B', 'C', 'BC')

        # NOTE(review): the trailing 2 is presumably the number of node types of
        # the resulting graph -- confirm against dgl.sparse.csrmm.
        prod_graph, prod_weights = dgl.sparse.csrmm(
            lhs_graph._graph, lhs_graph.edata['w'],
            rhs_graph._graph, rhs_graph.edata['w'],
            2)

        # Take the sparsity pattern reported by DGL and plug in the returned weights.
        actual_sp = prod_graph.adjacency_matrix_scipy(0, True, 'csr')
        actual_sp.data = F.asnumpy(prod_weights)
        actual = F.tensor(actual_sp.todense(), dtype=weight_dtype)

        expected = F.tensor((lhs_sp * rhs_sp).todense(), dtype=weight_dtype)
        assert F.allclose(actual, expected)
@parametrize_dtype
def test_csrsum(idtype):
    """Compare ``dgl.sparse.csrsum`` with SciPy's sparse addition for both float widths."""
    for weight_dtype in (F.float32, F.float64):
        # Two independent random operands of identical shape, drawn one after the other.
        first_sp, first_graph = _random_simple_graph(
            idtype, weight_dtype, F.ctx(), 500, 600, 9000, 'A', 'B', 'AB')
        second_sp, second_graph = _random_simple_graph(
            idtype, weight_dtype, F.ctx(), 500, 600, 9000, 'A', 'B', 'AB')

        sum_graph, sum_weights = dgl.sparse.csrsum(
            [first_graph._graph, second_graph._graph],
            [first_graph.edata['w'], second_graph.edata['w']])

        # Take the sparsity pattern reported by DGL and plug in the returned weights.
        actual_sp = sum_graph.adjacency_matrix_scipy(0, True, 'csr')
        actual_sp.data = F.asnumpy(sum_weights)
        actual = F.tensor(actual_sp.todense(), dtype=weight_dtype)

        expected = F.tensor((first_sp + second_sp).todense(), dtype=weight_dtype)
        assert F.allclose(actual, expected)
@parametrize_dtype
def test_csrmask(idtype):
    """Compare ``dgl.sparse.csrmask`` with SciPy boolean-mask indexing for both float widths."""
    for weight_dtype in (F.float32, F.float64):
        values_sp, values_graph = _random_simple_graph(
            idtype, weight_dtype, F.ctx(), 500, 600, 9000, 'A', 'B', 'AB')
        mask_sp, mask_graph = _random_simple_graph(
            idtype, weight_dtype, F.ctx(), 500, 600, 9000, 'A', 'B', 'AB')

        masked = dgl.sparse.csrmask(
            values_graph._graph, values_graph.edata['w'], mask_graph._graph)

        # Reference: entries of the first matrix picked at the second matrix's nonzeros.
        nonzero_of_mask = mask_sp != 0
        reference = F.tensor(values_sp.tocsr()[nonzero_of_mask], weight_dtype)
        assert F.allclose(masked, reference)
if __name__ == '__main__':
    # Run every sparse-op test with 32-bit indices first, then 64-bit indices.
    for run_test in (test_csrmm, test_csrsum, test_csrmask):
        for index_dtype in (F.int32, F.int64):
            run_test(index_dtype)
......@@ -283,6 +283,5 @@ def test_segment_reduce(reducer):
assert F.allclose(grad1, grad2)
print('backward passed')
if __name__ == '__main__':
test_spmm(F.int32, graphs[0], spmm_shapes[0], 'mul', 'sum')
......@@ -64,9 +64,10 @@ std::pair<aten::CSRMatrix, NDArray> CSR_A(DLContext ctx = CTX) {
auto csr = aten::CSRMatrix(
4, 5,
NDArray::FromVector(std::vector<IdType>({0, 2, 4, 7, 8}), ctx),
NDArray::FromVector(std::vector<IdType>({2, 3, 2, 3, 0, 1, 3, 4}), ctx));
NDArray::FromVector(std::vector<IdType>({2, 3, 2, 3, 0, 1, 3, 4}), ctx),
NDArray::FromVector(std::vector<IdType>({1, 0, 2, 3, 4, 5, 6, 7}), ctx));
auto weights = NDArray::FromVector(
std::vector<DType>({1.0, 0.7, 0.5, 0.0, 0.4, 0.7, 0.2, 0.2}), ctx);
std::vector<DType>({0.7, 1.0, 0.5, 0.0, 0.4, 0.7, 0.2, 0.2}), ctx);
return {csr, weights};
}
......@@ -162,7 +163,8 @@ template <typename IdType, typename DType>
void _TestCsrmask(DLContext ctx = CTX) {
auto A = CSR_A<IdType, DType>(ctx);
auto C = CSR_C<IdType, DType>(ctx);
auto A_mask_C = aten::CSRMask(A.first, A.second, C.first);
auto C_coo = CSRToCOO(C.first, false);
auto A_mask_C = aten::CSRGetData<DType>(A.first, C_coo.row, C_coo.col, A.second, 0);
auto A_mask_C2 = CSR_A_mask_C<DType>(ctx);
ASSERT_TRUE(ArrayEQ<DType>(A_mask_C, A_mask_C2));
}
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment