"git@developer.sourcefind.cn:OpenDAS/dgl.git" did not exist on "bf649d940c7316f7031bf723bdcbcf2da44ab7e7"
Unverified Commit fa5ff2fc authored by Minjie Wang's avatar Minjie Wang Committed by GitHub
Browse files

[Sparse] Add coalesce and has_duplicate to SparseMatrix (#5071)

* add coalesce() and has_duplicate()

* Add duplicate check in spspmm

* black

* fix

* clang-format

* lintrunner

* minor fix

* fix
parent 8c3e7830
...@@ -116,6 +116,23 @@ class SparseMatrix : public torch::CustomClassHolder { ...@@ -116,6 +116,23 @@ class SparseMatrix : public torch::CustomClassHolder {
*/ */
c10::intrusive_ptr<SparseMatrix> Transpose() const; c10::intrusive_ptr<SparseMatrix> Transpose() const;
/**
 * @brief Return a new coalesced matrix.
 *
 * A coalesced sparse matrix satisfies the following properties:
 * - the indices of the non-zero elements are unique,
 * - the indices are sorted in lexicographical order.
 *
 * Values at duplicate indices are accumulated by summation during
 * coalescing (delegated to torch's sparse-COO coalesce).
 *
 * @return A coalesced sparse matrix.
 */
c10::intrusive_ptr<SparseMatrix> Coalesce();
/**
 * @brief Return true if this sparse matrix contains duplicate indices.
 *
 * Non-const: the check runs on a CSR (preferred) or CSC representation
 * and may lazily materialize one if only the other format exists.
 *
 * @return A bool flag.
 */
bool HasDuplicate();
private: private:
/** @brief Create the COO format for the sparse matrix internally */ /** @brief Create the COO format for the sparse matrix internally */
void _CreateCOO(); void _CreateCOO();
......
...@@ -29,7 +29,9 @@ TORCH_LIBRARY(dgl_sparse, m) { ...@@ -29,7 +29,9 @@ TORCH_LIBRARY(dgl_sparse, m) {
.def("coo", &SparseMatrix::COOTensors) .def("coo", &SparseMatrix::COOTensors)
.def("csr", &SparseMatrix::CSRTensors) .def("csr", &SparseMatrix::CSRTensors)
.def("csc", &SparseMatrix::CSCTensors) .def("csc", &SparseMatrix::CSCTensors)
.def("transpose", &SparseMatrix::Transpose); .def("transpose", &SparseMatrix::Transpose)
.def("coalesce", &SparseMatrix::Coalesce)
.def("has_duplicate", &SparseMatrix::HasDuplicate);
m.def("create_from_coo", &CreateFromCOO) m.def("create_from_coo", &CreateFromCOO)
.def("create_from_csr", &CreateFromCSR) .def("create_from_csr", &CreateFromCSR)
.def("create_from_csc", &CreateFromCSC) .def("create_from_csc", &CreateFromCSC)
......
/**
* Copyright (c) 2022 by Contributors
* @file sparse_matrix_coalesce.cc
* @brief Operators related to sparse matrix coalescing.
*/
// clang-format off
#include <sparse/dgl_headers.h>
// clang-format on
#include <sparse/sparse_matrix.h>
#include "./utils.h"
namespace dgl {
namespace sparse {
c10::intrusive_ptr<SparseMatrix> SparseMatrix::Coalesce() {
  // Bridge to a torch sparse-COO tensor and let torch perform the
  // coalescing (dedup + lexicographic sort, duplicate values summed).
  const auto coalesced =
      COOToTorchCOO(this->COOPtr(), this->value()).coalesce();
  const auto idx = coalesced.indices();
  // idx is a 2 x nnz tensor: first row holds row ids, second holds columns.
  return CreateFromCOO(idx[0], idx[1], coalesced.values(), this->shape());
}
bool SparseMatrix::HasDuplicate() {
  // Pick the format to run the check on. Prefer an existing CSR; use the
  // CSC only when it is the sole materialized format (otherwise a CSR
  // would be created on demand).
  const bool only_csc = !HasCSR() && HasCSC();
  aten::CSRMatrix dgl_csr =
      only_csc ? CSRToOldDGLCSR(CSCPtr()) : CSRToOldDGLCSR(CSRPtr());
  return aten::CSRHasDuplicate(dgl_csr);
}
} // namespace sparse
} // namespace dgl
...@@ -32,17 +32,30 @@ void _SpSpMMSanityCheck( ...@@ -32,17 +32,30 @@ void _SpSpMMSanityCheck(
const c10::intrusive_ptr<SparseMatrix>& rhs_mat) { const c10::intrusive_ptr<SparseMatrix>& rhs_mat) {
const auto& lhs_shape = lhs_mat->shape(); const auto& lhs_shape = lhs_mat->shape();
const auto& rhs_shape = rhs_mat->shape(); const auto& rhs_shape = rhs_mat->shape();
CHECK_EQ(lhs_shape[1], rhs_shape[0]) TORCH_CHECK(
<< "SpSpMM: the second dim of lhs_mat should be equal to the first dim " lhs_shape[1] == rhs_shape[0],
"of the second matrix"; "SpSpMM: the second dim of lhs_mat should be equal to the first dim ",
CHECK_EQ(lhs_mat->value().dim(), 1) "of the second matrix");
<< "SpSpMM: the value shape of lhs_mat should be 1-D"; TORCH_CHECK(
CHECK_EQ(rhs_mat->value().dim(), 1) lhs_mat->value().dim() == 1,
<< "SpSpMM: the value shape of rhs_mat should be 1-D"; "SpSpMM: the value shape of lhs_mat should be 1-D");
CHECK_EQ(lhs_mat->device(), rhs_mat->device()) TORCH_CHECK(
<< "SpSpMM: lhs_mat and rhs_mat should on the same device"; rhs_mat->value().dim() == 1,
CHECK_EQ(lhs_mat->dtype(), rhs_mat->dtype()) "SpSpMM: the value shape of rhs_mat should be 1-D");
<< "SpSpMM: lhs_mat and rhs_mat should have the same dtype"; TORCH_CHECK(
lhs_mat->device() == rhs_mat->device(),
"SpSpMM: lhs_mat and rhs_mat should be on the same device");
TORCH_CHECK(
lhs_mat->dtype() == rhs_mat->dtype(),
"SpSpMM: lhs_mat and rhs_mat should have the same dtype");
TORCH_CHECK(
!lhs_mat->HasDuplicate(),
"SpSpMM does not support lhs_mat with duplicate indices. ",
"Call A = A.coalesce() to dedup first.");
TORCH_CHECK(
!rhs_mat->HasDuplicate(),
"SpSpMM does not support rhs_mat with duplicate indices. ",
"Call A = A.coalesce() to dedup first.");
} }
// Mask select value of `mat` by `sub_mat`. // Mask select value of `mat` by `sub_mat`.
......
...@@ -66,6 +66,28 @@ class SparseMatrix: ...@@ -66,6 +66,28 @@ class SparseMatrix:
""" """
return self.c_sparse_matrix.device() return self.c_sparse_matrix.device()
@property
def row(self) -> torch.Tensor:
    """Row indices of the nonzero elements.

    Returns
    -------
    tensor
        Row indices of the nonzero elements
    """
    coo_indices = self.coo()
    return coo_indices[0]
@property
def col(self) -> torch.Tensor:
    """Column indices of the nonzero elements.

    Returns
    -------
    tensor
        Column indices of the nonzero elements
    """
    coo_indices = self.coo()
    return coo_indices[1]
def indices( def indices(
self, fmt: str, return_shuffle=False self, fmt: str, return_shuffle=False
) -> Tuple[torch.Tensor, ...]: ) -> Tuple[torch.Tensor, ...]:
...@@ -173,6 +195,60 @@ class SparseMatrix: ...@@ -173,6 +195,60 @@ class SparseMatrix:
""" """
return SparseMatrix(self.c_sparse_matrix.transpose()) return SparseMatrix(self.c_sparse_matrix.transpose())
def coalesce(self):
    """Return a coalesced version of this sparse matrix.

    A coalesced sparse matrix satisfies the following properties:

      - the indices of the non-zero elements are unique,
      - the indices are sorted in lexicographical order.

    Non-zero values that share the same indices are accumulated by
    summation during coalescing.

    The function does not support autograd.

    Returns
    -------
    SparseMatrix
        The coalesced sparse matrix.

    Examples
    --------

    >>> row = torch.tensor([1, 0, 0, 0, 1])
    >>> col = torch.tensor([1, 1, 1, 2, 2])
    >>> val = torch.tensor([0, 1, 2, 3, 4])
    >>> A = create_from_coo(row, col, val)
    >>> A = A.coalesce()
    >>> print(A)
    SparseMatrix(indices=tensor([[0, 0, 1, 1],
            [1, 2, 1, 2]]),
    values=tensor([3, 3, 0, 4]),
    shape=(2, 3), nnz=4)
    """
    # Delegate to the C++ backend and wrap the result in a new Python
    # SparseMatrix.
    coalesced = self.c_sparse_matrix.coalesce()
    return SparseMatrix(coalesced)
def has_duplicate(self):
    """Check whether this sparse matrix contains duplicate indices.

    Returns
    -------
    bool
        True if this sparse matrix contains duplicate indices.

    Examples
    --------

    >>> row = torch.tensor([1, 0, 0, 0, 1])
    >>> col = torch.tensor([1, 1, 1, 2, 2])
    >>> val = torch.tensor([0, 1, 2, 3, 4])
    >>> A = create_from_coo(row, col, val)
    >>> print(A.has_duplicate())
    True
    >>> print(A.coalesce().has_duplicate())
    False
    """
    # The duplicate check is implemented in the C++ backend.
    return self.c_sparse_matrix.has_duplicate()
def create_from_coo( def create_from_coo(
row: torch.Tensor, row: torch.Tensor,
......
...@@ -4,7 +4,7 @@ import backend as F ...@@ -4,7 +4,7 @@ import backend as F
import pytest import pytest
import torch import torch
from dgl.mock_sparse2 import val_like from dgl.mock_sparse2 import create_from_coo, val_like
from .utils import ( from .utils import (
clone_detach_and_grad, clone_detach_and_grad,
...@@ -59,9 +59,7 @@ def test_spmm(create_func, shape, nnz, out_dim): ...@@ -59,9 +59,7 @@ def test_spmm(create_func, shape, nnz, out_dim):
@pytest.mark.parametrize("shape_k", [3, 4]) @pytest.mark.parametrize("shape_k", [3, 4])
@pytest.mark.parametrize("nnz1", [1, 10]) @pytest.mark.parametrize("nnz1", [1, 10])
@pytest.mark.parametrize("nnz2", [1, 10]) @pytest.mark.parametrize("nnz2", [1, 10])
def test_sparse_sparse_mm( def test_spspmm(create_func1, create_func2, shape_n_m, shape_k, nnz1, nnz2):
create_func1, create_func2, shape_n_m, shape_k, nnz1, nnz2
):
dev = F.ctx() dev = F.ctx()
shape1 = shape_n_m shape1 = shape_n_m
shape2 = (shape_n_m[1], shape_k) shape2 = (shape_n_m[1], shape_k)
...@@ -89,3 +87,33 @@ def test_sparse_sparse_mm( ...@@ -89,3 +87,33 @@ def test_sparse_sparse_mm(
torch_A2.grad.to_dense(), torch_A2.grad.to_dense(),
atol=1e-05, atol=1e-05,
) )
def test_spspmm_duplicate():
    """SpSpMM must reject operands that contain duplicate indices."""
    dev = F.ctx()
    shape = (4, 4)

    # A1 has duplicate entries at (0, 1).
    row = torch.tensor([1, 0, 0, 0, 1]).to(dev)
    col = torch.tensor([1, 1, 1, 2, 2]).to(dev)
    val = torch.randn(len(row)).to(dev)
    A1 = create_from_coo(row, col, val, shape)

    # A2 is duplicate-free.
    row = torch.tensor([1, 0, 0, 1]).to(dev)
    col = torch.tensor([1, 1, 2, 2]).to(dev)
    val = torch.randn(len(row)).to(dev)
    A2 = create_from_coo(row, col, val, shape)

    # Multiplication must fail whichever side holds the duplicates.
    # pytest.raises replaces the try/except/else pattern of the original,
    # which used a bare `except:` that would also swallow unrelated
    # exceptions such as KeyboardInterrupt/SystemExit.
    with pytest.raises(Exception):
        A1 @ A2
    with pytest.raises(Exception):
        A2 @ A1
import pytest
import torch
import sys import sys
import backend as F import backend as F
import pytest
import torch
from dgl.mock_sparse2 import create_from_coo, create_from_csr, create_from_csc, val_like from dgl.mock_sparse2 import (
create_from_coo,
create_from_csc,
create_from_csr,
val_like,
)
# TODO(#4818): Skipping tests on win. # TODO(#4818): Skipping tests on win.
if not sys.platform.startswith("linux"): if not sys.platform.startswith("linux"):
...@@ -337,6 +342,7 @@ def test_csr_to_csc(dense_dim, indptr, indices, shape): ...@@ -337,6 +342,7 @@ def test_csr_to_csc(dense_dim, indptr, indices, shape):
assert torch.allclose(mat_indptr, indptr) assert torch.allclose(mat_indptr, indptr)
assert torch.allclose(mat_indices, indices) assert torch.allclose(mat_indices, indices)
@pytest.mark.parametrize("val_shape", [(3), (3, 2)]) @pytest.mark.parametrize("val_shape", [(3), (3, 2)])
@pytest.mark.parametrize("shape", [(3, 5), (5, 5)]) @pytest.mark.parametrize("shape", [(3, 5), (5, 5)])
def test_val_like(val_shape, shape): def test_val_like(val_shape, shape):
...@@ -368,3 +374,47 @@ def test_val_like(val_shape, shape): ...@@ -368,3 +374,47 @@ def test_val_like(val_shape, shape):
csc_A = create_from_csc(indptr, indices, val, shape) csc_A = create_from_csc(indptr, indices, val, shape)
csc_B = val_like(csc_A, new_val) csc_B = val_like(csc_A, new_val)
check_val_like(csc_A, csc_B) check_val_like(csc_A, csc_B)
def test_coalesce():
    """Coalescing dedups/sorts indices and sums duplicate values."""
    ctx = F.ctx()
    row = torch.tensor([1, 0, 0, 0, 1]).to(ctx)
    col = torch.tensor([1, 1, 1, 2, 2]).to(ctx)
    val = torch.arange(len(row)).to(ctx)
    A = create_from_coo(row, col, val, (4, 4))
    assert A.has_duplicate()

    A_coalesced = A.coalesce()
    assert A_coalesced.nnz == 4
    assert A_coalesced.shape == (4, 4)
    # Compare via .tolist(): it moves data to the host and yields plain
    # Python ints, giving an exact comparison on any device. The original
    # `list(tensor) == [ints]` relied on implicit truth-testing of 0-d
    # tensors, which is fragile.
    assert A_coalesced.row.tolist() == [0, 0, 1, 1]
    assert A_coalesced.col.tolist() == [1, 2, 1, 2]
    # Values of duplicate indices are added together: (0, 1) got 1 + 2.
    assert A_coalesced.val.tolist() == [3, 3, 0, 4]
    assert not A_coalesced.has_duplicate()
def test_has_duplicate():
    """has_duplicate() detects duplicates in every storage format."""
    ctx = F.ctx()
    row = torch.tensor([1, 0, 0, 0, 1]).to(ctx)
    col = torch.tensor([1, 1, 1, 2, 2]).to(ctx)
    val = torch.arange(len(row)).to(ctx)
    shape = (4, 4)

    coo_A = create_from_coo(row, col, val, shape)
    csr_indptr, csr_indices, _ = coo_A.csr()
    csc_indptr, csc_indices, _ = coo_A.csc()

    # The same matrix built from COO, CSR and CSC must all report
    # duplicate indices.
    matrices = [
        coo_A,
        create_from_csr(csr_indptr, csr_indices, val, shape),
        create_from_csc(csc_indptr, csc_indices, val, shape),
    ]
    for mat in matrices:
        assert mat.has_duplicate()
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment