[Sparse] Add reduction operators (#4600)

* [Sparse] Add reduction operators * Fix linting problem * Fix linting problem * Fix linting problem * Fix docstring * Fix test cases * Fix linting problem * Skip reduction unittests

[Sparse] Add reduction operators (#4600)
* [Sparse] Add reduction operators * Fix linting problem * Fix linting problem * Fix linting problem * Fix docstring * Fix test cases * Fix linting problem * Skip reduction unittests
544a2cd1 · czkkkkkk · GitHub · 80b22ad8 · 544a2cd1 · 544a2cd1
Unverified Commit 544a2cd1 authored Sep 26, 2022 by czkkkkkk Committed by GitHub Sep 26, 2022
5 changed files
--- a/docs/source/api/python/dgl.sparse_v0.rst
+++ b/docs/source/api/python/dgl.sparse_v0.rst
@@ -13,7 +13,8 @@ Sparse matrix class
 .. currentmodule:: dgl.mock_sparse
 .. autoclass:: SparseMatrix
-	:members: shape, nnz, dtype, device, row, col, val, __call__, indices, coo, csr, csc, dense, t, T, transpose
+	:members: shape, nnz, dtype, device, row, col, val, __call__, indices, coo, csr, csc, dense, t, T, transpose,
+            reduce, sum, smax, smin, smean
 .. autosummary::
    :toctree: ../../generated/
@@ -27,7 +28,8 @@ Diagonal matrix class
 .. currentmodule:: dgl.mock_sparse
 .. autoclass:: DiagMatrix
-	:members: val, shape, __call__, nnz, dtype, device, as_sparse, t, T, transpose
+	:members: val, shape, __call__, nnz, dtype, device, as_sparse, t, T, transpose,
+            reduce, sum, smax, smin, smean
 .. autosummary::
    :toctree: ../../generated/

--- a/python/dgl/mock_sparse/__init__.py
+++ b/python/dgl/mock_sparse/__init__.py
@@ -2,3 +2,4 @@
 from .diag_matrix import *
 from .sp_matrix import *
 from .elementwise_op_sp import *
+from .reduction import *   # pylint: disable=W0622
--- a/python/dgl/mock_sparse/reduction.py
+++ b/python/dgl/mock_sparse/reduction.py
+"""dgl reduce operators for sparse matrix module."""
+from typing import Optional
+import torch
+from .sp_matrix import SparseMatrix
+def reduce(A: SparseMatrix, dim: Optional[int]=None, rtype: str = "sum"):
+    """Compute the reduction of non-zero values in sparse matrix A along
+    the given dimension :attr:`dim`.
+    If :attr:`dim` is None, it reduces all the elements in the sparse
+    matrix. Otherwise, it reduces on the row (``dim=0``) or column (``dim=1``)
+    dimension, producing a tensor of shape ``(A.shape[1], ) + A.val.shape[:1]``
+    or ``(A.shape[0],) + A.val.shape[:1]``.
+    The reduction does not count zero values. If the row or column to be
+    reduced does not have any non-zero value, the result will be 0.
+    Parameters
+    ----------
+    A : SparseMatrix
+        Sparse matrix
+    dim : int, optional
+        The dimension to reduce.
+    rtype: str
+        Reduction type, one of ['sum', 'smin', 'smax', 'smean']
+    Returns
+    ----------
+    Tensor
+        Reduced tensor
+    Examples
+    ----------
+    >>> row = torch.tensor([0, 1, 1])
+    >>> col = torch.tensor([0, 0, 2])
+    >>> val = torch.tensor([1, 1, 2])
+    >>> A = create_from_coo(row, col, val, shape=(4, 3))
+    >>> print(A.reduce(rtype='sum'))
+    tensor(4)
+    >>> print(A.reduce(0, 'sum'))
+    tensor([2, 0, 2])
+    >>> print(A.reduce(1, 'sum'))
+    tensor([1, 3, 0, 0])
+    >>> print(A.reduce(0, 'smax'))
+    tensor([1, 0, 2])
+    >>> print(A.reduce(1, 'smin'))
+    tensor([1, 1, 0, 0])
+    >>> row = torch.tensor([0, 1, 1])
+    >>> col = torch.tensor([0, 0, 2])
+    >>> val = torch.tensor([[1, 2], [2, 1], [2, 2]])
+    >>> A = create_from_coo(row, col, val, shape=(4, 3))
+    >>> print(A.reduce(reduce='sum'))
+    tensor([5, 5])
+    >>> print(A.reduce(0, 'sum'))
+    tensor([[3, 3], [0, 0], [2, 2]])
+    >>> print(A.reduce(1, 'smin'))
+    tensor([[1, 2], [2, 1], [0, 0], [0, 0]])
+    >>> print(A.reduce(0, 'smean'))
+    tensor([[1, 1], [0, 0], [2, 2]])
+    """
+    if dim is not None and not isinstance(dim, int):
+        raise ValueError(f"Reduce dimension should be int but got {dim}")
+    if dim is None:
+        if rtype == "sum":
+            return torch.sum(A.val, dim=0)
+        if rtype == "smax":
+            return torch.amax(A.val, dim=0)
+        if rtype == "smin":
+            return torch.amin(A.val, dim=0)
+        if rtype == "smean":
+            return torch.mean(A.val, dim=0, dtype=torch.float64).to(A.val.dtype)
+    if dim == 0:
+        index = A.col
+        reduced_shape = (A.shape[1],) + A.val.shape[1:]
+        reduced = torch.zeros(reduced_shape, dtype=A.val.dtype, device=A.device)
+    else:
+        index = A.row
+        reduced_shape = (A.shape[0],) + A.val.shape[1:]
+        reduced = torch.zeros(reduced_shape, dtype=A.val.dtype, device=A.device)
+    if rtype in ("smax", "smin"):
+        rtype = "a" + rtype[1:]
+    if rtype == "smean":
+        rtype = "mean"
+    if len(A.val.shape) > 1:
+        index = torch.unsqueeze(index, 1)
+        index = index.repeat([1, A.val.shape[1]])
+    reduced = reduced.scatter_reduce(
+        0, index, A.val, reduce=rtype, include_self=False
+    )
+    return reduced
+def sum(A: SparseMatrix, dim: Optional[int]=None):  # pylint: disable=W0622
+    """Compute the sum of non-zero values in sparse matrix A along
+    the given dimension :attr:`dim`.
+    If :attr:`dim` is None, it reduces all the elements in the sparse matrix.
+    Otherwise, it reduces on the row (``dim=0``) or column (``dim=1``) dimension,
+    producing a tensor of shape ``(A.shape[1], ) + A.val.shape[:1]`` or
+    ``(A.shape[0],) + A.val.shape[:1]``.
+    Parameters
+    ----------
+    dim : int, optional
+        The dimension to reduce.
+    Returns
+    ----------
+    Tensor
+        Reduced tensor
+    Examples
+    ----------
+    >>> row = torch.tensor([0, 1, 1])
+    >>> col = torch.tensor([0, 0, 2])
+    >>> val = torch.tensor([1, 1, 2])
+    >>> A = create_from_coo(row, col, val, shape=(4, 3))
+    >>> print(A.sum())
+    tensor(4)
+    >>> print(A.sum(0))
+    tensor([2, 0, 2])
+    >>> print(A.sum(1))
+    tensor([1, 3, 0, 0])
+    >>> row = torch.tensor([0, 1, 1])
+    >>> col = torch.tensor([0, 0, 2])
+    >>> val = torch.tensor([[1, 2], [2, 1], [2, 2]])
+    >>> A = create_from_coo(row, col, val, shape=(4, 3))
+    >>> print(A.sum())
+    tensor([5, 5])
+    >>> print(A.sum(0))
+    tensor([[3, 3], [0, 0], [2, 2]])
+    """
+    return A.reduce(dim, rtype="sum")
+def smax(A: SparseMatrix, dim: Optional[int]=None):
+    """Compute the maximum of non-zero values in sparse matrix A along
+    the given dimension :attr:`dim`.
+    If :attr:`dim` is None, it reduces all the elements in the sparse matrix.
+    Otherwise, it reduces on the row (``dim=0``) or column (``dim=1``) dimension,
+    producing a tensor of shape ``(A.shape[1], ) + A.val.shape[:1]`` or
+    ``(A.shape[0],) + A.val.shape[:1]``.
+    The reduction does not count zero values. If the row or column to be
+    reduced does not have any non-zero value, the result will be 0.
+    Parameters
+    ----------
+    dim : int, optional
+        The dimension to reduce.
+    Returns
+    ----------
+    Tensor
+        Reduced tensor
+    Examples
+    ----------
+    >>> row = torch.tensor([0, 1, 1])
+    >>> col = torch.tensor([0, 0, 2])
+    >>> val = torch.tensor([1, 1, 2])
+    >>> A = create_from_coo(row, col, val, shape=(4, 3))
+    >>> print(A.smax())
+    tensor(2)
+    >>> print(A.smax(0))
+    tensor([1, 0, 2])
+    >>> print(A.smax(1))
+    tensor([1, 2, 0, 0])
+    >>> row = torch.tensor([0, 1, 1])
+    >>> col = torch.tensor([0, 0, 2])
+    >>> val = torch.tensor([[1, 2], [2, 1], [2, 2]])
+    >>> A = create_from_coo(row, col, val, shape=(4, 3))
+    >>> print(A.smax())
+    tensor([2, 2])
+    >>> print(A.smax(0))
+    tensor([[2, 2], [0, 0], [2, 2]])
+    >>> print(A.smax(1))
+    tensor([[1, 2], [2, 2], [0, 0], [0, 0]])
+    """
+    return A.reduce(dim, rtype="smax")
+def smin(A: SparseMatrix, dim: Optional[int]=None):
+    """Compute the minimum of non-zero values in sparse matrix A along
+    the given dimension :attr:`dim`.
+    If :attr:`dim` is None, it reduces all the elements in the sparse matrix.
+    Otherwise, it reduces on the row (``dim=0``) or column (``dim=1``) dimension,
+    producing a tensor of shape ``(A.shape[1], ) + A.val.shape[:1]`` or
+    ``(A.shape[0],) + A.val.shape[:1]``.
+    The reduction does not count zero values. If the row or column to be reduced
+    does not have any non-zero value, the result will be 0.
+    Parameters
+    ----------
+    dim : int, optional
+        The dimension to reduce.
+    Returns
+    ----------
+    Tensor
+        Reduced tensor
+    Example
+    ----------
+    >>> row = torch.tensor([0, 1, 1])
+    >>> col = torch.tensor([0, 0, 2])
+    >>> val = torch.tensor([1, 1, 2])
+    >>> A = create_from_coo(row, col, val, shape=(4, 3))
+    >>> print(A.smin())
+    tensor(1)
+    >>> print(A.smin(0))
+    tensor([1, 0, 2])
+    >>> print(A.smin(1))
+    tensor([1, 1, 0, 0])
+    >>> row = torch.tensor([0, 1, 1])
+    >>> col = torch.tensor([0, 0, 2])
+    >>> val = torch.tensor([[1, 2], [2, 1], [2, 2]])
+    >>> A = create_from_coo(row, col, val, shape=(4, 3))
+    >>> print(A.smin())
+    tensor([1, 1])
+    >>> print(A.smin(0))
+    tensor([[1, 1], [0, 0], [2, 2]])
+    >>> print(A.smin(1))
+    tensor([[1, 2], [2, 1], [0, 0], [0, 0]])
+    """
+    return A.reduce(dim, rtype="smin")
+def smean(A: SparseMatrix, dim: Optional[int]=None):
+    """Compute the mean of non-zero values in sparse matrix A along
+    the given dimension :attr:`dim`.
+    If :attr:`dim` is None, it reduces all the elements in the sparse matrix.
+    Otherwise, it reduces on the row (``dim=0``) or column (``dim=1``) dimension,
+    producing a tensor of shape ``(A.shape[1], ) + A.val.shape[:1]`` or
+    ``(A.shape[0],) + A.val.shape[:1]``.
+    The reduction does not count zero values. If the row or column to be reduced
+    does not have any non-zero value, the result will be 0.
+    Parameters
+    ----------
+    dim : int, optional
+        The dimension to reduce.
+    Returns
+    ----------
+    Tensor
+        Reduced tensor
+    Example
+    ----------
+    >>> row = torch.tensor([0, 1, 1])
+    >>> col = torch.tensor([0, 0, 2])
+    >>> val = torch.tensor([1, 1, 2])
+    >>> A = create_from_coo(row, col, val, shape=(4, 3))
+    >>> print(A.smean())
+    tensor(1)
+    >>> print(A.smean(0))
+    tensor([1, 0, 2])
+    >>> print(A.smean(1))
+    tensor([1, 1, 0, 0])
+    >>> row = torch.tensor([0, 1, 1])
+    >>> col = torch.tensor([0, 0, 2])
+    >>> val = torch.tensor([[1, 2], [2, 1], [2, 2]])
+    >>> A = create_from_coo(row, col, val, shape=(4, 3))
+    >>> print(A.smean())
+    tensor([1, 1])
+    >>> print(A.smean(0))
+    tensor([[1, 1], [0, 0], [2, 2]])
+    >>> print(A.smean(1))
+    tensor([[1, 2], [2, 1], [0, 0], [0, 0]])
+    """
+    return A.reduce(dim, rtype="smean")
+SparseMatrix.reduce = reduce
+SparseMatrix.sum = sum
+SparseMatrix.smax = smax
+SparseMatrix.smin = smin
+SparseMatrix.smean = smean
--- a/tests/pytorch/mock_sparse/test_reduction.py
+++ b/tests/pytorch/mock_sparse/test_reduction.py
+import pytest
+import torch
+import numpy
+from dgl.mock_sparse import create_from_coo
+@pytest.mark.skip(reason="no way of currently testing this")
+@pytest.mark.parametrize("dense_dim", [None, 2])
+@pytest.mark.parametrize("row", [[0, 0, 1, 2], (0, 1, 2, 4)])
+@pytest.mark.parametrize("col", [(0, 1, 2, 2), (1, 3, 3, 4)])
+@pytest.mark.parametrize("extra_shape", [(0, 1), (2, 1)])
+@pytest.mark.parametrize("reduce_type", ['sum', 'smax', 'smin', 'smean'])
+@pytest.mark.parametrize("dim", [None, 0, 1])
+def test_reduction(dense_dim, row, col, extra_shape, reduce_type, dim):
+    mat_shape = (max(row) + 1 + extra_shape[0], max(col) + 1 + extra_shape[1])
+    val_shape = (len(row),)
+    if dense_dim is not None:
+        val_shape += (dense_dim,)
+    val = torch.randn(val_shape)
+    row = torch.tensor(row)
+    col = torch.tensor(col)
+    mat = create_from_coo(row, col, val, mat_shape)
+    reduce_func = getattr(mat, reduce_type)
+    reduced = reduce_func(dim)
+    def calc_expected(row, col, val, mat_shape, reduce_type, dim):
+        def reduce_func(reduce_type, lhs, rhs):
+            if lhs is None:
+                return rhs
+            if reduce_type == 'sum' or reduce_type == 'smean':
+                return lhs + rhs
+            if reduce_type == 'smax':
+                return numpy.maximum(lhs, rhs)
+            if reduce_type == 'smin':
+                return numpy.minimum(lhs, rhs)
+        val = val.numpy()
+        row = row.numpy()
+        col = col.numpy()
+        if dim is None:
+            reduced = None
+            for i in range(val.shape[0]):
+                reduced = reduce_func(reduce_type, reduced, val[i])
+            if reduced is None:
+                reduced = numpy.zeros(val.shape[1:])
+            if reduce_type == 'smean':
+                reduced = reduced / val.shape[0]
+            return reduced
+        reduced_shape = (mat_shape[0] if dim == 1 else mat_shape[1])
+        reduced = [None] * reduced_shape
+        count = [0] * reduced_shape
+        for i, (r, c) in enumerate(zip(row, col)):
+            axis = r if dim == 1 else c
+            reduced[axis] = reduce_func(reduce_type, reduced[axis], val[i])
+            count[axis] += 1
+        for i in range(reduced_shape):
+            if count[i] == 0:
+                reduced[i] = numpy.zeros(val.shape[1:])
+            else:
+                if reduce_type == 'smean':
+                    reduced[i] /= count[i]
+        return numpy.stack(reduced, axis=0)
+    expected = calc_expected(row, col, val, mat_shape, reduce_type, dim)
+    assert torch.allclose(reduced, torch.tensor(expected).float())
--- a/tests/pytorch/mock_sparse/test_transpose.py
+++ b/tests/pytorch/mock_sparse/test_transpose.py
@@ -19,12 +19,9 @@ def test_diag_matrix_transpose(val_shape, mat_shape):
 @pytest.mark.parametrize("dense_dim", [None, 2])
 @pytest.mark.parametrize("row", [[0, 0, 1, 2], (0, 1, 2, 4)])
 @pytest.mark.parametrize("col", [(0, 1, 2, 2), (1, 3, 3, 4)])
-@pytest.mark.parametrize("mat_shape", [(3, 5), (5, 3)])
+@pytest.mark.parametrize("extra_shape", [(0, 1), (2, 1)])
-def test_sparse_matrix_transpose(dense_dim, row, col, mat_shape):
+def test_sparse_matrix_transpose(dense_dim, row, col, extra_shape):
-    # Skip invalid matrices
+    mat_shape = (max(row) + 1 + extra_shape[0], max(col) + 1 + extra_shape[1])
-    if max(row) >= mat_shape[0] or max(col) >= mat_shape[1]:
-        return
    val_shape = (len(row),)
    if dense_dim is not None:
        val_shape += (dense_dim,)