Unverified Commit f0ce2bea authored by Quan (Andy) Gan's avatar Quan (Andy) Gan Committed by GitHub
Browse files

[Sparse] Sparse matrix reduction Python part (#5013)



* initial commit

* lint

* rename

* address comments

* add doctests

* address comments

* remove additional test

* add description

* lint

* lint

* lint

* Update test_reduction.py
Co-authored-by: Hongzhi (Steve) Chen <chenhongzhi.nkcs@gmail.com>
parent b3aec7ae
...@@ -9,11 +9,12 @@ from .diag_matrix import * ...@@ -9,11 +9,12 @@ from .diag_matrix import *
from .elementwise_op import * from .elementwise_op import *
from .elementwise_op_diag import * from .elementwise_op_diag import *
from .elementwise_op_sp import * from .elementwise_op_sp import *
from .matmul import *
from .reduction import * # pylint: disable=W0622
from .sddmm import *
from .sparse_matrix import * from .sparse_matrix import *
from .unary_op_diag import * from .unary_op_diag import *
from .unary_op_sp import * from .unary_op_sp import *
from .matmul import *
from .sddmm import *
def load_dgl_sparse(): def load_dgl_sparse():
......
"""DGL sparse matrix reduce operators"""
from typing import Optional
import torch
from .sparse_matrix import SparseMatrix
def reduce(A: SparseMatrix, dim: Optional[int] = None, rtype: str = "sum"):
    """Compute the reduction of non-zero values in sparse matrix A along
    the given dimension :attr:`dim`.

    The reduction does not count zero values. If the row or column to be
    reduced does not have any non-zero value, the result will be 0.

    Parameters
    ----------
    A : SparseMatrix
        Sparse matrix
    dim : int, optional
        The dimension to reduce. Must be either 0 (by rows) or 1 (by columns)
        or None (on all non-zero entries).

        If :attr:`dim` is None, it reduces all the elements in the sparse
        matrix. Otherwise, it reduces on the row (``dim=0``) or column
        (``dim=1``) dimension, producing a tensor of shape
        ``(A.shape[1],) + A.val.shape[1:]`` or
        ``(A.shape[0],) + A.val.shape[1:]``.
    rtype: str, optional
        Reduction type, one of ``['sum', 'smin', 'smax', 'smean', 'sprod']``,
        representing taking the sum, minimum, maximum, mean, and product of the
        non-zero entries.

    Returns
    -------
    Tensor
        Reduced tensor

    Examples
    --------
    >>> row = torch.tensor([0, 1, 1])
    >>> col = torch.tensor([0, 0, 2])
    >>> val = torch.tensor([1, 1, 2])
    >>> A = dglsp.create_from_coo(row, col, val, shape=(4, 3))
    >>> print(A.reduce(rtype='sum'))
    tensor(4)
    >>> print(A.reduce(0, 'sum'))
    tensor([2, 0, 2])
    >>> print(A.reduce(1, 'sum'))
    tensor([1, 3, 0, 0])
    >>> print(A.reduce(0, 'smax'))
    tensor([1, 0, 2])
    >>> print(A.reduce(1, 'smin'))
    tensor([1, 1, 0, 0])

    >>> row = torch.tensor([0, 1, 1])
    >>> col = torch.tensor([0, 0, 2])
    >>> val = torch.tensor([[1., 2.], [2., 1.], [2., 2.]])
    >>> A = dglsp.create_from_coo(row, col, val, shape=(4, 3))
    >>> print(A.reduce(rtype='sum'))
    tensor([5., 5.])
    >>> print(A.reduce(0, 'sum'))
    tensor([[3., 3.],
            [0., 0.],
            [2., 2.]])
    >>> print(A.reduce(1, 'smin'))
    tensor([[1., 2.],
            [2., 1.],
            [0., 0.],
            [0., 0.]])
    >>> print(A.reduce(0, 'smean'))
    tensor([[1.5000, 1.5000],
            [0.0000, 0.0000],
            [2.0000, 2.0000]])
    """
    # Dispatch to the C++ sparse op registered under torch.ops.dgl_sparse.
    return torch.ops.dgl_sparse.reduce(A.c_sparse_matrix, rtype, dim)
# pylint: disable=W0622
def sum(A: SparseMatrix, dim: Optional[int] = None):
    """Compute the sum of non-zero values in sparse matrix A along
    the given dimension :attr:`dim`.

    Parameters
    ----------
    A : SparseMatrix
        Sparse matrix
    dim : int, optional
        The dimension to reduce. Must be either 0 (by rows) or 1 (by columns)
        or None (on all non-zero entries).

        If :attr:`dim` is None, it reduces all the elements in the sparse
        matrix. Otherwise, it reduces on the row (``dim=0``) or column
        (``dim=1``) dimension, producing a tensor of shape
        ``(A.shape[1],) + A.val.shape[1:]`` or
        ``(A.shape[0],) + A.val.shape[1:]``.

    Returns
    -------
    Tensor
        Reduced tensor

    Examples
    --------
    >>> row = torch.tensor([0, 1, 1])
    >>> col = torch.tensor([0, 0, 2])
    >>> val = torch.tensor([1, 1, 2])
    >>> A = dglsp.create_from_coo(row, col, val, shape=(4, 3))
    >>> print(A.sum())
    tensor(4)
    >>> print(A.sum(0))
    tensor([2, 0, 2])
    >>> print(A.sum(1))
    tensor([1, 3, 0, 0])

    >>> row = torch.tensor([0, 1, 1])
    >>> col = torch.tensor([0, 0, 2])
    >>> val = torch.tensor([[1, 2], [2, 1], [2, 2]])
    >>> A = dglsp.create_from_coo(row, col, val, shape=(4, 3))
    >>> print(A.sum())
    tensor([5, 5])
    >>> print(A.sum(0))
    tensor([[3, 3],
            [0, 0],
            [2, 2]])
    """
    # Dispatch to the C++ sparse op registered under torch.ops.dgl_sparse.
    return torch.ops.dgl_sparse.sum(A.c_sparse_matrix, dim)
def smax(A: SparseMatrix, dim: Optional[int] = None):
    """Compute the maximum of non-zero values in sparse matrix A along
    the given dimension :attr:`dim`.

    The reduction does not count zero values. If the row or column to be
    reduced does not have any non-zero value, the result will be 0.

    Parameters
    ----------
    A : SparseMatrix
        Sparse matrix
    dim : int, optional
        The dimension to reduce. Must be either 0 (by rows) or 1 (by columns)
        or None (on all non-zero entries).

        If :attr:`dim` is None, it reduces all the elements in the sparse
        matrix. Otherwise, it reduces on the row (``dim=0``) or column
        (``dim=1``) dimension, producing a tensor of shape
        ``(A.shape[1],) + A.val.shape[1:]`` or
        ``(A.shape[0],) + A.val.shape[1:]``.

    Returns
    -------
    Tensor
        Reduced tensor

    Examples
    --------
    >>> row = torch.tensor([0, 1, 1])
    >>> col = torch.tensor([0, 0, 2])
    >>> val = torch.tensor([1, 1, 2])
    >>> A = dglsp.create_from_coo(row, col, val, shape=(4, 3))
    >>> print(A.smax())
    tensor(2)
    >>> print(A.smax(0))
    tensor([1, 0, 2])
    >>> print(A.smax(1))
    tensor([1, 2, 0, 0])

    >>> row = torch.tensor([0, 1, 1])
    >>> col = torch.tensor([0, 0, 2])
    >>> val = torch.tensor([[1, 2], [2, 1], [2, 2]])
    >>> A = dglsp.create_from_coo(row, col, val, shape=(4, 3))
    >>> print(A.smax())
    tensor([2, 2])
    >>> print(A.smax(1))
    tensor([[1, 2],
            [2, 2],
            [0, 0],
            [0, 0]])
    """
    # Dispatch to the C++ sparse op registered under torch.ops.dgl_sparse.
    return torch.ops.dgl_sparse.smax(A.c_sparse_matrix, dim)
def smin(A: SparseMatrix, dim: Optional[int] = None):
    """Compute the minimum of non-zero values in sparse matrix A along
    the given dimension :attr:`dim`.

    The reduction does not count zero values. If the row or column to be reduced
    does not have any non-zero value, the result will be 0.

    Parameters
    ----------
    A : SparseMatrix
        Sparse matrix
    dim : int, optional
        The dimension to reduce. Must be either 0 (by rows) or 1 (by columns)
        or None (on all non-zero entries).

        If :attr:`dim` is None, it reduces all the elements in the sparse
        matrix. Otherwise, it reduces on the row (``dim=0``) or column
        (``dim=1``) dimension, producing a tensor of shape
        ``(A.shape[1],) + A.val.shape[1:]`` or
        ``(A.shape[0],) + A.val.shape[1:]``.

    Returns
    -------
    Tensor
        Reduced tensor

    Examples
    --------
    >>> row = torch.tensor([0, 1, 1])
    >>> col = torch.tensor([0, 0, 2])
    >>> val = torch.tensor([1, 1, 2])
    >>> A = dglsp.create_from_coo(row, col, val, shape=(4, 3))
    >>> print(A.smin())
    tensor(1)
    >>> print(A.smin(0))
    tensor([1, 0, 2])
    >>> print(A.smin(1))
    tensor([1, 1, 0, 0])

    >>> row = torch.tensor([0, 1, 1])
    >>> col = torch.tensor([0, 0, 2])
    >>> val = torch.tensor([[1, 2], [2, 1], [2, 2]])
    >>> A = dglsp.create_from_coo(row, col, val, shape=(4, 3))
    >>> print(A.smin())
    tensor([1, 1])
    >>> print(A.smin(0))
    tensor([[1, 1],
            [0, 0],
            [2, 2]])
    >>> print(A.smin(1))
    tensor([[1, 2],
            [2, 1],
            [0, 0],
            [0, 0]])
    """
    # Dispatch to the C++ sparse op registered under torch.ops.dgl_sparse.
    return torch.ops.dgl_sparse.smin(A.c_sparse_matrix, dim)
def smean(A: SparseMatrix, dim: Optional[int] = None):
    """Compute the mean of non-zero values in sparse matrix A along
    the given dimension :attr:`dim`.

    The reduction does not count zero values. If the row or column to be reduced
    does not have any non-zero value, the result will be 0.

    Parameters
    ----------
    A : SparseMatrix
        Sparse matrix
    dim : int, optional
        The dimension to reduce. Must be either 0 (by rows) or 1 (by columns)
        or None (on all non-zero entries).

        If :attr:`dim` is None, it reduces all the elements in the sparse
        matrix. Otherwise, it reduces on the row (``dim=0``) or column
        (``dim=1``) dimension, producing a tensor of shape
        ``(A.shape[1],) + A.val.shape[1:]`` or
        ``(A.shape[0],) + A.val.shape[1:]``.

    Returns
    -------
    Tensor
        Reduced tensor

    Examples
    --------
    >>> row = torch.tensor([0, 1, 1])
    >>> col = torch.tensor([0, 0, 2])
    >>> val = torch.tensor([1., 1., 2.])
    >>> A = dglsp.create_from_coo(row, col, val, shape=(4, 3))
    >>> print(A.smean())
    tensor(1.3333)
    >>> print(A.smean(0))
    tensor([1., 0., 2.])
    >>> print(A.smean(1))
    tensor([1.0000, 1.5000, 0.0000, 0.0000])

    >>> row = torch.tensor([0, 1, 1])
    >>> col = torch.tensor([0, 0, 2])
    >>> val = torch.tensor([[1., 2.], [2., 1.], [2., 2.]])
    >>> A = dglsp.create_from_coo(row, col, val, shape=(4, 3))
    >>> print(A.smean())
    tensor([1.6667, 1.6667])
    >>> print(A.smean(0))
    tensor([[1.5000, 1.5000],
            [0.0000, 0.0000],
            [2.0000, 2.0000]])
    >>> print(A.smean(1))
    tensor([[1.0000, 2.0000],
            [2.0000, 1.5000],
            [0.0000, 0.0000],
            [0.0000, 0.0000]])
    """
    # Dispatch to the C++ sparse op registered under torch.ops.dgl_sparse.
    return torch.ops.dgl_sparse.smean(A.c_sparse_matrix, dim)
def sprod(A: SparseMatrix, dim: Optional[int] = None):
    """Compute the product of non-zero values in sparse matrix A along
    the given dimension :attr:`dim`.

    The reduction does not count zero values. If the row or column to be reduced
    does not have any non-zero value, the result will be 0.

    Parameters
    ----------
    A : SparseMatrix
        Sparse matrix
    dim : int, optional
        The dimension to reduce. Must be either 0 (by rows) or 1 (by columns)
        or None (on all non-zero entries).

        If :attr:`dim` is None, it reduces all the elements in the sparse
        matrix. Otherwise, it reduces on the row (``dim=0``) or column
        (``dim=1``) dimension, producing a tensor of shape
        ``(A.shape[1],) + A.val.shape[1:]`` or
        ``(A.shape[0],) + A.val.shape[1:]``.

    Returns
    -------
    Tensor
        Reduced tensor

    Examples
    --------
    >>> row = torch.tensor([0, 1, 1])
    >>> col = torch.tensor([0, 0, 2])
    >>> val = torch.tensor([1, 1, 2])
    >>> A = dglsp.create_from_coo(row, col, val, shape=(4, 3))
    >>> print(A.sprod())
    tensor(2)
    >>> print(A.sprod(0))
    tensor([1, 0, 2])
    >>> print(A.sprod(1))
    tensor([1, 2, 0, 0])

    >>> row = torch.tensor([0, 1, 1])
    >>> col = torch.tensor([0, 0, 2])
    >>> val = torch.tensor([[1, 2], [2, 1], [2, 2]])
    >>> A = dglsp.create_from_coo(row, col, val, shape=(4, 3))
    >>> print(A.sprod())
    tensor([4, 4])
    >>> print(A.sprod(0))
    tensor([[2, 2],
            [0, 0],
            [2, 2]])
    >>> print(A.sprod(1))
    tensor([[1, 2],
            [4, 2],
            [0, 0],
            [0, 0]])
    """
    # Dispatch to the C++ sparse op registered under torch.ops.dgl_sparse.
    return torch.ops.dgl_sparse.sprod(A.c_sparse_matrix, dim)
# Attach the reduction operators as methods on SparseMatrix so they can be
# called both functionally (``dglsp.sum(A, dim)``) and as methods
# (``A.sum(dim)``), matching the usage shown in the docstrings above.
SparseMatrix.reduce = reduce
SparseMatrix.sum = sum
SparseMatrix.smax = smax
SparseMatrix.smin = smin
SparseMatrix.smean = smean
SparseMatrix.sprod = sprod
import doctest
import operator
import sys
import backend as F
import dgl.mock_sparse2 as dglsp
import pytest
import torch
# TODO(#5013): Skipping tests on win.
if not sys.platform.startswith("linux"):
    pytest.skip("skipping tests on win", allow_module_level=True)

# Maps the torch dense reduction name (used to drive the reference
# computation) to the corresponding DGL sparse operator name.
dgl_op_map = {
    "sum": "sum",
    "amin": "smin",
    "amax": "smax",
    "mean": "smean",
    "prod": "sprod",
}

# Identity element of each reduction, used to pad the dense "stacked"
# tensor in _coalesce_dense so padded slots do not affect the result.
default_entry = {
    "sum": 0,
    "amin": float("inf"),
    "amax": float("-inf"),
    "mean": 0,
    "prod": 1,
}

# NOTE(review): binary_op_map appears unused in this file — verify against
# the full test module before removing.
binary_op_map = {
    "sum": operator.add,
    "amin": torch.min,
    "amax": torch.max,
    "mean": operator.add,
    "prod": operator.mul,
}

# Dimensions of the random sparse matrices used in the tests below.
NUM_ROWS = 10
NUM_COLS = 15
def _coalesce_dense(row, col, val, nrows, ncols, op):
    """Emulate sparse-matrix coalescing and reduction on dense tensors.

    Every non-zero entry is stacked on its own slice of an
    ``(nrows, ncols, nnz)``-shaped tensor ``A``, i.e.::

        A[row[i], col[i], i] = val[i]

    with all other slots filled by the identity element of ``op``, and the
    third ("nnz") dimension is then reduced away.

    Parameters
    ----------
    row, col : Tensor
        1-D coordinate tensors of equal length (the nnz entries).
    val : Tensor
        Values, shape ``(nnz,) + feature_shape``.
    nrows, ncols : int
        Dense matrix dimensions.
    op : str
        A key of ``default_entry`` ("sum", "amin", "amax", "mean", "prod").

    Returns
    -------
    (Tensor, Tensor)
        The reduced dense matrix and a per-position non-zero count ``M``
        (reshaped to broadcast against the result), used as the divisor
        when ``op`` is "mean".
    """
    # Fix: honor the nrows/ncols parameters and the actual nnz instead of
    # the hard-coded module globals and the literal 20.
    nnz = len(val)
    # Derive the device from the input so the helper is self-contained.
    M = torch.zeros(nrows, ncols, device=val.device)
    A = torch.full(
        (nrows, ncols, nnz) + val.shape[1:],
        default_entry[op],
        device=val.device,
        dtype=val.dtype,
    )
    # index_put expects all index tensors on the same device as `row`/`col`.
    A = torch.index_put(
        A, (row, col, torch.arange(nnz, device=row.device)), val
    )
    # Count how many non-zero entries land on each (row, col) position;
    # duplicates accumulate, mirroring coalescing semantics.
    for i in range(nnz):
        M[row[i], col[i]] += 1
    if op == "mean":
        # Mean is computed by the caller as sum / count, so reduce with sum.
        A = A.sum(2)
    else:
        A = getattr(A, op)(2)
    # Reshape M so it broadcasts against A when val has trailing feature dims.
    M = M.view(nrows, ncols, *([1] * (val.dim() - 1)))
    return A, M
# Run the docstring examples of dglsp's reduction operators as unit tests.
@pytest.mark.parametrize(
    "func", ["reduce", "sum", "smin", "smax", "sprod", "smean"]
)
def test_docstring(func):
    """Execute every doctest attached to the given dglsp reduction op."""
    target = getattr(dglsp, func)
    env = {"torch": torch, "dglsp": dglsp}
    doctest_runner = doctest.DebugRunner()
    # DebugRunner raises on the first failing example, failing the test.
    for case in doctest.DocTestFinder().find(target, func, globs=env):
        doctest_runner.run(case)
@pytest.mark.parametrize("shape", [(20,), (20, 20)])
@pytest.mark.parametrize("op", ["sum", "amin", "amax", "mean", "prod"])
@pytest.mark.parametrize("use_reduce", [False, True])
def test_reduce_all(shape, op, use_reduce):
    """Reduce over all non-zero entries and compare against a dense reference."""
    rows = torch.randint(0, NUM_ROWS, (20,), device=F.ctx())
    cols = torch.randint(0, NUM_COLS, (20,), device=F.ctx())
    base = torch.randn(*shape, device=F.ctx())
    # Two independent leaf tensors with identical values: one feeds the
    # sparse op, the other the dense reference, so gradients can be compared.
    dense_val = base.clone().requires_grad_()
    sparse_val = base.requires_grad_()
    A = dglsp.create_from_coo(
        rows, cols, sparse_val, shape=(NUM_ROWS, NUM_COLS)
    )
    dense, mask = _coalesce_dense(rows, cols, dense_val, NUM_ROWS, NUM_COLS, op)
    result = (
        A.reduce(rtype=dgl_op_map[op])
        if use_reduce
        else getattr(A, dgl_op_map[op])()
    )
    if op == "mean":
        expected = dense.sum((0, 1)) / mask.sum()
    elif op == "prod":
        # prod() does not support a tuple of dims, so reduce twice.
        expected = dense.prod(0).prod(0)
    else:
        expected = getattr(dense, op)((0, 1))
    assert (result - expected).abs().max() < 1e-4
    grad_head = (
        torch.randn(*result.shape).to(sparse_val) if result.dim() > 0 else None
    )
    result.backward(grad_head)
    expected.backward(grad_head)
    assert (sparse_val.grad - dense_val.grad).abs().max() < 1e-4
@pytest.mark.parametrize("shape", [(20,), (20, 20)])
@pytest.mark.parametrize("dim", [0, 1])
@pytest.mark.parametrize("empty_nnz", [False, True])
@pytest.mark.parametrize("op", ["sum", "amin", "amax", "mean", "prod"])
@pytest.mark.parametrize("use_reduce", [False, True])
def test_reduce_along(shape, dim, empty_nnz, op, use_reduce):
    """Reduce along one dimension and compare against a dense reference,
    including rows/columns that hold no non-zero entry (must reduce to 0)."""
    row = torch.randint(0, NUM_ROWS, (20,), device=F.ctx())
    col = torch.randint(0, NUM_COLS, (20,), device=F.ctx())
    # Fix: removed the dead `mask = torch.bincount(...) == 0` computation —
    # it was never read; the zero-entry handling below uses M instead.
    val = torch.randn(*shape, device=F.ctx())
    val2 = val.clone()
    val = val.requires_grad_()
    val2 = val2.requires_grad_()
    # empty_nnz controls whether at least one column or one row has no
    # non-zero entry: evicting index 0 guarantees row 0 and column 0 are
    # empty, exercising the zero-fill path of the reduction.
    if empty_nnz:
        row[row == 0] = 1
        col[col == 0] = 1
    A = dglsp.create_from_coo(row, col, val, shape=(NUM_ROWS, NUM_COLS))
    A2, M = _coalesce_dense(row, col, val2, NUM_ROWS, NUM_COLS, op)
    if not use_reduce:
        output = getattr(A, dgl_op_map[op])(dim)
    else:
        output = A.reduce(dim=dim, rtype=dgl_op_map[op])
    if op == "mean":
        output2 = A2.sum(dim) / M.sum(dim)
    else:
        output2 = getattr(A2, op)(dim)
    # Rows/columns with no non-zero entry must come out as exactly 0:
    # copy only the positions that do have entries onto a zero tensor.
    # (Renamed from the misleading `zero_entry_idx` — these are the
    # indices that HAVE non-zero entries.)
    nonzero_idx = (M.sum(dim) != 0).nonzero(as_tuple=True)[0]
    output3 = torch.index_put(
        torch.zeros_like(output2), (nonzero_idx,), output2[nonzero_idx]
    )
    assert (output - output3).abs().max() < 1e-4
    head = torch.randn(*output.shape).to(val) if output.dim() > 0 else None
    output.backward(head)
    output3.backward(head)
    assert (val.grad - val2.grad).abs().max() < 1e-4
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment