Unverified Commit acd21a6d authored by Quan (Andy) Gan, committed by GitHub

[Feature] Support direct creation from CSR and CSC (#3045)



* csr and csc creation

* fix

* fix

* fixes to adj transpose

* fine

* raise error if indptr did not match number of nodes

* fix

* huh?

* oh
Co-authored-by: Minjie Wang <wmjlyjemaine@gmail.com>
parent 2f7ca414
......@@ -110,7 +110,7 @@ class DiffPoolBatchedGraphLayer(nn.Module):
assign_tensor = torch.block_diag(*assign_tensor) # size = (sum_N, batch_size * N_a)
h = torch.matmul(torch.t(assign_tensor), feat)
adj = g.adjacency_matrix(transpose=False, ctx=device)
adj = g.adjacency_matrix(transpose=True, ctx=device)
adj_new = torch.sparse.mm(adj, assign_tensor)
adj_new = torch.mm(torch.t(assign_tensor), adj_new)
......
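
The call-site change above reflects the central semantic change of this PR: under the new convention, ``adjacency_matrix``/``adj`` with ``transpose=False`` (now the default) returns the source-by-destination matrix, and ``transpose=True`` returns the destination-by-source matrix used for SpMM-style aggregation, which is why the explicit ``transpose`` arguments throughout this diff are flipped. A minimal sketch of the new convention, assuming this patch and a PyTorch backend (graph and values are illustrative only):

import dgl
import torch

g = dgl.graph(([0, 1, 2], [1, 2, 3]))                # edges 0->1, 1->2, 2->3
A = g.adjacency_matrix()                              # src-by-dst (new default, transpose=False)
At = g.adjacency_matrix(transpose=True)               # dst-by-src, used for feature aggregation
assert torch.equal(A.to_dense().t(), At.to_dense())   # the two orientations are transposes
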
......@@ -393,7 +393,7 @@ class CSRMM(mx.autograd.Function):
def forward(self, A_weights, B_weights):
gidxC, C_weights = _csrmm(self.gidxA, A_weights, self.gidxB, B_weights, self.num_vtypes)
nrows, ncols, C_indptr, C_indices, C_eids = gidxC.adjacency_matrix_tensors(0, True, 'csr')
nrows, ncols, C_indptr, C_indices, C_eids = gidxC.adjacency_matrix_tensors(0, False, 'csr')
# Note: the returned C_indptr, C_indices and C_eids tensors MUST be the same
# as the underlying tensors of the created graph gidxC.
self.backward_cache = gidxC
......@@ -430,7 +430,7 @@ class CSRSum(mx.autograd.Function):
def forward(self, *weights):
gidxC, C_weights = _csrsum(self.gidxs, weights)
nrows, ncols, C_indptr, C_indices, C_eids = gidxC.adjacency_matrix_tensors(
0, True, 'csr')
0, False, 'csr')
# Note: the returned C_indptr, C_indices and C_eids tensors MUST be the same
# as the underlying tensors of the created graph gidxC.
self.backward_cache = gidxC
......
......@@ -310,7 +310,7 @@ class CSRMM(th.autograd.Function):
@staticmethod
def forward(ctx, gidxA, A_weights, gidxB, B_weights, num_vtypes):
gidxC, C_weights = _csrmm(gidxA, A_weights, gidxB, B_weights, num_vtypes)
nrows, ncols, C_indptr, C_indices, C_eids = gidxC.adjacency_matrix_tensors(0, True, 'csr')
nrows, ncols, C_indptr, C_indices, C_eids = gidxC.adjacency_matrix_tensors(0, False, 'csr')
# Note: the returned C_indptr, C_indices and C_eids tensors MUST be the same
# as the underlying tensors of the created graph gidxC.
ctx.backward_cache = gidxA, gidxB, gidxC
......@@ -337,7 +337,7 @@ class CSRSum(th.autograd.Function):
# PyTorch tensors must be explicit arguments of the forward function
gidxC, C_weights = _csrsum(gidxs, weights)
nrows, ncols, C_indptr, C_indices, C_eids = gidxC.adjacency_matrix_tensors(
0, True, 'csr')
0, False, 'csr')
# Note: the returned C_indptr, C_indices and C_eids tensors MUST be the same
# as the underlying tensors of the created graph gidxC.
ctx.backward_cache = gidxs, gidxC
......
......@@ -302,7 +302,7 @@ def scatter_add(x, idx, m):
def csrmm_real(gidxA, A_weights, gidxB, B_weights, num_vtypes):
gidxC, C_weights = _csrmm(gidxA, A_weights, gidxB, B_weights, num_vtypes)
nrows, ncols, C_indptr, C_indices, C_eids = gidxC.adjacency_matrix_tensors(0, True, 'csr')
nrows, ncols, C_indptr, C_indices, C_eids = gidxC.adjacency_matrix_tensors(0, False, 'csr')
def grad(dnrows, dncols, dC_indptr, dC_indices, dC_eids, dC_weights):
# Only the last argument is meaningful.
......@@ -328,7 +328,7 @@ def csrmm(gidxA, A_weights, gidxB, B_weights, num_vtypes):
def csrsum_real(gidxs, weights):
gidxC, C_weights = _csrsum(gidxs, weights)
nrows, ncols, C_indptr, C_indices, C_eids = gidxC.adjacency_matrix_tensors(0, True, 'csr')
nrows, ncols, C_indptr, C_indices, C_eids = gidxC.adjacency_matrix_tensors(0, False, 'csr')
def grad(dnrows, dncols, dC_indptr, dC_indices, dC_eids, dC_weights):
# Only the last argument is meaningful.
......
......@@ -53,8 +53,18 @@ def graph(data,
DGL calls this format "tuple of node-tensors". The tensors should have the same
data type (int32 or int64) and device context (see the descriptions of
:attr:`idtype` and :attr:`device` below).
- ``(iterable[int], iterable[int])``: Similar to the tuple of node-tensors
format, but stores node IDs in two sequences (e.g. list, tuple, numpy.ndarray).
- ``('coo', (Tensor, Tensor))``: Same as ``(Tensor, Tensor)``.
- ``('csr', (Tensor, Tensor, Tensor))``: The three tensors form the CSR representation
of the graph's adjacency matrix. The first one is the row index pointer. The
second one is the column indices. The third one is the edge IDs, which can be empty
to represent consecutive integer IDs starting from 0.
- ``('csc', (Tensor, Tensor, Tensor))``: The three tensors form the CSC representation
of the graph's adjacency matrix. The first one is the column index pointer. The
second one is the row indices. The third one is the edge IDs, which can be empty
to represent consecutive integer IDs starting from 0.
The tensors can be replaced with any iterable of integers (e.g. list, tuple,
numpy.ndarray).
ntype : str, optional
Deprecated. To construct a graph with named node types, use :func:`dgl.heterograph`.
etype : str, optional
......@@ -131,6 +141,14 @@ def graph(data,
>>> g = dgl.graph((src_ids, dst_ids), idtype=torch.int32, device='cuda:0')
Create a graph from its CSR representation:
>>> g = dgl.graph(('csr', ([0, 0, 0, 1, 2, 3], [1, 2, 3], [])))
Create the same graph, additionally specifying the edge IDs:
>>> g = dgl.graph(('csr', ([0, 0, 0, 1, 2, 3], [1, 2, 3], [0, 1, 2])))
See Also
--------
from_scipy
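
To make the docstring example above concrete, here is a small hedged sketch (assuming this patch and a PyTorch backend) checking that the CSR triplet ``([0, 0, 0, 1, 2, 3], [1, 2, 3], [])`` describes the same 5-node graph as the COO pair ``([2, 3, 4], [1, 2, 3])``:

import dgl
import torch

g_csr = dgl.graph(('csr', ([0, 0, 0, 1, 2, 3], [1, 2, 3], [])))
g_coo = dgl.graph((torch.tensor([2, 3, 4]), torch.tensor([1, 2, 3])))
assert g_csr.num_nodes() == g_coo.num_nodes() == 5
src, dst = g_csr.edges()
print(src.tolist(), dst.tolist())   # [2, 3, 4] [1, 2, 3]
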
......@@ -158,16 +176,15 @@ def graph(data,
" Please refer to their API documents for more details.".format(
deprecated_kwargs.keys()))
u, v, urange, vrange = utils.graphdata2tensors(data, idtype)
(sparse_fmt, arrays), urange, vrange = utils.graphdata2tensors(data, idtype)
if num_nodes is not None: # override the number of nodes
if num_nodes < max(urange, vrange):
raise DGLError('The num_nodes argument must be larger than the max ID in the data,'
' but got {} and {}.'.format(num_nodes, max(urange, vrange) - 1))
urange, vrange = num_nodes, num_nodes
g = create_from_edges(u, v, '_N', '_E', '_N', urange, vrange,
row_sorted=row_sorted, col_sorted=col_sorted,
validate=False)
g = create_from_edges(sparse_fmt, arrays, '_N', '_E', '_N', urange, vrange,
row_sorted=row_sorted, col_sorted=col_sorted)
return g.to(device)
......@@ -226,8 +243,18 @@ def heterograph(data_dict,
this format "tuple of node-tensors". The tensors should have the same data type,
which must be either int32 or int64. They should also have the same device context
(see below the descriptions of :attr:`idtype` and :attr:`device`).
- ``(iterable[int], iterable[int])``: Similar to the tuple of node-tensors
format, but stores node IDs in two sequences (e.g. list, tuple, numpy.ndarray).
- ``('coo', (Tensor, Tensor))``: Same as ``(Tensor, Tensor)``.
- ``('csr', (Tensor, Tensor, Tensor))``: The three tensors form the CSR representation
of the graph's adjacency matrix. The first one is the row index pointer. The
second one is the column indices. The third one is the edge IDs, which can be empty
(i.e. with 0 elements) to represent consecutive integer IDs starting from 0.
- ``('csc', (Tensor, Tensor, Tensor))``: The three tensors form the CSC representation
of the graph's adjacency matrix. The first one is the column index pointer. The
second one is the row indices. The third one is the edge IDs, which can be empty
to represent consecutive integer IDs starting from 0.
The tensors can be replaced with any iterable of integers (e.g. list, tuple,
numpy.ndarray).
num_nodes_dict : dict[str, int], optional
The number of nodes for some node types, which is a dictionary mapping a node type
:math:`T` to the number of :math:`T`-typed nodes. If not given for a node type
......@@ -320,8 +347,9 @@ def heterograph(data_dict,
raise DGLError("dgl.heterograph no longer supports graph construction from a NetworkX "
"graph, use dgl.from_networkx instead.")
is_bipartite = (sty != dty)
u, v, urange, vrange = utils.graphdata2tensors(data, idtype, bipartite=is_bipartite)
node_tensor_dict[(sty, ety, dty)] = (u, v)
(sparse_fmt, arrays), urange, vrange = utils.graphdata2tensors(
data, idtype, bipartite=is_bipartite)
node_tensor_dict[(sty, ety, dty)] = (sparse_fmt, arrays)
if need_infer:
num_nodes_dict[sty] = max(num_nodes_dict[sty], urange)
num_nodes_dict[dty] = max(num_nodes_dict[dty], vrange)
......@@ -340,8 +368,8 @@ def heterograph(data_dict,
num_nodes_per_type = utils.toindex([num_nodes_dict[ntype] for ntype in ntypes], "int64")
rel_graphs = []
for srctype, etype, dsttype in relations:
src, dst = node_tensor_dict[(srctype, etype, dsttype)]
g = create_from_edges(src, dst, srctype, etype, dsttype,
sparse_fmt, arrays = node_tensor_dict[(srctype, etype, dsttype)]
g = create_from_edges(sparse_fmt, arrays, srctype, etype, dsttype,
num_nodes_dict[srctype], num_nodes_dict[dsttype])
rel_graphs.append(g)
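
Because the dispatch now happens per relation, ``dgl.heterograph`` accepts a mix of input formats across relations, as the updated tests later in this diff also exercise. A small illustrative sketch (assuming this patch; the node and edge values are made up):

import dgl

g = dgl.heterograph({
    ('user', 'follows', 'user'): ([0, 1], [1, 2]),                         # COO pair
    ('user', 'plays', 'game'): ('csr', ([0, 1, 2, 4], [0, 0, 0, 1], [])),  # CSR triplet
})
print(g.num_edges('plays'))   # 4
print(g.num_nodes('game'))    # 2, inferred as max(indices) + 1
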
......@@ -368,8 +396,18 @@ def create_block(data_dict, num_src_nodes=None, num_dst_nodes=None, idtype=None,
this format "tuple of node-tensors". The tensors should have the same data type,
which must be either int32 or int64. They should also have the same device context
(see below the descriptions of :attr:`idtype` and :attr:`device`).
- ``(iterable[int], iterable[int])``: Similar to the tuple of node-tensors
format, but stores node IDs in two sequences (e.g. list, tuple, numpy.ndarray).
- ``('coo', (Tensor, Tensor))``: Same as ``(Tensor, Tensor)``.
- ``('csr', (Tensor, Tensor, Tensor))``: The three tensors form the CSR representation
of the graph's adjacency matrix. The first one is the row index pointer. The
second one is the column indices. The third one is the edge IDs, which can be empty
to represent consecutive integer IDs starting from 0.
- ``('csc', (Tensor, Tensor, Tensor))``: The three tensors form the CSC representation
of the graph's adjacency matrix. The first one is the column index pointer. The
second one is the row indices. The third one is the edge IDs, which can be empty
to represent consecutive integer IDs starting from 0.
The tensors can be replaced with any iterable of integers (e.g. list, tuple,
numpy.ndarray).
If you would like to create a MFG with a single source node type, a single destination
node type, and a single edge type, then you can pass in the graph data directly
......@@ -489,8 +527,9 @@ def create_block(data_dict, num_src_nodes=None, num_dst_nodes=None, idtype=None,
# Convert all data to node tensors first
node_tensor_dict = {}
for (sty, ety, dty), data in data_dict.items():
u, v, urange, vrange = utils.graphdata2tensors(data, idtype, bipartite=True)
node_tensor_dict[(sty, ety, dty)] = (u, v)
(sparse_fmt, arrays), urange, vrange = utils.graphdata2tensors(
data, idtype, bipartite=True)
node_tensor_dict[(sty, ety, dty)] = (sparse_fmt, arrays)
if need_infer:
num_src_nodes[sty] = max(num_src_nodes[sty], urange)
num_dst_nodes[dty] = max(num_dst_nodes[dty], vrange)
......@@ -525,8 +564,8 @@ def create_block(data_dict, num_src_nodes=None, num_dst_nodes=None, idtype=None,
meta_edges_src.append(srctype_dict[srctype])
meta_edges_dst.append(dsttype_dict[dsttype])
etypes.append(etype)
src, dst = node_tensor_dict[(srctype, etype, dsttype)]
g = create_from_edges(src, dst, 'SRC/' + srctype, etype, 'DST/' + dsttype,
sparse_fmt, arrays = node_tensor_dict[(srctype, etype, dsttype)]
g = create_from_edges(sparse_fmt, arrays, 'SRC/' + srctype, etype, 'DST/' + dsttype,
num_src_nodes[srctype], num_dst_nodes[dsttype])
rel_graphs.append(g)
......@@ -1041,8 +1080,8 @@ def from_scipy(sp_mat,
raise DGLError('Expect the number of rows to be the same as the number of columns for '
'sp_mat, got {:d} and {:d}.'.format(num_rows, num_cols))
u, v, urange, vrange = utils.graphdata2tensors(sp_mat, idtype)
g = create_from_edges(u, v, '_N', '_E', '_N', urange, vrange)
(sparse_fmt, arrays), urange, vrange = utils.graphdata2tensors(sp_mat, idtype)
g = create_from_edges(sparse_fmt, arrays, '_N', '_E', '_N', urange, vrange)
if eweight_name is not None:
g.edata[eweight_name] = F.tensor(sp_mat.data)
return g.to(device)
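
With ``graphdata2tensors`` returning a ``SparseAdjTuple``, ``from_scipy`` now passes CSR/CSC matrices straight through instead of first converting their index structure to COO. A hedged usage sketch (assuming this patch; the matrix size and density are arbitrary):

import dgl
import scipy.sparse as ssp

spmat = ssp.random(10, 10, density=0.2, format='csr')
g = dgl.from_scipy(spmat, eweight_name='w')      # CSR arrays are reused directly
print(g.num_nodes(), g.num_edges())              # 10, plus the number of nonzeros
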
......@@ -1135,9 +1174,8 @@ def bipartite_from_scipy(sp_mat,
heterograph
bipartite_from_networkx
"""
# Sanity check
u, v, urange, vrange = utils.graphdata2tensors(sp_mat, idtype, bipartite=True)
g = create_from_edges(u, v, utype, etype, vtype, urange, vrange)
(sparse_fmt, arrays), urange, vrange = utils.graphdata2tensors(sp_mat, idtype, bipartite=True)
g = create_from_edges(sparse_fmt, arrays, utype, etype, vtype, urange, vrange)
if eweight_name is not None:
g.edata[eweight_name] = F.tensor(sp_mat.data)
return g.to(device)
......@@ -1255,10 +1293,10 @@ def from_networkx(nx_graph,
if not nx_graph.is_directed():
nx_graph = nx_graph.to_directed()
u, v, urange, vrange = utils.graphdata2tensors(
(sparse_fmt, arrays), urange, vrange = utils.graphdata2tensors(
nx_graph, idtype, edge_id_attr_name=edge_id_attr_name)
g = create_from_edges(u, v, '_N', '_E', '_N', urange, vrange)
g = create_from_edges(sparse_fmt, arrays, '_N', '_E', '_N', urange, vrange)
# nx_graph.edges(data=True) returns src, dst, attr_dict
has_edge_id = nx_graph.number_of_edges() > 0 and edge_id_attr_name is not None
......@@ -1450,12 +1488,12 @@ def bipartite_from_networkx(nx_graph,
bottom_map = {n : i for i, n in enumerate(bottom_nodes)}
# Get the node tensors and the number of nodes
u, v, urange, vrange = utils.graphdata2tensors(
(sparse_fmt, arrays), urange, vrange = utils.graphdata2tensors(
nx_graph, idtype, bipartite=True,
edge_id_attr_name=edge_id_attr_name,
top_map=top_map, bottom_map=bottom_map)
g = create_from_edges(u, v, utype, etype, vtype, urange, vrange)
g = create_from_edges(sparse_fmt, arrays, utype, etype, vtype, urange, vrange)
# nx_graph.edges(data=True) returns src, dst, attr_dict
has_edge_id = nx_graph.number_of_edges() > 0 and edge_id_attr_name is not None
......@@ -1586,10 +1624,9 @@ DGLHeteroGraph.to_networkx = to_networkx
# Internal APIs
############################################################
def create_from_edges(u, v,
def create_from_edges(sparse_fmt, arrays,
utype, etype, vtype,
urange, vrange,
validate=True,
row_sorted=False,
col_sorted=False):
"""Internal function to create a graph from incident nodes with types.
......@@ -1598,10 +1635,10 @@ def create_from_edges(u, v,
Parameters
----------
u : Tensor
Source node IDs.
v : Tensor
Dest node IDs.
sparse_fmt : str
The sparse adjacency matrix format.
arrays : tuple[Tensor]
The sparse adjacency matrix arrays.
utype : str
Source node type name.
etype : str
......@@ -1614,8 +1651,6 @@ def create_from_edges(u, v,
vrange : int, optional
The destination node ID range. If None, the value is the
maximum of the destination node IDs in the edge list plus 1. (Default: None)
validate : bool, optional
If True, checks if node IDs are within range.
row_sorted : bool, optional
Whether or not the rows of the COO are in ascending order.
col_sorted : bool, optional
......@@ -1627,24 +1662,21 @@ def create_from_edges(u, v,
-------
DGLHeteroGraph
"""
if validate:
if urange is not None and len(u) > 0 and \
urange <= F.as_scalar(F.max(u, dim=0)):
raise DGLError('Invalid node id {} (should be less than cardinality {}).'.format(
urange, F.as_scalar(F.max(u, dim=0))))
if vrange is not None and len(v) > 0 and \
vrange <= F.as_scalar(F.max(v, dim=0)):
raise DGLError('Invalid node id {} (should be less than cardinality {}).'.format(
vrange, F.as_scalar(F.max(v, dim=0))))
if utype == vtype:
num_ntypes = 1
else:
num_ntypes = 2
hgidx = heterograph_index.create_unitgraph_from_coo(
num_ntypes, urange, vrange, u, v, ['coo', 'csr', 'csc'],
row_sorted, col_sorted)
if sparse_fmt == 'coo':
u, v = arrays
hgidx = heterograph_index.create_unitgraph_from_coo(
num_ntypes, urange, vrange, u, v, ['coo', 'csr', 'csc'],
row_sorted, col_sorted)
else: # 'csr' or 'csc'
indptr, indices, eids = arrays
hgidx = heterograph_index.create_unitgraph_from_csr(
num_ntypes, urange, vrange, indptr, indices, eids, ['coo', 'csr', 'csc'],
sparse_fmt == 'csc')
if utype == vtype:
return DGLHeteroGraph(hgidx, [utype], [etype])
else:
......
......@@ -6,7 +6,6 @@ import numpy as np
from .utils import save_graphs, load_graphs, _get_dgl_url
from ..convert import heterograph
from ..utils import graphdata2tensors
from .dgl_dataset import DGLBuiltinDataset
from .. import backend as F
......@@ -106,8 +105,9 @@ class FraudDataset(DGLBuiltinDataset):
graph_data = {}
for relation in self.relations[self.name]:
u, v, _, _ = graphdata2tensors(data[relation])
graph_data[(self.node_name[self.name], relation, self.node_name[self.name])] = (u, v)
adj = data[relation].tocoo()
row, col = adj.row, adj.col
graph_data[(self.node_name[self.name], relation, self.node_name[self.name])] = (row, col)
g = heterograph(graph_data)
g.ndata['feature'] = F.tensor(node_features)
......
......@@ -69,9 +69,14 @@ class DGLHeteroGraph(object):
if not isinstance(gidx, heterograph_index.HeteroGraphIndex):
dgl_warning('Recommend creating graphs by `dgl.graph(data)`'
' instead of `dgl.DGLGraph(data)`.')
u, v, num_src, num_dst = utils.graphdata2tensors(gidx)
gidx = heterograph_index.create_unitgraph_from_coo(
1, num_src, num_dst, u, v, ['coo', 'csr', 'csc'])
(sparse_fmt, arrays), num_src, num_dst = utils.graphdata2tensors(gidx)
if sparse_fmt == 'coo':
gidx = heterograph_index.create_unitgraph_from_coo(
1, num_src, num_dst, arrays[0], arrays[1], ['coo', 'csr', 'csc'])
else:
gidx = heterograph_index.create_unitgraph_from_csr(
1, num_src, num_dst, arrays[0], arrays[1], arrays[2], ['coo', 'csr', 'csc'],
sparse_fmt == 'csc')
if len(deprecate_kwargs) != 0:
dgl_warning('Keyword arguments {} are deprecated in v0.5, and can be safely'
' removed in all cases.'.format(list(deprecate_kwargs.keys())))
......@@ -3506,23 +3511,23 @@ class DGLHeteroGraph(object):
else:
return deg
def adjacency_matrix(self, transpose=True, ctx=F.cpu(), scipy_fmt=None, etype=None):
def adjacency_matrix(self, transpose=False, ctx=F.cpu(), scipy_fmt=None, etype=None):
"""Alias of :meth:`adj`"""
return self.adj(transpose, ctx, scipy_fmt, etype)
def adj(self, transpose=True, ctx=F.cpu(), scipy_fmt=None, etype=None):
def adj(self, transpose=False, ctx=F.cpu(), scipy_fmt=None, etype=None):
"""Return the adjacency matrix of edges of the given edge type.
By default, a row of returned adjacency matrix represents the
destination of an edge and the column represents the source.
source of an edge and the column represents the destination.
When transpose is True, a row represents the source and a column
represents a destination.
When transpose is True, a row represents the destination and a column
represents the source.
Parameters
----------
transpose : bool, optional
A flag to transpose the returned adjacency matrix. (Default: True)
A flag to transpose the returned adjacency matrix. (Default: False)
ctx : context, optional
The context of returned adjacency matrix. (Default: cpu)
scipy_fmt : str, optional
......@@ -3578,8 +3583,52 @@ class DGLHeteroGraph(object):
else:
return self._graph.adjacency_matrix_scipy(etid, transpose, scipy_fmt, False)
def adj_sparse(self, fmt, etype=None):
"""Return the adjacency matrix of edges of the given edge type as tensors of
a sparse matrix representation.
By default, a row of returned adjacency matrix represents the
source of an edge and the column represents the destination.
Parameters
----------
fmt : str
Either ``coo``, ``csr`` or ``csc``.
etype : str or (str, str, str), optional
The type names of the edges. The allowed type name formats are:
* ``(str, str, str)`` for source node type, edge type and destination node type.
* or one ``str`` edge type name if the name can uniquely identify a
triplet format in the graph.
Can be omitted if the graph has only one type of edges.
Returns
-------
tuple[Tensor]
If :attr:`fmt` is ``coo``, returns a pair of source and destination node ID
tensors.
If :attr:`fmt` is ``csr`` or ``csc``, return the CSR or CSC representation
of the adjacency matrix as a triplet of tensors
``(indptr, indices, edge_ids)``. Note that ``edge_ids`` may be an empty
tensor with 0 elements, in which case the edge IDs are consecutive
integers starting from 0.
Examples
--------
>>> g = dgl.graph(([0, 1, 2], [1, 2, 3]))
>>> g.adj_sparse('coo')
>>> g.adj_sparse('csr')
"""
etid = self.get_etype_id(etype)
if fmt == 'csc':
# The first two elements are number of rows and columns
return self._graph.adjacency_matrix_tensors(etid, True, 'csr')[2:]
else:
return self._graph.adjacency_matrix_tensors(etid, False, fmt)[2:]
def adjacency_matrix_scipy(self, transpose=True, fmt='csr', return_edge_ids=None):
def adjacency_matrix_scipy(self, transpose=False, fmt='csr', return_edge_ids=None):
"""DEPRECATED: please use ``dgl.adjacency_matrix(transpose, scipy_fmt=fmt)``.
"""
dgl_warning('DGLGraph.adjacency_matrix_scipy is deprecated. '
......
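
The new ``adj_sparse`` method above returns the raw sparse arrays rather than a SciPy or framework sparse matrix. A short hedged sketch of its behavior on a toy graph (PyTorch backend assumed; as documented, the edge-ID tensor may come back empty, meaning consecutive IDs):

import dgl

g = dgl.graph(([0, 1, 2], [1, 2, 3]))
src, dst = g.adj_sparse('coo')               # source and destination node IDs
indptr, indices, eids = g.adj_sparse('csr')
print(indptr.tolist())                       # [0, 1, 2, 3, 3]: node 3 has no out-edge
print(indices.tolist())                      # [1, 2, 3]
print(eids.tolist())                         # possibly [], i.e. edge IDs 0, 1, 2 in this order
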
......@@ -600,11 +600,11 @@ class HeteroGraphIndex(ObjectBase):
def adjacency_matrix(self, etype, transpose, ctx):
"""Return the adjacency matrix representation of this graph.
By default, a row of returned adjacency matrix represents the destination
of an edge and the column represents the source.
By default, a row of returned adjacency matrix represents the source
of an edge and the column represents the destination.
When transpose is True, a row represents the source and a column represents
a destination.
When transpose is True, a row represents the destination and a column represents
the source.
Parameters
----------
......@@ -630,8 +630,8 @@ class HeteroGraphIndex(ObjectBase):
rst = _CAPI_DGLHeteroGetAdj(self, int(etype), transpose, fmt)
# convert to framework-specific sparse matrix
srctype, dsttype = self.metagraph.find_edge(etype)
nrows = self.number_of_nodes(srctype) if transpose else self.number_of_nodes(dsttype)
ncols = self.number_of_nodes(dsttype) if transpose else self.number_of_nodes(srctype)
nrows = self.number_of_nodes(dsttype) if transpose else self.number_of_nodes(srctype)
ncols = self.number_of_nodes(srctype) if transpose else self.number_of_nodes(dsttype)
nnz = self.number_of_edges(etype)
if fmt == "csr":
indptr = F.copy_to(F.from_dgl_nd(rst(0)), ctx)
......@@ -653,11 +653,11 @@ class HeteroGraphIndex(ObjectBase):
def adjacency_matrix_tensors(self, etype, transpose, fmt):
"""Return the adjacency matrix as a triplet of tensors.
By default, a row of returned adjacency matrix represents the destination
of an edge and the column represents the source.
By default, a row of returned adjacency matrix represents the source
of an edge and the column represents the destination.
When transpose is True, a row represents the source and a column represents
a destination.
When transpose is True, a row represents the destination and a column represents
the source.
Parameters
----------
......@@ -689,8 +689,8 @@ class HeteroGraphIndex(ObjectBase):
rst = _CAPI_DGLHeteroGetAdj(self, int(etype), transpose, fmt)
srctype, dsttype = self.metagraph.find_edge(etype)
nrows = self.number_of_nodes(srctype) if transpose else self.number_of_nodes(dsttype)
ncols = self.number_of_nodes(dsttype) if transpose else self.number_of_nodes(srctype)
nrows = self.number_of_nodes(dsttype) if transpose else self.number_of_nodes(srctype)
ncols = self.number_of_nodes(srctype) if transpose else self.number_of_nodes(dsttype)
nnz = self.number_of_edges(etype)
if fmt == "csr":
indptr = F.from_dgl_nd(rst(0))
......@@ -919,9 +919,9 @@ class HeteroGraphIndex(ObjectBase):
The first element of the tuple is the shuffle order for outward graph
The second element of the tuple is the shuffle order for inward graph
"""
csr = _CAPI_DGLHeteroGetAdj(self, int(etype), True, "csr")
csr = _CAPI_DGLHeteroGetAdj(self, int(etype), False, "csr")
order = csr(2)
rev_csr = _CAPI_DGLHeteroGetAdj(self, int(etype), False, "csr")
rev_csr = _CAPI_DGLHeteroGetAdj(self, int(etype), True, "csr")
rev_order = rev_csr(2)
return utils.toindex(order, self.dtype), utils.toindex(rev_order, self.dtype)
......@@ -1106,7 +1106,7 @@ def create_unitgraph_from_coo(num_ntypes, num_src, num_dst, row, col,
formats, row_sorted, col_sorted)
def create_unitgraph_from_csr(num_ntypes, num_src, num_dst, indptr, indices, edge_ids,
formats):
formats, transpose=False):
"""Create a unitgraph graph index from CSR format
Parameters
......@@ -1125,6 +1125,8 @@ def create_unitgraph_from_csr(num_ntypes, num_src, num_dst, indptr, indices, edg
Edge shuffle id.
formats : str
Restrict the storage formats allowed for the unit graph.
transpose : bool, optional
If True, treats the input matrix as CSC.
Returns
-------
......@@ -1135,7 +1137,7 @@ def create_unitgraph_from_csr(num_ntypes, num_src, num_dst, indptr, indices, edg
return _CAPI_DGLHeteroCreateUnitGraphFromCSR(
int(num_ntypes), int(num_src), int(num_dst),
F.to_dgl_nd(indptr), F.to_dgl_nd(indices), F.to_dgl_nd(edge_ids),
formats)
formats, transpose)
def create_heterograph_from_relations(metagraph, rel_graphs, num_nodes_per_type):
"""Create a heterograph from metagraph and graphs of every relation.
......
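
On the index layer, a CSC input is routed through the same entry point with the new ``transpose`` flag, which the C API maps to ``CreateFromCSC``. A hedged sketch of that internal call, mirroring the dispatch added in ``convert.py`` (internal API, subject to change):

import torch
from dgl import heterograph_index

indptr = torch.tensor([0, 1, 2, 3, 3])
indices = torch.tensor([1, 2, 3])
eids = torch.tensor([], dtype=torch.int64)
# rows are treated as sources (plain CSR) ...
g_csr = heterograph_index.create_unitgraph_from_csr(
    1, 4, 4, indptr, indices, eids, ['coo', 'csr', 'csc'])
# ... or as destinations (CSC) when transpose=True
g_csc = heterograph_index.create_unitgraph_from_csr(
    1, 4, 4, indptr, indices, eids, ['coo', 'csr', 'csc'], transpose=True)
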
......@@ -954,7 +954,7 @@ def khop_adj(g, k):
"""
assert g.is_homogeneous, \
'only homogeneous graph is supported'
adj_k = g.adj(scipy_fmt=g.formats()['created'][0]) ** k
adj_k = g.adj(transpose=True, scipy_fmt=g.formats()['created'][0]) ** k
return F.tensor(adj_k.todense().astype(np.float32))
def khop_graph(g, k, copy_ndata=True):
......@@ -1024,7 +1024,7 @@ def khop_graph(g, k, copy_ndata=True):
assert g.is_homogeneous, \
'only homogeneous graph is supported'
n = g.number_of_nodes()
adj_k = g.adj(transpose=True, scipy_fmt=g.formats()['created'][0]) ** k
adj_k = g.adj(transpose=False, scipy_fmt=g.formats()['created'][0]) ** k
adj_k = adj_k.tocoo()
multiplicity = adj_k.data
row = np.repeat(adj_k.row, multiplicity)
......@@ -1280,7 +1280,7 @@ def laplacian_lambda_max(g):
rst = []
for g_i in g_arr:
n = g_i.number_of_nodes()
adj = g_i.adj(scipy_fmt=g_i.formats()['created'][0]).astype(float)
adj = g_i.adj(transpose=True, scipy_fmt=g_i.formats()['created'][0]).astype(float)
norm = sparse.diags(F.asnumpy(g_i.in_degrees()).clip(1) ** -0.5, dtype=float)
laplacian = sparse.eye(n) - norm * adj * norm
rst.append(sparse.linalg.eigs(laplacian, 1, which='LM',
......@@ -1336,7 +1336,7 @@ def metapath_reachable_graph(g, metapath):
"""
adj = 1
for etype in metapath:
adj = adj * g.adj(etype=etype, scipy_fmt='csr', transpose=True)
adj = adj * g.adj(etype=etype, scipy_fmt='csr', transpose=False)
adj = (adj != 0).tocsr()
srctype = g.to_canonical_etype(metapath[0])[0]
......@@ -2845,12 +2845,12 @@ def sort_in_edges(g, tag, tag_offset_name='_TAG_OFFSET'):
-----------
>>> g = dgl.graph(([0,1,2,3,4,0,1,2],[0,0,0,0,0,1,1,1]))
>>> g.adjacency_matrix(scipy_fmt='csr', transpose=False).nonzero()
>>> g.adjacency_matrix(scipy_fmt='csr', transpose=True).nonzero()
(array([0, 0, 0, 0, 0, 1, 1, 1], dtype=int32),
array([0, 1, 2, 3, 4, 0, 1, 2], dtype=int32)))
>>> tag = torch.IntTensor([1,1,0,2,0])
>>> g_sorted = dgl.transform.sort_in_edges(g, tag)
>>> g_sorted.adjacency_matrix(scipy_fmt='csr', transpose=False).nonzero()
>>> g_sorted.adjacency_matrix(scipy_fmt='csr', transpose=True).nonzero()
(array([0, 0, 0, 0, 0, 1, 1, 1], dtype=int32),
array([2, 4, 0, 1, 3, 2, 0, 1], dtype=int32))
>>> g_sorted.ndata['_TAG_OFFSET']
......
"""Data utilities."""
from collections import namedtuple
import scipy as sp
import networkx as nx
......@@ -31,7 +32,9 @@ def elist2tensor(elist, idtype):
return F.tensor(u, idtype), F.tensor(v, idtype)
def scipy2tensor(spmat, idtype):
"""Function to convert a scipy matrix to edge tensors.
"""Function to convert a scipy matrix to a sparse adjacency matrix tuple.
Note that the data array of the scipy matrix is discarded.
Parameters
----------
......@@ -42,13 +45,20 @@ def scipy2tensor(spmat, idtype):
Returns
-------
(Tensor, Tensor)
Edge tensors.
(str, tuple[Tensor])
A tuple containing the format as well as the list of tensors representing
the sparse matrix.
"""
spmat = spmat.tocoo()
row = F.tensor(spmat.row, idtype)
col = F.tensor(spmat.col, idtype)
return row, col
if spmat.format in ['csr', 'csc']:
indptr = F.tensor(spmat.indptr, idtype)
indices = F.tensor(spmat.indices, idtype)
data = F.tensor([], idtype)
return SparseAdjTuple(spmat.format, (indptr, indices, data))
else:
spmat = spmat.tocoo()
row = F.tensor(spmat.row, idtype)
col = F.tensor(spmat.col, idtype)
return SparseAdjTuple('coo', (row, col))
def networkx2tensor(nx_graph, idtype, edge_id_attr_name=None):
"""Function to convert a networkx graph to edge tensors.
......@@ -96,6 +106,8 @@ def networkx2tensor(nx_graph, idtype, edge_id_attr_name=None):
dst = F.tensor(dst, idtype)
return src, dst
SparseAdjTuple = namedtuple('SparseAdjTuple', ['format', 'arrays'])
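
For reference, a self-contained sketch (a hypothetical standalone helper, not DGL's own ``scipy2tensor``) of the conversion that now produces a ``SparseAdjTuple``: CSR/CSC matrices keep their native layout, anything else falls back to COO, and the data array is dropped.

import scipy.sparse as ssp
import torch

def scipy_to_adj_tuple(spmat):
    """Return (format, arrays) for a SciPy sparse matrix; the data array is discarded."""
    if spmat.format in ('csr', 'csc'):
        return spmat.format, (torch.as_tensor(spmat.indptr),
                              torch.as_tensor(spmat.indices),
                              torch.tensor([], dtype=torch.int64))
    coo = spmat.tocoo()
    return 'coo', (torch.as_tensor(coo.row), torch.as_tensor(coo.col))

fmt, arrays = scipy_to_adj_tuple(ssp.random(5, 5, density=0.4, format='csr'))
print(fmt, [tuple(a.shape) for a in arrays])   # e.g. csr [(6,), (10,), (0,)]
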
def graphdata2tensors(data, idtype=None, bipartite=False, **kwargs):
"""Function to convert various types of data to edge tensors and infer
the number of nodes.
......@@ -103,7 +115,14 @@ def graphdata2tensors(data, idtype=None, bipartite=False, **kwargs):
Parameters
----------
data : graph data
Various kinds of graph data.
Various kinds of graph data. Possible data types are:
- ``(row, col)``
- ``('coo', (row, col))``
- ``('csr', (indptr, indices, edge_ids))``
- ``('csc', (indptr, indices, edge_ids))``
- SciPy sparse matrix
- NetworkX graph
idtype : int32, int64, optional
Integer ID type. If None, try infer from the data and if fail use
int64.
......@@ -121,55 +140,52 @@ def graphdata2tensors(data, idtype=None, bipartite=False, **kwargs):
Returns
-------
src : Tensor
Src nodes.
dst : Tensor
Dst nodes.
data : SparseAdjTuple
A tuple with the sparse matrix format and the adjacency matrix tensors.
num_src : int
Number of source nodes
Number of source nodes.
num_dst : int
Number of destination nodes.
"""
if idtype is None and not (isinstance(data, tuple) and F.is_tensor(data[0])):
# Convert tuple to SparseAdjTuple
if isinstance(data, tuple):
if not isinstance(data[0], str):
# (row, col) format, convert to ('coo', (row, col))
data = ('coo', data)
data = SparseAdjTuple(*data)
if idtype is None and \
not (isinstance(data, SparseAdjTuple) and F.is_tensor(data.arrays[0])):
# preferred default idtype is int64
# if data is tensor and idtype is None, infer the idtype from tensor
idtype = F.int64
checks.check_valid_idtype(idtype)
if isinstance(data, tuple) and (not F.is_tensor(data[0]) or not F.is_tensor(data[1])):
if isinstance(data, SparseAdjTuple) and (not all(F.is_tensor(a) for a in data.arrays)):
# The arrays are iterables (e.g. lists) rather than tensors; convert each to a tensor
if len(data[0]) == 0:
if len(data.arrays[0]) == 0:
# force idtype for empty list
data = F.tensor(data[0], idtype), F.tensor(data[1], idtype)
data = SparseAdjTuple(data.format, tuple(F.tensor(a, idtype) for a in data.arrays))
else:
# convert the iterable to tensor and keep its native data type so we can check
# its validity later
data = F.tensor(data[0]), F.tensor(data[1])
data = SparseAdjTuple(data.format, tuple(F.tensor(a) for a in data.arrays))
if isinstance(data, tuple):
# (Tensor, Tensor) type data
src, dst = data
# sanity checks
# TODO(minjie): move these checks to C for faster graph construction.
if F.dtype(src) != F.dtype(dst):
raise DGLError('Expect the source and destination node IDs to have the same type,'
' but got {} and {}.'.format(F.dtype(src), F.dtype(dst)))
if F.context(src) != F.context(dst):
raise DGLError('Expect the source and destination node IDs to be on the same device,'
' but got {} and {}.'.format(F.context(src), F.context(dst)))
if F.dtype(src) not in (F.int32, F.int64):
raise DGLError('Expect the source ID tensor to have data type int32 or int64,'
' but got {}.'.format(F.dtype(src)))
if F.dtype(dst) not in (F.int32, F.int64):
raise DGLError('Expect the destination ID tensor to have data type int32 or int64,'
' but got {}.'.format(F.dtype(dst)))
if isinstance(data, SparseAdjTuple):
if idtype is not None:
src, dst = F.astype(src, idtype), F.astype(dst, idtype)
data = SparseAdjTuple(data.format, tuple(F.astype(a, idtype) for a in data.arrays))
num_src, num_dst = infer_num_nodes(data, bipartite=bipartite)
elif isinstance(data, list):
src, dst = elist2tensor(data, idtype)
data = SparseAdjTuple('coo', (src, dst))
num_src, num_dst = infer_num_nodes(data, bipartite=bipartite)
elif isinstance(data, sp.sparse.spmatrix):
src, dst = scipy2tensor(data, idtype)
# We can get scipy matrix's number of rows and columns easily.
num_src, num_dst = infer_num_nodes(data, bipartite=bipartite)
data = scipy2tensor(data, idtype)
elif isinstance(data, nx.Graph):
# We can get networkx graph's number of sources and destinations easily.
num_src, num_dst = infer_num_nodes(data, bipartite=bipartite)
edge_id_attr_name = kwargs.get('edge_id_attr_name', None)
if bipartite:
top_map = kwargs.get('top_map')
......@@ -180,22 +196,11 @@ def graphdata2tensors(data, idtype=None, bipartite=False, **kwargs):
else:
src, dst = networkx2tensor(
data, idtype, edge_id_attr_name=edge_id_attr_name)
data = SparseAdjTuple('coo', (src, dst))
else:
raise DGLError('Unsupported graph data type:', type(data))
if len(src) != len(dst):
raise DGLError('Expect the source and destination ID tensors to have the same length,'
' but got {} and {}.'.format(len(src), len(dst)))
if len(src) > 0 and (F.as_scalar(F.min(src, 0)) < 0 or F.as_scalar(F.min(dst, 0)) < 0):
raise DGLError('All IDs must be non-negative integers.')
# infer number of nodes
infer_from_raw = infer_num_nodes(data, bipartite=bipartite)
if infer_from_raw is None:
num_src, num_dst = infer_num_nodes((src, dst), bipartite=bipartite)
else:
num_src, num_dst = infer_from_raw
return src, dst, num_src, num_dst
return data, num_src, num_dst
def networkxbipartite2tensors(nx_graph, idtype, top_map, bottom_map, edge_id_attr_name=None):
"""Function to convert a networkx bipartite to edge tensors.
......@@ -264,9 +269,11 @@ def infer_num_nodes(data, bipartite=False):
----------
data : graph data
Supported types are:
* Tensor pair (u, v)
* SciPy matrix
* NetworkX graph
* SparseTuple ``(sparse_fmt, arrays)`` where ``arrays`` can be either ``(src, dst)`` or
``(indptr, indices, data)``.
* SciPy matrix.
* NetworkX graph.
bipartite : bool, optional
Whether infer number of nodes of a bipartite graph --
num_src and num_dst can be different.
......@@ -283,10 +290,27 @@ def infer_num_nodes(data, bipartite=False):
None
If the inference failed.
"""
if isinstance(data, tuple) and len(data) == 2 and F.is_tensor(data[0]):
u, v = data
nsrc = F.as_scalar(F.max(u, dim=0)) + 1 if len(u) > 0 else 0
ndst = F.as_scalar(F.max(v, dim=0)) + 1 if len(v) > 0 else 0
if isinstance(data, tuple) and len(data) == 2:
if not isinstance(data[0], str):
raise TypeError('Expected sparse format as a str, but got %s' % type(data[0]))
if data[0] == 'coo':
# ('coo', (src, dst)) format
u, v = data[1]
nsrc = F.as_scalar(F.max(u, dim=0)) + 1 if len(u) > 0 else 0
ndst = F.as_scalar(F.max(v, dim=0)) + 1 if len(v) > 0 else 0
elif data[0] == 'csr':
# ('csr', (indptr, indices, eids)) format
indptr, indices, _ = data[1]
nsrc = F.shape(indptr)[0] - 1
ndst = F.as_scalar(F.max(indices, dim=0)) + 1 if len(indices) > 0 else 0
elif data[0] == 'csc':
# ('csc', (indptr, indices, eids)) format
indptr, indices, _ = data[1]
ndst = F.shape(indptr)[0] - 1
nsrc = F.as_scalar(F.max(indices, dim=0)) + 1 if len(indices) > 0 else 0
else:
raise ValueError('unknown format %s' % data[0])
elif isinstance(data, sp.sparse.spmatrix):
nsrc, ndst = data.shape[0], data.shape[1]
elif isinstance(data, nx.Graph):
......
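
The inference rules added above can be checked by hand: for CSR the number of source nodes is the length of ``indptr`` minus one, and the number of destination nodes is the largest column index plus one (CSC swaps the two). A plain-Python worked example, using the indptr/indices from the ``dgl.graph`` docstring:

indptr = [0, 0, 0, 1, 2, 3]
indices = [1, 2, 3]
num_src = len(indptr) - 1                        # 5 rows in the CSR matrix
num_dst = max(indices) + 1 if indices else 0     # 4 = largest column index plus one
print(num_src, num_dst)                          # 5 4
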
......@@ -52,14 +52,20 @@ DGL_REGISTER_GLOBAL("heterograph_index._CAPI_DGLHeteroCreateUnitGraphFromCSR")
IdArray indices = args[4];
IdArray edge_ids = args[5];
List<Value> formats = args[6];
bool transpose = args[7];
std::vector<SparseFormat> formats_vec;
for (Value val : formats) {
std::string fmt = val->data;
formats_vec.push_back(ParseSparseFormat(fmt));
}
const auto code = SparseFormatsToCode(formats_vec);
auto hgptr = CreateFromCSR(nvtypes, num_src, num_dst, indptr, indices, edge_ids, code);
*rv = HeteroGraphRef(hgptr);
if (!transpose) {
auto hgptr = CreateFromCSR(nvtypes, num_src, num_dst, indptr, indices, edge_ids, code);
*rv = HeteroGraphRef(hgptr);
} else {
auto hgptr = CreateFromCSC(nvtypes, num_src, num_dst, indptr, indices, edge_ids, code);
*rv = HeteroGraphRef(hgptr);
}
});
DGL_REGISTER_GLOBAL("heterograph_index._CAPI_DGLHeteroCreateHeteroGraph")
......
......@@ -456,6 +456,8 @@ class UnitGraph::CSR : public BaseHeteroGraph {
if (aten::IsValidIdArray(edge_ids))
CHECK((indices->shape[0] == edge_ids->shape[0]) || aten::IsNullArray(edge_ids))
<< "edge id arrays should have the same length as indices if not empty";
CHECK_EQ(num_src, indptr->shape[0] - 1)
<< "number of nodes do not match the length of indptr minus 1.";
adj_ = aten::CSRMatrix{num_src, num_dst, indptr, indices, edge_ids};
}
......@@ -1070,10 +1072,10 @@ std::vector<IdArray> UnitGraph::GetAdj(
// to_scipy_sparse_matrix. With the upcoming custom kernel change, we should change the
// behavior and make row for src and col for dst.
if (fmt == std::string("csr")) {
return transpose? GetOutCSR()->GetAdj(etype, false, "csr")
return !transpose ? GetOutCSR()->GetAdj(etype, false, "csr")
: GetInCSR()->GetAdj(etype, false, "csr");
} else if (fmt == std::string("coo")) {
return GetCOO()->GetAdj(etype, !transpose, fmt);
return GetCOO()->GetAdj(etype, transpose, fmt);
} else {
LOG(FATAL) << "unsupported adjacency matrix format: " << fmt;
return {};
......
......@@ -33,7 +33,7 @@ def test_csrmm(idtype, dtype):
a, A = _random_simple_graph(idtype, dtype, F.ctx(), 500, 600, 9000, 'A', 'B', 'AB')
b, B = _random_simple_graph(idtype, dtype, F.ctx(), 600, 700, 9000, 'B', 'C', 'BC')
C, C_weights = dgl.sparse._csrmm(A._graph, A.edata['w'], B._graph, B.edata['w'], 2)
C_adj = C.adjacency_matrix_scipy(0, True, 'csr')
C_adj = C.adjacency_matrix_scipy(0, False, 'csr')
C_adj.data = F.asnumpy(C_weights)
C_adj = F.tensor(C_adj.todense(), dtype=dtype)
c = F.tensor((a * b).todense(), dtype=dtype)
......@@ -83,7 +83,7 @@ def test_csrsum(idtype, dtype):
a, A = _random_simple_graph(idtype, dtype, F.ctx(), 500, 600, 9000, 'A', 'B', 'AB')
b, B = _random_simple_graph(idtype, dtype, F.ctx(), 500, 600, 9000, 'A', 'B', 'AB')
C, C_weights = dgl.sparse._csrsum([A._graph, B._graph], [A.edata['w'], B.edata['w']])
C_adj = C.adjacency_matrix_scipy(0, True, 'csr')
C_adj = C.adjacency_matrix_scipy(0, False, 'csr')
C_adj.data = F.asnumpy(C_weights)
C_adj = F.tensor(C_adj.todense(), dtype=dtype)
c = F.tensor((a + b).todense(), dtype=dtype)
......
......@@ -118,9 +118,9 @@ def test_query():
assert F.allclose(g.out_degrees([8, 9]), F.tensor([0, 1]))
assert np.array_equal(
F.sparse_to_numpy(g.adjacency_matrix(transpose=False)), scipy_coo_input().toarray().T)
F.sparse_to_numpy(g.adjacency_matrix(transpose=True)), scipy_coo_input().toarray().T)
assert np.array_equal(
F.sparse_to_numpy(g.adjacency_matrix(transpose=True)), scipy_coo_input().toarray())
F.sparse_to_numpy(g.adjacency_matrix(transpose=False)), scipy_coo_input().toarray())
def _test(g):
# test twice to see whether the cached format works or not
......@@ -192,9 +192,9 @@ def test_query():
assert F.allclose(g.out_degrees([8, 9]), F.tensor([0, 1]))
assert np.array_equal(
F.sparse_to_numpy(g.adjacency_matrix(transpose=False)), scipy_coo_input().toarray().T)
F.sparse_to_numpy(g.adjacency_matrix(transpose=True)), scipy_coo_input().toarray().T)
assert np.array_equal(
F.sparse_to_numpy(g.adjacency_matrix(transpose=True)), scipy_coo_input().toarray())
F.sparse_to_numpy(g.adjacency_matrix(transpose=False)), scipy_coo_input().toarray())
def _test_csr(g):
# test twice to see whether the cached format works or not
......@@ -253,8 +253,8 @@ def test_scipy_adjmat():
adj_1 = g.adj(scipy_fmt='coo')
assert np.array_equal(adj_0.toarray(), adj_1.toarray())
adj_t0 = g.adj(transpose=True, scipy_fmt='csr')
adj_t_1 = g.adj(transpose=True, scipy_fmt='coo')
adj_t0 = g.adj(transpose=False, scipy_fmt='csr')
adj_t_1 = g.adj(transpose=False, scipy_fmt='coo')
assert np.array_equal(adj_0.toarray(), adj_1.toarray())
def test_incmat():
......
......@@ -51,8 +51,8 @@ def create_test_heterograph2(idtype):
g = dgl.heterograph({
('user', 'follows', 'user'): ([0, 1], [1, 2]),
('user', 'plays', 'game'): ([0, 1, 2, 1], [0, 0, 1, 1]),
('user', 'wishes', 'game'): ([0, 2], [1, 0]),
('developer', 'develops', 'game'): ([0, 1], [0, 1]),
('user', 'wishes', 'game'): ('csr', ([0, 1, 1, 2], [1, 0], [])),
('developer', 'develops', 'game'): ('csc', ([0, 1, 2], [0, 1], [0, 1])),
}, idtype=idtype, device=F.ctx())
assert g.idtype == idtype
assert g.device == F.ctx()
......@@ -488,53 +488,53 @@ def _test_edge_ids():
@parametrize_dtype
def test_adj(idtype):
g = create_test_heterograph(idtype)
adj = F.sparse_to_numpy(g.adj(transpose=False, etype='follows'))
adj = F.sparse_to_numpy(g.adj(transpose=True, etype='follows'))
assert np.allclose(
adj,
np.array([[0., 0., 0.],
[1., 0., 0.],
[0., 1., 0.]]))
adj = F.sparse_to_numpy(g.adj(transpose=True, etype='follows'))
adj = F.sparse_to_numpy(g.adj(transpose=False, etype='follows'))
assert np.allclose(
adj,
np.array([[0., 1., 0.],
[0., 0., 1.],
[0., 0., 0.]]))
adj = F.sparse_to_numpy(g.adj(transpose=False, etype='plays'))
adj = F.sparse_to_numpy(g.adj(transpose=True, etype='plays'))
assert np.allclose(
adj,
np.array([[1., 1., 0.],
[0., 1., 1.]]))
adj = F.sparse_to_numpy(g.adj(transpose=True, etype='plays'))
adj = F.sparse_to_numpy(g.adj(transpose=False, etype='plays'))
assert np.allclose(
adj,
np.array([[1., 0.],
[1., 1.],
[0., 1.]]))
adj = g.adj(transpose=False, scipy_fmt='csr', etype='follows')
adj = g.adj(transpose=True, scipy_fmt='csr', etype='follows')
assert np.allclose(
adj.todense(),
np.array([[0., 0., 0.],
[1., 0., 0.],
[0., 1., 0.]]))
adj = g.adj(transpose=False, scipy_fmt='coo', etype='follows')
adj = g.adj(transpose=True, scipy_fmt='coo', etype='follows')
assert np.allclose(
adj.todense(),
np.array([[0., 0., 0.],
[1., 0., 0.],
[0., 1., 0.]]))
adj = g.adj(transpose=False, scipy_fmt='csr', etype='plays')
adj = g.adj(transpose=True, scipy_fmt='csr', etype='plays')
assert np.allclose(
adj.todense(),
np.array([[1., 1., 0.],
[0., 1., 1.]]))
adj = g.adj(transpose=False, scipy_fmt='coo', etype='plays')
adj = g.adj(transpose=True, scipy_fmt='coo', etype='plays')
assert np.allclose(
adj.todense(),
np.array([[1., 1., 0.],
[0., 1., 1.]]))
adj = F.sparse_to_numpy(g['follows'].adj(transpose=False))
adj = F.sparse_to_numpy(g['follows'].adj(transpose=True))
assert np.allclose(
adj,
np.array([[0., 0., 0.],
......@@ -2648,6 +2648,63 @@ def test_create_block(idtype):
assert hg.edges['AB'].data['x'] is eabx
assert hg.edges['BA'].data['x'] is ebax
@parametrize_dtype
@pytest.mark.parametrize('fmt', ['coo', 'csr', 'csc'])
def test_adj_sparse(idtype, fmt):
if fmt == 'coo':
A = ssp.random(10, 10, 0.2).tocoo()
A.data = np.arange(20)
row = F.tensor(A.row, idtype)
col = F.tensor(A.col, idtype)
g = dgl.graph((row, col))
elif fmt == 'csr':
A = ssp.random(10, 10, 0.2).tocsr()
A.data = np.arange(20)
indptr = F.tensor(A.indptr, idtype)
indices = F.tensor(A.indices, idtype)
g = dgl.graph(('csr', (indptr, indices, [])))
with pytest.raises(DGLError):
g2 = dgl.graph(('csr', (indptr[:-1], indices, [])), num_nodes=10)
elif fmt == 'csc':
A = ssp.random(10, 10, 0.2).tocsc()
A.data = np.arange(20)
indptr = F.tensor(A.indptr, idtype)
indices = F.tensor(A.indices, idtype)
g = dgl.graph(('csc', (indptr, indices, [])))
with pytest.raises(DGLError):
g2 = dgl.graph(('csr', (indptr[:-1], indices, [])), num_nodes=10)
A_coo = A.tocoo()
A_csr = A.tocsr()
A_csc = A.tocsc()
row, col = g.adj_sparse('coo')
assert np.array_equal(F.asnumpy(row), A_coo.row)
assert np.array_equal(F.asnumpy(col), A_coo.col)
indptr, indices, eids = g.adj_sparse('csr')
assert np.array_equal(F.asnumpy(indptr), A_csr.indptr)
if fmt == 'csr':
assert len(eids) == 0
assert np.array_equal(F.asnumpy(indices), A_csr.indices)
else:
indices_sorted = F.zeros(len(indices), idtype)
indices_sorted = F.scatter_row(indices_sorted, eids, indices)
indices_sorted_np = np.zeros(len(indices), dtype=A_csr.indices.dtype)
indices_sorted_np[A_csr.data] = A_csr.indices
assert np.array_equal(F.asnumpy(indices_sorted), indices_sorted_np)
indptr, indices, eids = g.adj_sparse('csc')
assert np.array_equal(F.asnumpy(indptr), A_csc.indptr)
if fmt == 'csc':
assert len(eids) == 0
assert np.array_equal(F.asnumpy(indices), A_csc.indices)
else:
indices_sorted = F.zeros(len(indices), idtype)
indices_sorted = F.scatter_row(indices_sorted, eids, indices)
indices_sorted_np = np.zeros(len(indices), dtype=A_csc.indices.dtype)
indices_sorted_np[A_csc.data] = A_csc.indices
assert np.array_equal(F.asnumpy(indices_sorted), indices_sorted_np)
if __name__ == '__main__':
# test_create()
......
......@@ -62,8 +62,8 @@ def test_sort_with_tag(idtype):
assert(not check_sort(old_csr, tag)) # Check the original csr is not modified.
new_g = dgl.sort_in_edges(g, tag)
old_csc = g.adjacency_matrix(transpose=False, scipy_fmt='csr')
new_csc = new_g.adjacency_matrix(transpose=False, scipy_fmt='csr')
old_csc = g.adjacency_matrix(transpose=True, scipy_fmt='csr')
new_csc = new_g.adjacency_matrix(transpose=True, scipy_fmt='csr')
assert(check_sort(new_csc, tag, new_g.ndata["_TAG_OFFSET"]))
assert(not check_sort(old_csc, tag))
......@@ -83,8 +83,8 @@ def test_sort_with_tag_bipartite(idtype):
assert(not check_sort(old_csr, vtag))
new_g = dgl.sort_in_edges(g, utag)
old_csc = g.adjacency_matrix(transpose=False, scipy_fmt='csr')
new_csc = new_g.adjacency_matrix(transpose=False, scipy_fmt='csr')
old_csc = g.adjacency_matrix(transpose=True, scipy_fmt='csr')
new_csc = new_g.adjacency_matrix(transpose=True, scipy_fmt='csr')
assert(check_sort(new_csc, utag, new_g.nodes['_V'].data['_TAG_OFFSET']))
assert(not check_sort(old_csc, utag))
......
......@@ -67,7 +67,7 @@ def test_topological_nodes(idtype, n=100):
layers_dgl = dgl.topological_nodes_generator(g)
adjmat = g.adjacency_matrix(transpose=False)
adjmat = g.adjacency_matrix(transpose=True)
def tensor_topo_traverse():
n = g.number_of_nodes()
mask = F.copy_to(F.ones((n, 1)), F.cpu())
......
......@@ -134,7 +134,7 @@ def check_compute_func(worker_id, graph_name, return_dict):
in_feats = g.nodes[0].data['feat'].shape[1]
# Test update all.
g.update_all(fn.copy_src(src='feat', out='m'), fn.sum(msg='m', out='preprocess'))
adj = g.adjacency_matrix()
adj = g.adjacency_matrix(transpose=True)
tmp = F.spmm(adj, g.nodes[:].data['feat'])
assert_almost_equal(F.asnumpy(g.nodes[:].data['preprocess']), F.asnumpy(tmp))
g._sync_barrier(60)
......@@ -261,13 +261,13 @@ def check_mem(gidx, cond_v, shared_v):
cond_v.release()
gidx1 = dgl.graph_index.from_shared_mem_graph_index("test_graph5")
in_csr = gidx.adjacency_matrix_scipy(False, "csr")
out_csr = gidx.adjacency_matrix_scipy(True, "csr")
in_csr = gidx.adjacency_matrix_scipy(True, "csr")
out_csr = gidx.adjacency_matrix_scipy(False, "csr")
in_csr1 = gidx1.adjacency_matrix_scipy(False, "csr")
in_csr1 = gidx1.adjacency_matrix_scipy(True, "csr")
assert_array_equal(in_csr.indptr, in_csr1.indptr)
assert_array_equal(in_csr.indices, in_csr1.indices)
out_csr1 = gidx1.adjacency_matrix_scipy(True, "csr")
out_csr1 = gidx1.adjacency_matrix_scipy(False, "csr")
assert_array_equal(out_csr.indptr, out_csr1.indptr)
assert_array_equal(out_csr.indices, out_csr1.indices)
......
......@@ -25,7 +25,7 @@ def test_graph_conv(idtype, out_dim):
g = dgl.from_networkx(nx.path_graph(3))
g = g.astype(idtype).to(F.ctx())
ctx = F.ctx()
adj = g.adjacency_matrix(transpose=False, ctx=ctx)
adj = g.adjacency_matrix(transpose=True, ctx=ctx)
conv = nn.GraphConv(5, out_dim, norm='none', bias=True)
conv.initialize(ctx=ctx)
......@@ -136,7 +136,7 @@ def _S2AXWb(A, N, X, W, b):
def test_tagconv(out_dim):
g = dgl.from_networkx(nx.path_graph(3)).to(F.ctx())
ctx = F.ctx()
adj = g.adjacency_matrix(transpose=False, ctx=ctx)
adj = g.adjacency_matrix(transpose=True, ctx=ctx)
norm = mx.nd.power(g.in_degrees().astype('float32'), -0.5)
conv = nn.TAGConv(5, out_dim, bias=True)
......@@ -317,7 +317,7 @@ def test_dense_cheb_conv(out_dim):
for k in range(1, 4):
ctx = F.ctx()
g = dgl.from_scipy(sp.sparse.random(100, 100, density=0.3)).to(F.ctx())
adj = g.adjacency_matrix(transpose=False, ctx=ctx).tostype('default')
adj = g.adjacency_matrix(transpose=True, ctx=ctx).tostype('default')
cheb = nn.ChebConv(5, out_dim, k)
dense_cheb = nn.DenseChebConv(5, out_dim, k)
cheb.initialize(ctx=ctx)
......@@ -342,7 +342,7 @@ def test_dense_cheb_conv(out_dim):
def test_dense_graph_conv(idtype, g, norm_type, out_dim):
g = g.astype(idtype).to(F.ctx())
ctx = F.ctx()
adj = g.adjacency_matrix(transpose=False, ctx=ctx).tostype('default')
adj = g.adjacency_matrix(transpose=True, ctx=ctx).tostype('default')
conv = nn.GraphConv(5, out_dim, norm=norm_type, bias=True)
dense_conv = nn.DenseGraphConv(5, out_dim, norm=norm_type, bias=True)
conv.initialize(ctx=ctx)
......@@ -362,7 +362,7 @@ def test_dense_graph_conv(idtype, g, norm_type, out_dim):
def test_dense_sage_conv(idtype, g, out_dim):
g = g.astype(idtype).to(F.ctx())
ctx = F.ctx()
adj = g.adjacency_matrix(transpose=False, ctx=ctx).tostype('default')
adj = g.adjacency_matrix(transpose=True, ctx=ctx).tostype('default')
sage = nn.SAGEConv(5, out_dim, 'gcn')
dense_sage = nn.DenseSAGEConv(5, out_dim)
sage.initialize(ctx=ctx)
......
......@@ -24,7 +24,7 @@ def _AXWb(A, X, W, b):
def test_graph_conv0(out_dim):
g = dgl.DGLGraph(nx.path_graph(3)).to(F.ctx())
ctx = F.ctx()
adj = g.adjacency_matrix(transpose=False, ctx=ctx)
adj = g.adjacency_matrix(transpose=True, ctx=ctx)
conv = nn.GraphConv(5, out_dim, norm='none', bias=True)
conv = conv.to(ctx)
......@@ -186,7 +186,7 @@ def test_tagconv(out_dim):
g = dgl.DGLGraph(nx.path_graph(3))
g = g.to(F.ctx())
ctx = F.ctx()
adj = g.adjacency_matrix(transpose=False, ctx=ctx)
adj = g.adjacency_matrix(transpose=True, ctx=ctx)
norm = th.pow(g.in_degrees().float(), -0.5)
conv = nn.TAGConv(5, out_dim, bias=True)
......@@ -806,7 +806,7 @@ def test_dense_graph_conv(norm_type, g, idtype, out_dim):
g = g.astype(idtype).to(F.ctx())
ctx = F.ctx()
# TODO(minjie): enable the following option after #1385
adj = g.adjacency_matrix(transpose=False, ctx=ctx).to_dense()
adj = g.adjacency_matrix(transpose=True, ctx=ctx).to_dense()
conv = nn.GraphConv(5, out_dim, norm=norm_type, bias=True)
dense_conv = nn.DenseGraphConv(5, out_dim, norm=norm_type, bias=True)
dense_conv.weight.data = conv.weight.data
......@@ -824,7 +824,7 @@ def test_dense_graph_conv(norm_type, g, idtype, out_dim):
def test_dense_sage_conv(g, idtype, out_dim):
g = g.astype(idtype).to(F.ctx())
ctx = F.ctx()
adj = g.adjacency_matrix(transpose=False, ctx=ctx).to_dense()
adj = g.adjacency_matrix(transpose=True, ctx=ctx).to_dense()
sage = nn.SAGEConv(5, out_dim, 'gcn')
dense_sage = nn.DenseSAGEConv(5, out_dim)
dense_sage.fc.weight.data = sage.fc_neigh.weight.data
......@@ -911,7 +911,7 @@ def test_dense_cheb_conv(out_dim):
ctx = F.ctx()
g = dgl.DGLGraph(sp.sparse.random(100, 100, density=0.1), readonly=True)
g = g.to(F.ctx())
adj = g.adjacency_matrix(transpose=False, ctx=ctx).to_dense()
adj = g.adjacency_matrix(transpose=True, ctx=ctx).to_dense()
cheb = nn.ChebConv(5, out_dim, k, None)
dense_cheb = nn.DenseChebConv(5, out_dim, k)
#for i in range(len(cheb.fc)):
......