Unverified commit acd21a6d authored by Quan (Andy) Gan, committed by GitHub

[Feature] Support direct creation from CSR and CSC (#3045)



* csr and csc creation

* fix

* fix

* fixes to adj transpose

* fine

* raise error if indptr did not match number of nodes

* fix

* huh?

* oh
Co-authored-by: Minjie Wang <wmjlyjemaine@gmail.com>
parent 2f7ca414
@@ -110,7 +110,7 @@ class DiffPoolBatchedGraphLayer(nn.Module):
 assign_tensor = torch.block_diag(*assign_tensor)  # size = (sum_N, batch_size * N_a)
 h = torch.matmul(torch.t(assign_tensor), feat)
-adj = g.adjacency_matrix(transpose=False, ctx=device)
+adj = g.adjacency_matrix(transpose=True, ctx=device)
 adj_new = torch.sparse.mm(adj, assign_tensor)
 adj_new = torch.mm(torch.t(assign_tensor), adj_new)
...
@@ -393,7 +393,7 @@ class CSRMM(mx.autograd.Function):
 def forward(self, A_weights, B_weights):
 gidxC, C_weights = _csrmm(self.gidxA, A_weights, self.gidxB, B_weights, self.num_vtypes)
-nrows, ncols, C_indptr, C_indices, C_eids = gidxC.adjacency_matrix_tensors(0, True, 'csr')
+nrows, ncols, C_indptr, C_indices, C_eids = gidxC.adjacency_matrix_tensors(0, False, 'csr')
 # Note: the returned C_indptr, C_indices and C_eids tensors MUST be the same
 # as the underlying tensors of the created graph gidxC.
 self.backward_cache = gidxC
@@ -430,7 +430,7 @@ class CSRSum(mx.autograd.Function):
 def forward(self, *weights):
 gidxC, C_weights = _csrsum(self.gidxs, weights)
 nrows, ncols, C_indptr, C_indices, C_eids = gidxC.adjacency_matrix_tensors(
-    0, True, 'csr')
+    0, False, 'csr')
 # Note: the returned C_indptr, C_indices and C_eids tensors MUST be the same
 # as the underlying tensors of the created graph gidxC.
 self.backward_cache = gidxC
...
@@ -310,7 +310,7 @@ class CSRMM(th.autograd.Function):
 @staticmethod
 def forward(ctx, gidxA, A_weights, gidxB, B_weights, num_vtypes):
 gidxC, C_weights = _csrmm(gidxA, A_weights, gidxB, B_weights, num_vtypes)
-nrows, ncols, C_indptr, C_indices, C_eids = gidxC.adjacency_matrix_tensors(0, True, 'csr')
+nrows, ncols, C_indptr, C_indices, C_eids = gidxC.adjacency_matrix_tensors(0, False, 'csr')
 # Note: the returned C_indptr, C_indices and C_eids tensors MUST be the same
 # as the underlying tensors of the created graph gidxC.
 ctx.backward_cache = gidxA, gidxB, gidxC
@@ -337,7 +337,7 @@ class CSRSum(th.autograd.Function):
 # PyTorch tensors must be explicit arguments of the forward function
 gidxC, C_weights = _csrsum(gidxs, weights)
 nrows, ncols, C_indptr, C_indices, C_eids = gidxC.adjacency_matrix_tensors(
-    0, True, 'csr')
+    0, False, 'csr')
 # Note: the returned C_indptr, C_indices and C_eids tensors MUST be the same
 # as the underlying tensors of the created graph gidxC.
 ctx.backward_cache = gidxs, gidxC
...
@@ -302,7 +302,7 @@ def scatter_add(x, idx, m):
 def csrmm_real(gidxA, A_weights, gidxB, B_weights, num_vtypes):
 gidxC, C_weights = _csrmm(gidxA, A_weights, gidxB, B_weights, num_vtypes)
-nrows, ncols, C_indptr, C_indices, C_eids = gidxC.adjacency_matrix_tensors(0, True, 'csr')
+nrows, ncols, C_indptr, C_indices, C_eids = gidxC.adjacency_matrix_tensors(0, False, 'csr')
 def grad(dnrows, dncols, dC_indptr, dC_indices, dC_eids, dC_weights):
 # Only the last argument is meaningful.
@@ -328,7 +328,7 @@ def csrmm(gidxA, A_weights, gidxB, B_weights, num_vtypes):
 def csrsum_real(gidxs, weights):
 gidxC, C_weights = _csrsum(gidxs, weights)
-nrows, ncols, C_indptr, C_indices, C_eids = gidxC.adjacency_matrix_tensors(0, True, 'csr')
+nrows, ncols, C_indptr, C_indices, C_eids = gidxC.adjacency_matrix_tensors(0, False, 'csr')
 def grad(dnrows, dncols, dC_indptr, dC_indices, dC_eids, dC_weights):
 # Only the last argument is meaningful.
...
@@ -53,8 +53,18 @@ def graph(data,
 DGL calls this format "tuple of node-tensors". The tensors should have the same
 data type of int32/int64 and device context (see below the descriptions of
 :attr:`idtype` and :attr:`device`).
-- ``(iterable[int], iterable[int])``: Similar to the tuple of node-tensors
-  format, but stores node IDs in two sequences (e.g. list, tuple, numpy.ndarray).
+- ``('coo', (Tensor, Tensor))``: Same as ``(Tensor, Tensor)``.
+- ``('csr', (Tensor, Tensor, Tensor))``: The three tensors form the CSR representation
+  of the graph's adjacency matrix. The first one is the row index pointer. The
+  second one is the column indices. The third one is the edge IDs, which can be empty
+  to represent consecutive integer IDs starting from 0.
+- ``('csc', (Tensor, Tensor, Tensor))``: The three tensors form the CSC representation
+  of the graph's adjacency matrix. The first one is the column index pointer. The
+  second one is the row indices. The third one is the edge IDs, which can be empty
+  to represent consecutive integer IDs starting from 0.
+  The tensors can be replaced with any iterable of integers (e.g. list, tuple,
+  numpy.ndarray).
 ntype : str, optional
 Deprecated. To construct a graph with named node types, use :func:`dgl.heterograph`.
 etype : str, optional
@@ -131,6 +141,14 @@ def graph(data,
 >>> g = dgl.graph((src_ids, dst_ids), idtype=torch.int32, device='cuda:0')
+Create a graph with a CSR representation:
+>>> g = dgl.graph(('csr', ([0, 0, 0, 1, 2, 3], [1, 2, 3], [])))
+Create the same graph with a CSR representation and explicit edge IDs:
+>>> g = dgl.graph(('csr', ([0, 0, 0, 1, 2, 3], [1, 2, 3], [0, 1, 2])))
 See Also
 --------
 from_scipy
@@ -158,16 +176,15 @@ def graph(data,
 " Please refer to their API documents for more details.".format(
 deprecated_kwargs.keys()))
-u, v, urange, vrange = utils.graphdata2tensors(data, idtype)
+(sparse_fmt, arrays), urange, vrange = utils.graphdata2tensors(data, idtype)
 if num_nodes is not None:  # override the number of nodes
 if num_nodes < max(urange, vrange):
 raise DGLError('The num_nodes argument must be larger than the max ID in the data,'
 ' but got {} and {}.'.format(num_nodes, max(urange, vrange) - 1))
 urange, vrange = num_nodes, num_nodes
-g = create_from_edges(u, v, '_N', '_E', '_N', urange, vrange,
-                      row_sorted=row_sorted, col_sorted=col_sorted,
-                      validate=False)
+g = create_from_edges(sparse_fmt, arrays, '_N', '_E', '_N', urange, vrange,
+                      row_sorted=row_sorted, col_sorted=col_sorted)
 return g.to(device)
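For readers skimming the hunk above, here is a small sketch (not part of the patch) of the new creation path: the docstring's CSR example expressed in COO, CSR, and CSC form. The CSC index arrays are my own rewriting of that example, so treat the exact values as an illustration.

import dgl

# The same toy graph built three ways; all three are expected to yield identical edges.
g_coo = dgl.graph(([2, 3, 4], [1, 2, 3]))
g_csr = dgl.graph(('csr', ([0, 0, 0, 1, 2, 3], [1, 2, 3], [])))
# CSC: indptr runs over destination nodes, indices hold the source nodes.
g_csc = dgl.graph(('csc', ([0, 0, 1, 2, 3, 3], [2, 3, 4], [])))
print(g_coo.edges())  # (tensor([2, 3, 4]), tensor([1, 2, 3]))
print(g_csr.edges())  # expected to match g_coo
print(g_csc.edges())  # expected to match g_coo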
@@ -226,8 +243,18 @@ def heterograph(data_dict,
 this format "tuple of node-tensors". The tensors should have the same data type,
 which must be either int32 or int64. They should also have the same device context
 (see below the descriptions of :attr:`idtype` and :attr:`device`).
-- ``(iterable[int], iterable[int])``: Similar to the tuple of node-tensors
-  format, but stores node IDs in two sequences (e.g. list, tuple, numpy.ndarray).
+- ``('coo', (Tensor, Tensor))``: Same as ``(Tensor, Tensor)``.
+- ``('csr', (Tensor, Tensor, Tensor))``: The three tensors form the CSR representation
+  of the graph's adjacency matrix. The first one is the row index pointer. The
+  second one is the column indices. The third one is the edge IDs, which can be empty
+  (i.e. with 0 elements) to represent consecutive integer IDs starting from 0.
+- ``('csc', (Tensor, Tensor, Tensor))``: The three tensors form the CSC representation
+  of the graph's adjacency matrix. The first one is the column index pointer. The
+  second one is the row indices. The third one is the edge IDs, which can be empty
+  to represent consecutive integer IDs starting from 0.
+  The tensors can be replaced with any iterable of integers (e.g. list, tuple,
+  numpy.ndarray).
 num_nodes_dict : dict[str, int], optional
 The number of nodes for some node types, which is a dictionary mapping a node type
 :math:`T` to the number of :math:`T`-typed nodes. If not given for a node type
@@ -320,8 +347,9 @@ def heterograph(data_dict,
 raise DGLError("dgl.heterograph no longer supports graph construction from a NetworkX "
 "graph, use dgl.from_networkx instead.")
 is_bipartite = (sty != dty)
-u, v, urange, vrange = utils.graphdata2tensors(data, idtype, bipartite=is_bipartite)
-node_tensor_dict[(sty, ety, dty)] = (u, v)
+(sparse_fmt, arrays), urange, vrange = utils.graphdata2tensors(
+    data, idtype, bipartite=is_bipartite)
+node_tensor_dict[(sty, ety, dty)] = (sparse_fmt, arrays)
 if need_infer:
 num_nodes_dict[sty] = max(num_nodes_dict[sty], urange)
 num_nodes_dict[dty] = max(num_nodes_dict[dty], vrange)
@@ -340,8 +368,8 @@ def heterograph(data_dict,
 num_nodes_per_type = utils.toindex([num_nodes_dict[ntype] for ntype in ntypes], "int64")
 rel_graphs = []
 for srctype, etype, dsttype in relations:
-src, dst = node_tensor_dict[(srctype, etype, dsttype)]
-g = create_from_edges(src, dst, srctype, etype, dsttype,
+sparse_fmt, arrays = node_tensor_dict[(srctype, etype, dsttype)]
+g = create_from_edges(sparse_fmt, arrays, srctype, etype, dsttype,
                       num_nodes_dict[srctype], num_nodes_dict[dsttype])
 rel_graphs.append(g)
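The same applies to dgl.heterograph: individual relations can now be given directly in CSR or CSC form. A minimal sketch mirroring the updated test later in this diff (the 'csr' triple is (indptr, indices, edge_ids); an empty edge_ids list means consecutive IDs starting from 0):

import dgl

g = dgl.heterograph({
    ('user', 'follows', 'user'): ([0, 1], [1, 2]),
    # user 0 wishes game 1, user 2 wishes game 0, given directly as CSR.
    ('user', 'wishes', 'game'): ('csr', ([0, 1, 1, 2], [1, 0], [])),
})
print(g.edges(etype='wishes'))  # expected: (tensor([0, 2]), tensor([1, 0]))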
@@ -368,8 +396,18 @@ def create_block(data_dict, num_src_nodes=None, num_dst_nodes=None, idtype=None,
 this format "tuple of node-tensors". The tensors should have the same data type,
 which must be either int32 or int64. They should also have the same device context
 (see below the descriptions of :attr:`idtype` and :attr:`device`).
-- ``(iterable[int], iterable[int])``: Similar to the tuple of node-tensors
-  format, but stores node IDs in two sequences (e.g. list, tuple, numpy.ndarray).
+- ``('coo', (Tensor, Tensor))``: Same as ``(Tensor, Tensor)``.
+- ``('csr', (Tensor, Tensor, Tensor))``: The three tensors form the CSR representation
+  of the graph's adjacency matrix. The first one is the row index pointer. The
+  second one is the column indices. The third one is the edge IDs, which can be empty
+  to represent consecutive integer IDs starting from 0.
+- ``('csc', (Tensor, Tensor, Tensor))``: The three tensors form the CSC representation
+  of the graph's adjacency matrix. The first one is the column index pointer. The
+  second one is the row indices. The third one is the edge IDs, which can be empty
+  to represent consecutive integer IDs starting from 0.
+  The tensors can be replaced with any iterable of integers (e.g. list, tuple,
+  numpy.ndarray).
 If you would like to create a MFG with a single source node type, a single destination
 node type, and a single edge type, then you can pass in the graph data directly
@@ -489,8 +527,9 @@ def create_block(data_dict, num_src_nodes=None, num_dst_nodes=None, idtype=None,
 # Convert all data to node tensors first
 node_tensor_dict = {}
 for (sty, ety, dty), data in data_dict.items():
-    u, v, urange, vrange = utils.graphdata2tensors(data, idtype, bipartite=True)
-    node_tensor_dict[(sty, ety, dty)] = (u, v)
+    (sparse_fmt, arrays), urange, vrange = utils.graphdata2tensors(
+        data, idtype, bipartite=True)
+    node_tensor_dict[(sty, ety, dty)] = (sparse_fmt, arrays)
 if need_infer:
 num_src_nodes[sty] = max(num_src_nodes[sty], urange)
 num_dst_nodes[dty] = max(num_dst_nodes[dty], vrange)
@@ -525,8 +564,8 @@ def create_block(data_dict, num_src_nodes=None, num_dst_nodes=None, idtype=None,
 meta_edges_src.append(srctype_dict[srctype])
 meta_edges_dst.append(dsttype_dict[dsttype])
 etypes.append(etype)
-src, dst = node_tensor_dict[(srctype, etype, dsttype)]
-g = create_from_edges(src, dst, 'SRC/' + srctype, etype, 'DST/' + dsttype,
+sparse_fmt, arrays = node_tensor_dict[(srctype, etype, dsttype)]
+g = create_from_edges(sparse_fmt, arrays, 'SRC/' + srctype, etype, 'DST/' + dsttype,
                       num_src_nodes[srctype], num_dst_nodes[dsttype])
 rel_graphs.append(g)
@@ -1041,8 +1080,8 @@ def from_scipy(sp_mat,
 raise DGLError('Expect the number of rows to be the same as the number of columns for '
 'sp_mat, got {:d} and {:d}.'.format(num_rows, num_cols))
-u, v, urange, vrange = utils.graphdata2tensors(sp_mat, idtype)
-g = create_from_edges(u, v, '_N', '_E', '_N', urange, vrange)
+(sparse_fmt, arrays), urange, vrange = utils.graphdata2tensors(sp_mat, idtype)
+g = create_from_edges(sparse_fmt, arrays, '_N', '_E', '_N', urange, vrange)
 if eweight_name is not None:
 g.edata[eweight_name] = F.tensor(sp_mat.data)
 return g.to(device)
@@ -1135,9 +1174,8 @@ def bipartite_from_scipy(sp_mat,
 heterograph
 bipartite_from_networkx
 """
-# Sanity check
-u, v, urange, vrange = utils.graphdata2tensors(sp_mat, idtype, bipartite=True)
-g = create_from_edges(u, v, utype, etype, vtype, urange, vrange)
+(sparse_fmt, arrays), urange, vrange = utils.graphdata2tensors(sp_mat, idtype, bipartite=True)
+g = create_from_edges(sparse_fmt, arrays, utype, etype, vtype, urange, vrange)
 if eweight_name is not None:
 g.edata[eweight_name] = F.tensor(sp_mat.data)
 return g.to(device)
@@ -1255,10 +1293,10 @@ def from_networkx(nx_graph,
 if not nx_graph.is_directed():
 nx_graph = nx_graph.to_directed()
-u, v, urange, vrange = utils.graphdata2tensors(
+(sparse_fmt, arrays), urange, vrange = utils.graphdata2tensors(
     nx_graph, idtype, edge_id_attr_name=edge_id_attr_name)
-g = create_from_edges(u, v, '_N', '_E', '_N', urange, vrange)
+g = create_from_edges(sparse_fmt, arrays, '_N', '_E', '_N', urange, vrange)
 # nx_graph.edges(data=True) returns src, dst, attr_dict
 has_edge_id = nx_graph.number_of_edges() > 0 and edge_id_attr_name is not None
@@ -1450,12 +1488,12 @@ def bipartite_from_networkx(nx_graph,
 bottom_map = {n : i for i, n in enumerate(bottom_nodes)}
 # Get the node tensors and the number of nodes
-u, v, urange, vrange = utils.graphdata2tensors(
+(sparse_fmt, arrays), urange, vrange = utils.graphdata2tensors(
     nx_graph, idtype, bipartite=True,
     edge_id_attr_name=edge_id_attr_name,
     top_map=top_map, bottom_map=bottom_map)
-g = create_from_edges(u, v, utype, etype, vtype, urange, vrange)
+g = create_from_edges(sparse_fmt, arrays, utype, etype, vtype, urange, vrange)
 # nx_graph.edges(data=True) returns src, dst, attr_dict
 has_edge_id = nx_graph.number_of_edges() > 0 and edge_id_attr_name is not None
@@ -1586,10 +1624,9 @@ DGLHeteroGraph.to_networkx = to_networkx
 # Internal APIs
 ############################################################
-def create_from_edges(u, v,
+def create_from_edges(sparse_fmt, arrays,
                       utype, etype, vtype,
                       urange, vrange,
-                      validate=True,
                       row_sorted=False,
                       col_sorted=False):
 """Internal function to create a graph from incident nodes with types.
@@ -1598,10 +1635,10 @@ def create_from_edges(u, v,
 Parameters
 ----------
-u : Tensor
-    Source node IDs.
-v : Tensor
-    Dest node IDs.
+sparse_fmt : str
+    The sparse adjacency matrix format.
+arrays : tuple[Tensor]
+    The sparse adjacency matrix arrays.
 utype : str
     Source node type name.
 etype : str
@@ -1614,8 +1651,6 @@ def create_from_edges(u, v,
 vrange : int, optional
     The destination node ID range. If None, the value is the
    maximum of the destination node IDs in the edge list plus 1. (Default: None)
-validate : bool, optional
-    If True, checks if node IDs are within range.
 row_sorted : bool, optional
    Whether or not the rows of the COO are in ascending order.
 col_sorted : bool, optional
@@ -1627,24 +1662,21 @@ def create_from_edges(u, v,
 -------
 DGLHeteroGraph
 """
-if validate:
-    if urange is not None and len(u) > 0 and \
-       urange <= F.as_scalar(F.max(u, dim=0)):
-        raise DGLError('Invalid node id {} (should be less than cardinality {}).'.format(
-            urange, F.as_scalar(F.max(u, dim=0))))
-    if vrange is not None and len(v) > 0 and \
-       vrange <= F.as_scalar(F.max(v, dim=0)):
-        raise DGLError('Invalid node id {} (should be less than cardinality {}).'.format(
-            vrange, F.as_scalar(F.max(v, dim=0))))
 if utype == vtype:
     num_ntypes = 1
 else:
     num_ntypes = 2
+if sparse_fmt == 'coo':
+    u, v = arrays
     hgidx = heterograph_index.create_unitgraph_from_coo(
         num_ntypes, urange, vrange, u, v, ['coo', 'csr', 'csc'],
         row_sorted, col_sorted)
+else:  # 'csr' or 'csc'
+    indptr, indices, eids = arrays
+    hgidx = heterograph_index.create_unitgraph_from_csr(
+        num_ntypes, urange, vrange, indptr, indices, eids, ['coo', 'csr', 'csc'],
+        sparse_fmt == 'csc')
 if utype == vtype:
     return DGLHeteroGraph(hgidx, [utype], [etype])
 else:
...
@@ -6,7 +6,6 @@ import numpy as np
 from .utils import save_graphs, load_graphs, _get_dgl_url
 from ..convert import heterograph
-from ..utils import graphdata2tensors
 from .dgl_dataset import DGLBuiltinDataset
 from .. import backend as F
@@ -106,8 +105,9 @@ class FraudDataset(DGLBuiltinDataset):
 graph_data = {}
 for relation in self.relations[self.name]:
-    u, v, _, _ = graphdata2tensors(data[relation])
-    graph_data[(self.node_name[self.name], relation, self.node_name[self.name])] = (u, v)
+    adj = data[relation].tocoo()
+    row, col = adj.row, adj.col
+    graph_data[(self.node_name[self.name], relation, self.node_name[self.name])] = (row, col)
 g = heterograph(graph_data)
 g.ndata['feature'] = F.tensor(node_features)
...
@@ -69,9 +69,14 @@ class DGLHeteroGraph(object):
 if not isinstance(gidx, heterograph_index.HeteroGraphIndex):
 dgl_warning('Recommend creating graphs by `dgl.graph(data)`'
             ' instead of `dgl.DGLGraph(data)`.')
-u, v, num_src, num_dst = utils.graphdata2tensors(gidx)
+(sparse_fmt, arrays), num_src, num_dst = utils.graphdata2tensors(gidx)
+if sparse_fmt == 'coo':
 gidx = heterograph_index.create_unitgraph_from_coo(
-    1, num_src, num_dst, u, v, ['coo', 'csr', 'csc'])
+    1, num_src, num_dst, arrays[0], arrays[1], ['coo', 'csr', 'csc'])
+else:
+    gidx = heterograph_index.create_unitgraph_from_csr(
+        1, num_src, num_dst, arrays[0], arrays[1], arrays[2], ['coo', 'csr', 'csc'],
+        sparse_fmt == 'csc')
 if len(deprecate_kwargs) != 0:
 dgl_warning('Keyword arguments {} are deprecated in v0.5, and can be safely'
             ' removed in all cases.'.format(list(deprecate_kwargs.keys())))
@@ -3506,23 +3511,23 @@ class DGLHeteroGraph(object):
 else:
     return deg
-def adjacency_matrix(self, transpose=True, ctx=F.cpu(), scipy_fmt=None, etype=None):
+def adjacency_matrix(self, transpose=False, ctx=F.cpu(), scipy_fmt=None, etype=None):
 """Alias of :meth:`adj`"""
 return self.adj(transpose, ctx, scipy_fmt, etype)
-def adj(self, transpose=True, ctx=F.cpu(), scipy_fmt=None, etype=None):
+def adj(self, transpose=False, ctx=F.cpu(), scipy_fmt=None, etype=None):
 """Return the adjacency matrix of edges of the given edge type.
 By default, a row of returned adjacency matrix represents the
-destination of an edge and the column represents the source.
-When transpose is True, a row represents the source and a column
-represents a destination.
+source of an edge and the column represents the destination.
+When transpose is True, a row represents the destination and a column
+represents the source.
 Parameters
 ----------
 transpose : bool, optional
-    A flag to transpose the returned adjacency matrix. (Default: True)
+    A flag to transpose the returned adjacency matrix. (Default: False)
 ctx : context, optional
     The context of returned adjacency matrix. (Default: cpu)
 scipy_fmt : str, optional
@@ -3578,8 +3583,52 @@ class DGLHeteroGraph(object):
 else:
     return self._graph.adjacency_matrix_scipy(etid, transpose, scipy_fmt, False)
+def adj_sparse(self, fmt, etype=None):
+    """Return the adjacency matrix of edges of the given edge type as tensors of
+    a sparse matrix representation.
+    By default, a row of returned adjacency matrix represents the
+    source of an edge and the column represents the destination.
+    Parameters
+    ----------
+    fmt : str
+        Either ``coo``, ``csr`` or ``csc``.
+    etype : str or (str, str, str), optional
+        The type names of the edges. The allowed type name formats are:
+        * ``(str, str, str)`` for source node type, edge type and destination node type.
+        * or one ``str`` edge type name if the name can uniquely identify a
+          triplet format in the graph.
+        Can be omitted if the graph has only one type of edges.
+    Returns
+    -------
+    tuple[Tensor]
+        If :attr:`fmt` is ``coo``, returns a pair of source and destination node ID
+        tensors.
+        If :attr:`fmt` is ``csr`` or ``csc``, returns the CSR or CSC representation
+        of the adjacency matrix as a triplet of tensors
+        ``(indptr, indices, edge_ids)``. Note that ``edge_ids`` could be an empty
+        tensor with 0 elements, in which case the edge IDs are consecutive
+        integers starting from 0.
+    Examples
+    --------
+    >>> g = dgl.graph(([0, 1, 2], [1, 2, 3]))
+    >>> g.adj_sparse('coo')
+    >>> g.adj_sparse('csr')
+    """
+    etid = self.get_etype_id(etype)
+    if fmt == 'csc':
+        # The first two elements are number of rows and columns
+        return self._graph.adjacency_matrix_tensors(etid, True, 'csr')[2:]
+    else:
+        return self._graph.adjacency_matrix_tensors(etid, False, fmt)[2:]
-def adjacency_matrix_scipy(self, transpose=True, fmt='csr', return_edge_ids=None):
+def adjacency_matrix_scipy(self, transpose=False, fmt='csr', return_edge_ids=None):
 """DEPRECATED: please use ``dgl.adjacency_matrix(transpose, scipy_fmt=fmt)``.
 """
 dgl_warning('DGLGraph.adjacency_matrix_scipy is deprecated. '
...
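To make the flipped default concrete, a small hedged check of the new orientation and of adj_sparse (the printed values are what the new docstrings imply, not verified against this exact revision):

import dgl

g = dgl.graph(([0, 1, 2], [1, 2, 3]))
# New default: row = source, column = destination.
print(g.adj(scipy_fmt='coo').nonzero())                  # (array([0, 1, 2]), array([1, 2, 3]))
print(g.adj(transpose=True, scipy_fmt='coo').nonzero())  # (array([1, 2, 3]), array([0, 1, 2]))
# adj_sparse returns the raw index arrays described in the docstring above.
print(g.adj_sparse('coo'))  # (tensor([0, 1, 2]), tensor([1, 2, 3]))
print(g.adj_sparse('csr'))  # (tensor([0, 1, 2, 3, 3]), tensor([1, 2, 3]), edge-ID tensor, possibly empty)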
@@ -600,11 +600,11 @@ class HeteroGraphIndex(ObjectBase):
 def adjacency_matrix(self, etype, transpose, ctx):
 """Return the adjacency matrix representation of this graph.
-By default, a row of returned adjacency matrix represents the destination
-of an edge and the column represents the source.
-When transpose is True, a row represents the source and a column represents
-a destination.
+By default, a row of returned adjacency matrix represents the source
+of an edge and the column represents the destination.
+When transpose is True, a row represents the destination and a column represents
+the source.
 Parameters
 ----------
@@ -630,8 +630,8 @@ class HeteroGraphIndex(ObjectBase):
 rst = _CAPI_DGLHeteroGetAdj(self, int(etype), transpose, fmt)
 # convert to framework-specific sparse matrix
 srctype, dsttype = self.metagraph.find_edge(etype)
-nrows = self.number_of_nodes(srctype) if transpose else self.number_of_nodes(dsttype)
-ncols = self.number_of_nodes(dsttype) if transpose else self.number_of_nodes(srctype)
+nrows = self.number_of_nodes(dsttype) if transpose else self.number_of_nodes(srctype)
+ncols = self.number_of_nodes(srctype) if transpose else self.number_of_nodes(dsttype)
 nnz = self.number_of_edges(etype)
 if fmt == "csr":
 indptr = F.copy_to(F.from_dgl_nd(rst(0)), ctx)
@@ -653,11 +653,11 @@ class HeteroGraphIndex(ObjectBase):
 def adjacency_matrix_tensors(self, etype, transpose, fmt):
 """Return the adjacency matrix as a triplet of tensors.
-By default, a row of returned adjacency matrix represents the destination
-of an edge and the column represents the source.
-When transpose is True, a row represents the source and a column represents
-a destination.
+By default, a row of returned adjacency matrix represents the source
+of an edge and the column represents the destination.
+When transpose is True, a row represents the destination and a column represents
+the source.
 Parameters
 ----------
@@ -689,8 +689,8 @@ class HeteroGraphIndex(ObjectBase):
 rst = _CAPI_DGLHeteroGetAdj(self, int(etype), transpose, fmt)
 srctype, dsttype = self.metagraph.find_edge(etype)
-nrows = self.number_of_nodes(srctype) if transpose else self.number_of_nodes(dsttype)
-ncols = self.number_of_nodes(dsttype) if transpose else self.number_of_nodes(srctype)
+nrows = self.number_of_nodes(dsttype) if transpose else self.number_of_nodes(srctype)
+ncols = self.number_of_nodes(srctype) if transpose else self.number_of_nodes(dsttype)
 nnz = self.number_of_edges(etype)
 if fmt == "csr":
 indptr = F.from_dgl_nd(rst(0))
@@ -919,9 +919,9 @@ class HeteroGraphIndex(ObjectBase):
 The first element of the tuple is the shuffle order for outward graph
 The second element of the tuple is the shuffle order for inward graph
 """
-csr = _CAPI_DGLHeteroGetAdj(self, int(etype), True, "csr")
+csr = _CAPI_DGLHeteroGetAdj(self, int(etype), False, "csr")
 order = csr(2)
-rev_csr = _CAPI_DGLHeteroGetAdj(self, int(etype), False, "csr")
+rev_csr = _CAPI_DGLHeteroGetAdj(self, int(etype), True, "csr")
 rev_order = rev_csr(2)
 return utils.toindex(order, self.dtype), utils.toindex(rev_order, self.dtype)
@@ -1106,7 +1106,7 @@ def create_unitgraph_from_coo(num_ntypes, num_src, num_dst, row, col,
 formats, row_sorted, col_sorted)
 def create_unitgraph_from_csr(num_ntypes, num_src, num_dst, indptr, indices, edge_ids,
-                              formats):
+                              formats, transpose=False):
 """Create a unitgraph graph index from CSR format
 Parameters
@@ -1125,6 +1125,8 @@ def create_unitgraph_from_csr(num_ntypes, num_src, num_dst, indptr, indices, edg
 Edge shuffle id.
 formats : str
     Restrict the storage formats allowed for the unit graph.
+transpose : bool, optional
+    If True, treats the input matrix as CSC.
 Returns
 -------
@@ -1135,7 +1137,7 @@ def create_unitgraph_from_csr(num_ntypes, num_src, num_dst, indptr, indices, edg
 return _CAPI_DGLHeteroCreateUnitGraphFromCSR(
     int(num_ntypes), int(num_src), int(num_dst),
     F.to_dgl_nd(indptr), F.to_dgl_nd(indices), F.to_dgl_nd(edge_ids),
-    formats)
+    formats, transpose)
 def create_heterograph_from_relations(metagraph, rel_graphs, num_nodes_per_type):
 """Create a heterograph from metagraph and graphs of every relation.
...
@@ -954,7 +954,7 @@ def khop_adj(g, k):
 """
 assert g.is_homogeneous, \
     'only homogeneous graph is supported'
-adj_k = g.adj(scipy_fmt=g.formats()['created'][0]) ** k
+adj_k = g.adj(transpose=True, scipy_fmt=g.formats()['created'][0]) ** k
 return F.tensor(adj_k.todense().astype(np.float32))
 def khop_graph(g, k, copy_ndata=True):
@@ -1024,7 +1024,7 @@ def khop_graph(g, k, copy_ndata=True):
 assert g.is_homogeneous, \
     'only homogeneous graph is supported'
 n = g.number_of_nodes()
-adj_k = g.adj(transpose=True, scipy_fmt=g.formats()['created'][0]) ** k
+adj_k = g.adj(transpose=False, scipy_fmt=g.formats()['created'][0]) ** k
 adj_k = adj_k.tocoo()
 multiplicity = adj_k.data
 row = np.repeat(adj_k.row, multiplicity)
@@ -1280,7 +1280,7 @@ def laplacian_lambda_max(g):
 rst = []
 for g_i in g_arr:
 n = g_i.number_of_nodes()
-adj = g_i.adj(scipy_fmt=g_i.formats()['created'][0]).astype(float)
+adj = g_i.adj(transpose=True, scipy_fmt=g_i.formats()['created'][0]).astype(float)
 norm = sparse.diags(F.asnumpy(g_i.in_degrees()).clip(1) ** -0.5, dtype=float)
 laplacian = sparse.eye(n) - norm * adj * norm
 rst.append(sparse.linalg.eigs(laplacian, 1, which='LM',
@@ -1336,7 +1336,7 @@ def metapath_reachable_graph(g, metapath):
 """
 adj = 1
 for etype in metapath:
-    adj = adj * g.adj(etype=etype, scipy_fmt='csr', transpose=True)
+    adj = adj * g.adj(etype=etype, scipy_fmt='csr', transpose=False)
 adj = (adj != 0).tocsr()
 srctype = g.to_canonical_etype(metapath[0])[0]
@@ -2845,12 +2845,12 @@ def sort_in_edges(g, tag, tag_offset_name='_TAG_OFFSET'):
 -----------
 >>> g = dgl.graph(([0,1,2,3,4,0,1,2],[0,0,0,0,0,1,1,1]))
->>> g.adjacency_matrix(scipy_fmt='csr', transpose=False).nonzero()
+>>> g.adjacency_matrix(scipy_fmt='csr', transpose=True).nonzero()
 (array([0, 0, 0, 0, 0, 1, 1, 1], dtype=int32),
  array([0, 1, 2, 3, 4, 0, 1, 2], dtype=int32)))
 >>> tag = torch.IntTensor([1,1,0,2,0])
 >>> g_sorted = dgl.transform.sort_in_edges(g, tag)
->>> g_sorted.adjacency_matrix(scipy_fmt='csr', transpose=False).nonzero()
+>>> g_sorted.adjacency_matrix(scipy_fmt='csr', transpose=True).nonzero()
 (array([0, 0, 0, 0, 0, 1, 1, 1], dtype=int32),
  array([2, 4, 0, 1, 3, 2, 0, 1], dtype=int32))
 >>> g_sorted.ndata['_TAG_OFFSET']
...
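The transform call sites above only thread the old row/column orientation through explicitly, so user-visible results should be unchanged; a brief hedged smoke test (expected values not shown since they depend on the pre-change convention being preserved):

import dgl

g = dgl.graph(([0, 1, 2], [1, 2, 3]))
# khop_adj now requests transpose=True internally, so its output is expected
# to match the pre-patch behaviour (rows index destinations).
print(dgl.khop_adj(g, 2))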
"""Data utilities.""" """Data utilities."""
from collections import namedtuple
import scipy as sp import scipy as sp
import networkx as nx import networkx as nx
...@@ -31,7 +32,9 @@ def elist2tensor(elist, idtype): ...@@ -31,7 +32,9 @@ def elist2tensor(elist, idtype):
return F.tensor(u, idtype), F.tensor(v, idtype) return F.tensor(u, idtype), F.tensor(v, idtype)
def scipy2tensor(spmat, idtype): def scipy2tensor(spmat, idtype):
"""Function to convert a scipy matrix to edge tensors. """Function to convert a scipy matrix to a sparse adjacency matrix tuple.
Note that the data array of the scipy matrix is discarded.
Parameters Parameters
---------- ----------
...@@ -42,13 +45,20 @@ def scipy2tensor(spmat, idtype): ...@@ -42,13 +45,20 @@ def scipy2tensor(spmat, idtype):
Returns Returns
------- -------
(Tensor, Tensor) (str, tuple[Tensor])
Edge tensors. A tuple containing the format as well as the list of tensors representing
the sparse matrix.
""" """
if spmat.format in ['csr', 'csc']:
indptr = F.tensor(spmat.indptr, idtype)
indices = F.tensor(spmat.indices, idtype)
data = F.tensor([], idtype)
return SparseAdjTuple(spmat.format, (indptr, indices, data))
else:
spmat = spmat.tocoo() spmat = spmat.tocoo()
row = F.tensor(spmat.row, idtype) row = F.tensor(spmat.row, idtype)
col = F.tensor(spmat.col, idtype) col = F.tensor(spmat.col, idtype)
return row, col return SparseAdjTuple('coo', (row, col))
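Since scipy2tensor now keeps CSR/CSC structure instead of always converting to COO, dgl.from_scipy should construct directly from the indptr/indices arrays of a CSR input. A small sketch (the .data values are dropped unless eweight_name is passed):

import numpy as np
import scipy.sparse as sp
import dgl

spmat = sp.csr_matrix((np.ones(3), ([0, 1, 2], [1, 2, 0])), shape=(3, 3))
g = dgl.from_scipy(spmat)
print(g.edges())  # expected: (tensor([0, 1, 2]), tensor([1, 2, 0]))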
 def networkx2tensor(nx_graph, idtype, edge_id_attr_name=None):
 """Function to convert a networkx graph to edge tensors.
@@ -96,6 +106,8 @@ def networkx2tensor(nx_graph, idtype, edge_id_attr_name=None):
 dst = F.tensor(dst, idtype)
 return src, dst
+SparseAdjTuple = namedtuple('SparseAdjTuple', ['format', 'arrays'])
 def graphdata2tensors(data, idtype=None, bipartite=False, **kwargs):
 """Function to convert various types of data to edge tensors and infer
 the number of nodes.
@@ -103,7 +115,14 @@ def graphdata2tensors(data, idtype=None, bipartite=False, **kwargs):
 Parameters
 ----------
 data : graph data
-    Various kinds of graph data.
+    Various kinds of graph data. Possible data types are:
+    - ``(row, col)``
+    - ``('coo', (row, col))``
+    - ``('csr', (indptr, indices, edge_ids))``
+    - ``('csc', (indptr, indices, edge_ids))``
+    - SciPy sparse matrix
+    - NetworkX graph
 idtype : int32, int64, optional
     Integer ID type. If None, try infer from the data and if fail use
     int64.
@@ -121,55 +140,52 @@ def graphdata2tensors(data, idtype=None, bipartite=False, **kwargs):
 Returns
 -------
-src : Tensor
-    Src nodes.
-dst : Tensor
-    Dst nodes.
+data : SparseAdjTuple
+    A tuple with the sparse matrix format and the adjacency matrix tensors.
 num_src : int
-    Number of source nodes
+    Number of source nodes.
 num_dst : int
     Number of destination nodes.
 """
-if idtype is None and not (isinstance(data, tuple) and F.is_tensor(data[0])):
+# Convert tuple to SparseAdjTuple
+if isinstance(data, tuple):
+    if not isinstance(data[0], str):
+        # (row, col) format, convert to ('coo', (row, col))
+        data = ('coo', data)
+    data = SparseAdjTuple(*data)
+if idtype is None and \
+        not (isinstance(data, SparseAdjTuple) and F.is_tensor(data.arrays[0])):
     # preferred default idtype is int64
     # if data is tensor and idtype is None, infer the idtype from tensor
     idtype = F.int64
 checks.check_valid_idtype(idtype)
-if isinstance(data, tuple) and (not F.is_tensor(data[0]) or not F.is_tensor(data[1])):
+if isinstance(data, SparseAdjTuple) and (not all(F.is_tensor(a) for a in data.arrays)):
     # (Iterable, Iterable) type data, convert it to (Tensor, Tensor)
-    if len(data[0]) == 0:
+    if len(data.arrays[0]) == 0:
         # force idtype for empty list
-        data = F.tensor(data[0], idtype), F.tensor(data[1], idtype)
+        data = SparseAdjTuple(data.format, tuple(F.tensor(a, idtype) for a in data.arrays))
     else:
         # convert the iterable to tensor and keep its native data type so we can check
         # its validity later
-        data = F.tensor(data[0]), F.tensor(data[1])
+        data = SparseAdjTuple(data.format, tuple(F.tensor(a) for a in data.arrays))
-if isinstance(data, tuple):
-    # (Tensor, Tensor) type data
-    src, dst = data
-    # sanity checks
-    # TODO(minjie): move these checks to C for faster graph construction.
-    if F.dtype(src) != F.dtype(dst):
-        raise DGLError('Expect the source and destination node IDs to have the same type,'
-                       ' but got {} and {}.'.format(F.dtype(src), F.dtype(dst)))
-    if F.context(src) != F.context(dst):
-        raise DGLError('Expect the source and destination node IDs to be on the same device,'
-                       ' but got {} and {}.'.format(F.context(src), F.context(dst)))
-    if F.dtype(src) not in (F.int32, F.int64):
-        raise DGLError('Expect the source ID tensor to have data type int32 or int64,'
-                       ' but got {}.'.format(F.dtype(src)))
-    if F.dtype(dst) not in (F.int32, F.int64):
-        raise DGLError('Expect the destination ID tensor to have data type int32 or int64,'
-                       ' but got {}.'.format(F.dtype(dst)))
+if isinstance(data, SparseAdjTuple):
     if idtype is not None:
-        src, dst = F.astype(src, idtype), F.astype(dst, idtype)
+        data = SparseAdjTuple(data.format, tuple(F.astype(a, idtype) for a in data.arrays))
+    num_src, num_dst = infer_num_nodes(data, bipartite=bipartite)
 elif isinstance(data, list):
     src, dst = elist2tensor(data, idtype)
+    data = SparseAdjTuple('coo', (src, dst))
+    num_src, num_dst = infer_num_nodes(data, bipartite=bipartite)
 elif isinstance(data, sp.sparse.spmatrix):
-    src, dst = scipy2tensor(data, idtype)
+    # We can get scipy matrix's number of rows and columns easily.
+    num_src, num_dst = infer_num_nodes(data, bipartite=bipartite)
+    data = scipy2tensor(data, idtype)
 elif isinstance(data, nx.Graph):
+    # We can get networkx graph's number of sources and destinations easily.
+    num_src, num_dst = infer_num_nodes(data, bipartite=bipartite)
     edge_id_attr_name = kwargs.get('edge_id_attr_name', None)
     if bipartite:
         top_map = kwargs.get('top_map')
@@ -180,22 +196,11 @@ def graphdata2tensors(data, idtype=None, bipartite=False, **kwargs):
     else:
         src, dst = networkx2tensor(
             data, idtype, edge_id_attr_name=edge_id_attr_name)
+    data = SparseAdjTuple('coo', (src, dst))
 else:
     raise DGLError('Unsupported graph data type:', type(data))
-if len(src) != len(dst):
-    raise DGLError('Expect the source and destination ID tensors to have the same length,'
-                   ' but got {} and {}.'.format(len(src), len(dst)))
-if len(src) > 0 and (F.as_scalar(F.min(src, 0)) < 0 or F.as_scalar(F.min(dst, 0)) < 0):
-    raise DGLError('All IDs must be non-negative integers.')
-# infer number of nodes
-infer_from_raw = infer_num_nodes(data, bipartite=bipartite)
-if infer_from_raw is None:
-    num_src, num_dst = infer_num_nodes((src, dst), bipartite=bipartite)
-else:
-    num_src, num_dst = infer_from_raw
-return src, dst, num_src, num_dst
+return data, num_src, num_dst
 def networkxbipartite2tensors(nx_graph, idtype, top_map, bottom_map, edge_id_attr_name=None):
 """Function to convert a networkx bipartite to edge tensors.
@@ -264,9 +269,11 @@ def infer_num_nodes(data, bipartite=False):
 ----------
 data : graph data
     Supported types are:
-    * Tensor pair (u, v)
-    * SciPy matrix
-    * NetworkX graph
+    * SparseAdjTuple ``(sparse_fmt, arrays)`` where ``arrays`` can be either ``(src, dst)``
+      or ``(indptr, indices, data)``.
+    * SciPy matrix.
+    * NetworkX graph.
 bipartite : bool, optional
     Whether infer number of nodes of a bipartite graph --
     num_src and num_dst can be different.
@@ -283,10 +290,27 @@ def infer_num_nodes(data, bipartite=False):
 None
     If the inference failed.
 """
-if isinstance(data, tuple) and len(data) == 2 and F.is_tensor(data[0]):
-    u, v = data
+if isinstance(data, tuple) and len(data) == 2:
+    if not isinstance(data[0], str):
+        raise TypeError('Expected sparse format as a str, but got %s' % type(data[0]))
+    if data[0] == 'coo':
+        # ('coo', (src, dst)) format
+        u, v = data[1]
         nsrc = F.as_scalar(F.max(u, dim=0)) + 1 if len(u) > 0 else 0
         ndst = F.as_scalar(F.max(v, dim=0)) + 1 if len(v) > 0 else 0
+    elif data[0] == 'csr':
+        # ('csr', (indptr, indices, eids)) format
+        indptr, indices, _ = data[1]
+        nsrc = F.shape(indptr)[0] - 1
+        ndst = F.as_scalar(F.max(indices, dim=0)) + 1 if len(indices) > 0 else 0
+    elif data[0] == 'csc':
+        # ('csc', (indptr, indices, eids)) format
+        indptr, indices, _ = data[1]
+        ndst = F.shape(indptr)[0] - 1
+        nsrc = F.as_scalar(F.max(indices, dim=0)) + 1 if len(indices) > 0 else 0
+    else:
+        raise ValueError('unknown format %s' % data[0])
 elif isinstance(data, sp.sparse.spmatrix):
     nsrc, ndst = data.shape[0], data.shape[1]
 elif isinstance(data, nx.Graph):
...
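A standalone illustration (plain Python, not DGL code) of the node-count inference rules added above: for 'csr' the source count is len(indptr) - 1 and the destination count comes from the column indices; 'csc' swaps the two roles.

def infer_num_nodes_sketch(fmt, arrays):
    # Mirrors the branches above for ('csr'|'csc', (indptr, indices, eids)) inputs.
    if fmt == 'coo':
        u, v = arrays
        return (max(u) + 1 if u else 0, max(v) + 1 if v else 0)
    indptr, indices, _ = arrays
    a = len(indptr) - 1                      # nodes on the indptr side
    b = max(indices) + 1 if indices else 0   # nodes on the indices side
    return (a, b) if fmt == 'csr' else (b, a)

print(infer_num_nodes_sketch('csr', ([0, 1, 1, 2], [1, 0], [])))   # (3, 2)
print(infer_num_nodes_sketch('csc', ([0, 1, 2], [0, 1], [0, 1])))  # (2, 2)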
@@ -52,14 +52,20 @@ DGL_REGISTER_GLOBAL("heterograph_index._CAPI_DGLHeteroCreateUnitGraphFromCSR")
 IdArray indices = args[4];
 IdArray edge_ids = args[5];
 List<Value> formats = args[6];
+bool transpose = args[7];
 std::vector<SparseFormat> formats_vec;
 for (Value val : formats) {
   std::string fmt = val->data;
   formats_vec.push_back(ParseSparseFormat(fmt));
 }
 const auto code = SparseFormatsToCode(formats_vec);
+if (!transpose) {
   auto hgptr = CreateFromCSR(nvtypes, num_src, num_dst, indptr, indices, edge_ids, code);
   *rv = HeteroGraphRef(hgptr);
+} else {
+  auto hgptr = CreateFromCSC(nvtypes, num_src, num_dst, indptr, indices, edge_ids, code);
+  *rv = HeteroGraphRef(hgptr);
+}
 });
 DGL_REGISTER_GLOBAL("heterograph_index._CAPI_DGLHeteroCreateHeteroGraph")
...
@@ -456,6 +456,8 @@ class UnitGraph::CSR : public BaseHeteroGraph {
 if (aten::IsValidIdArray(edge_ids))
   CHECK((indices->shape[0] == edge_ids->shape[0]) || aten::IsNullArray(edge_ids))
     << "edge id arrays should have the same length as indices if not empty";
+CHECK_EQ(num_src, indptr->shape[0] - 1)
+  << "number of nodes do not match the length of indptr minus 1.";
 adj_ = aten::CSRMatrix{num_src, num_dst, indptr, indices, edge_ids};
 }
@@ -1070,10 +1072,10 @@ std::vector<IdArray> UnitGraph::GetAdj(
 // to_scipy_sparse_matrix. With the upcoming custom kernel change, we should change the
 // behavior and make row for src and col for dst.
 if (fmt == std::string("csr")) {
-  return transpose? GetOutCSR()->GetAdj(etype, false, "csr")
+  return !transpose ? GetOutCSR()->GetAdj(etype, false, "csr")
                     : GetInCSR()->GetAdj(etype, false, "csr");
 } else if (fmt == std::string("coo")) {
-  return GetCOO()->GetAdj(etype, !transpose, fmt);
+  return GetCOO()->GetAdj(etype, transpose, fmt);
 } else {
   LOG(FATAL) << "unsupported adjacency matrix format: " << fmt;
   return {};
...
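The CHECK_EQ above corresponds to the "raise error if indptr did not match number of nodes" item in the commit message. At the Python level it is expected to surface as a DGLError when the indptr length disagrees with the requested node count; the exact error type and wording are an assumption here, not taken from the patch.

import dgl

try:
    # indptr describes 3 source nodes, but num_nodes forces 10 -> should be rejected.
    dgl.graph(('csr', ([0, 1, 2, 3], [1, 2, 0], [])), num_nodes=10)
except Exception as e:  # expected to be a dgl.DGLError raised by the C++ check
    print('rejected:', e)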
@@ -33,7 +33,7 @@ def test_csrmm(idtype, dtype):
 a, A = _random_simple_graph(idtype, dtype, F.ctx(), 500, 600, 9000, 'A', 'B', 'AB')
 b, B = _random_simple_graph(idtype, dtype, F.ctx(), 600, 700, 9000, 'B', 'C', 'BC')
 C, C_weights = dgl.sparse._csrmm(A._graph, A.edata['w'], B._graph, B.edata['w'], 2)
-C_adj = C.adjacency_matrix_scipy(0, True, 'csr')
+C_adj = C.adjacency_matrix_scipy(0, False, 'csr')
 C_adj.data = F.asnumpy(C_weights)
 C_adj = F.tensor(C_adj.todense(), dtype=dtype)
 c = F.tensor((a * b).todense(), dtype=dtype)
@@ -83,7 +83,7 @@ def test_csrsum(idtype, dtype):
 a, A = _random_simple_graph(idtype, dtype, F.ctx(), 500, 600, 9000, 'A', 'B', 'AB')
 b, B = _random_simple_graph(idtype, dtype, F.ctx(), 500, 600, 9000, 'A', 'B', 'AB')
 C, C_weights = dgl.sparse._csrsum([A._graph, B._graph], [A.edata['w'], B.edata['w']])
-C_adj = C.adjacency_matrix_scipy(0, True, 'csr')
+C_adj = C.adjacency_matrix_scipy(0, False, 'csr')
 C_adj.data = F.asnumpy(C_weights)
 C_adj = F.tensor(C_adj.todense(), dtype=dtype)
 c = F.tensor((a + b).todense(), dtype=dtype)
...
@@ -118,9 +118,9 @@ def test_query():
 assert F.allclose(g.out_degrees([8, 9]), F.tensor([0, 1]))
 assert np.array_equal(
-    F.sparse_to_numpy(g.adjacency_matrix(transpose=False)), scipy_coo_input().toarray().T)
+    F.sparse_to_numpy(g.adjacency_matrix(transpose=True)), scipy_coo_input().toarray().T)
 assert np.array_equal(
-    F.sparse_to_numpy(g.adjacency_matrix(transpose=True)), scipy_coo_input().toarray())
+    F.sparse_to_numpy(g.adjacency_matrix(transpose=False)), scipy_coo_input().toarray())
 def _test(g):
 # test twice to see whether the cached format works or not
@@ -192,9 +192,9 @@ def test_query():
 assert F.allclose(g.out_degrees([8, 9]), F.tensor([0, 1]))
 assert np.array_equal(
-    F.sparse_to_numpy(g.adjacency_matrix(transpose=False)), scipy_coo_input().toarray().T)
+    F.sparse_to_numpy(g.adjacency_matrix(transpose=True)), scipy_coo_input().toarray().T)
 assert np.array_equal(
-    F.sparse_to_numpy(g.adjacency_matrix(transpose=True)), scipy_coo_input().toarray())
+    F.sparse_to_numpy(g.adjacency_matrix(transpose=False)), scipy_coo_input().toarray())
 def _test_csr(g):
 # test twice to see whether the cached format works or not
@@ -253,8 +253,8 @@ def test_scipy_adjmat():
 adj_1 = g.adj(scipy_fmt='coo')
 assert np.array_equal(adj_0.toarray(), adj_1.toarray())
-adj_t0 = g.adj(transpose=True, scipy_fmt='csr')
-adj_t_1 = g.adj(transpose=True, scipy_fmt='coo')
+adj_t0 = g.adj(transpose=False, scipy_fmt='csr')
+adj_t_1 = g.adj(transpose=False, scipy_fmt='coo')
 assert np.array_equal(adj_0.toarray(), adj_1.toarray())
 def test_incmat():
...
...@@ -51,8 +51,8 @@ def create_test_heterograph2(idtype): ...@@ -51,8 +51,8 @@ def create_test_heterograph2(idtype):
g = dgl.heterograph({ g = dgl.heterograph({
('user', 'follows', 'user'): ([0, 1], [1, 2]), ('user', 'follows', 'user'): ([0, 1], [1, 2]),
('user', 'plays', 'game'): ([0, 1, 2, 1], [0, 0, 1, 1]), ('user', 'plays', 'game'): ([0, 1, 2, 1], [0, 0, 1, 1]),
('user', 'wishes', 'game'): ([0, 2], [1, 0]), ('user', 'wishes', 'game'): ('csr', ([0, 1, 1, 2], [1, 0], [])),
('developer', 'develops', 'game'): ([0, 1], [0, 1]), ('developer', 'develops', 'game'): ('csc', ([0, 1, 2], [0, 1], [0, 1])),
}, idtype=idtype, device=F.ctx()) }, idtype=idtype, device=F.ctx())
assert g.idtype == idtype assert g.idtype == idtype
assert g.device == F.ctx() assert g.device == F.ctx()
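The two rewritten relations above use the new direct-from-CSR/CSC inputs this commit adds: a relation can now be given as `('csr', (indptr, indices, eids))` or `('csc', (indptr, indices, eids))` instead of an edge-list pair. A small sketch decoding the 'wishes' CSR by hand (assumed reading: an empty `eids` means edge IDs follow the CSR order):

```python
import dgl

# ('user', 'wishes', 'game'): ('csr', ([0, 1, 1, 2], [1, 0], []))
#   indptr  = [0, 1, 1, 2] -> user 0 has one out-edge, user 1 has none, user 2 has one
#   indices = [1, 0]       -> destination games of those edges
#   eids    = []           -> edge IDs follow the CSR order
g = dgl.heterograph({
    ('user', 'wishes', 'game'): ('csr', ([0, 1, 1, 2], [1, 0], [])),
})
u, v = g.edges(etype='wishes')
# Same edges as the old edge-list form ([0, 2], [1, 0]).
print(u.tolist(), v.tolist())  # expected: [0, 2] [1, 0]
```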
...@@ -488,53 +488,53 @@ def _test_edge_ids(): ...@@ -488,53 +488,53 @@ def _test_edge_ids():
@parametrize_dtype @parametrize_dtype
def test_adj(idtype): def test_adj(idtype):
g = create_test_heterograph(idtype) g = create_test_heterograph(idtype)
adj = F.sparse_to_numpy(g.adj(transpose=False, etype='follows')) adj = F.sparse_to_numpy(g.adj(transpose=True, etype='follows'))
assert np.allclose( assert np.allclose(
adj, adj,
np.array([[0., 0., 0.], np.array([[0., 0., 0.],
[1., 0., 0.], [1., 0., 0.],
[0., 1., 0.]])) [0., 1., 0.]]))
adj = F.sparse_to_numpy(g.adj(transpose=True, etype='follows')) adj = F.sparse_to_numpy(g.adj(transpose=False, etype='follows'))
assert np.allclose( assert np.allclose(
adj, adj,
np.array([[0., 1., 0.], np.array([[0., 1., 0.],
[0., 0., 1.], [0., 0., 1.],
[0., 0., 0.]])) [0., 0., 0.]]))
adj = F.sparse_to_numpy(g.adj(transpose=False, etype='plays')) adj = F.sparse_to_numpy(g.adj(transpose=True, etype='plays'))
assert np.allclose( assert np.allclose(
adj, adj,
np.array([[1., 1., 0.], np.array([[1., 1., 0.],
[0., 1., 1.]])) [0., 1., 1.]]))
adj = F.sparse_to_numpy(g.adj(transpose=True, etype='plays')) adj = F.sparse_to_numpy(g.adj(transpose=False, etype='plays'))
assert np.allclose( assert np.allclose(
adj, adj,
np.array([[1., 0.], np.array([[1., 0.],
[1., 1.], [1., 1.],
[0., 1.]])) [0., 1.]]))
adj = g.adj(transpose=False, scipy_fmt='csr', etype='follows') adj = g.adj(transpose=True, scipy_fmt='csr', etype='follows')
assert np.allclose( assert np.allclose(
adj.todense(), adj.todense(),
np.array([[0., 0., 0.], np.array([[0., 0., 0.],
[1., 0., 0.], [1., 0., 0.],
[0., 1., 0.]])) [0., 1., 0.]]))
adj = g.adj(transpose=False, scipy_fmt='coo', etype='follows') adj = g.adj(transpose=True, scipy_fmt='coo', etype='follows')
assert np.allclose( assert np.allclose(
adj.todense(), adj.todense(),
np.array([[0., 0., 0.], np.array([[0., 0., 0.],
[1., 0., 0.], [1., 0., 0.],
[0., 1., 0.]])) [0., 1., 0.]]))
adj = g.adj(transpose=False, scipy_fmt='csr', etype='plays') adj = g.adj(transpose=True, scipy_fmt='csr', etype='plays')
assert np.allclose( assert np.allclose(
adj.todense(), adj.todense(),
np.array([[1., 1., 0.], np.array([[1., 1., 0.],
[0., 1., 1.]])) [0., 1., 1.]]))
adj = g.adj(transpose=False, scipy_fmt='coo', etype='plays') adj = g.adj(transpose=True, scipy_fmt='coo', etype='plays')
assert np.allclose( assert np.allclose(
adj.todense(), adj.todense(),
np.array([[1., 1., 0.], np.array([[1., 1., 0.],
[0., 1., 1.]])) [0., 1., 1.]]))
adj = F.sparse_to_numpy(g['follows'].adj(transpose=False)) adj = F.sparse_to_numpy(g['follows'].adj(transpose=True))
assert np.allclose( assert np.allclose(
adj, adj,
np.array([[0., 0., 0.], np.array([[0., 0., 0.],
...@@ -2648,6 +2648,63 @@ def test_create_block(idtype): ...@@ -2648,6 +2648,63 @@ def test_create_block(idtype):
assert hg.edges['AB'].data['x'] is eabx assert hg.edges['AB'].data['x'] is eabx
assert hg.edges['BA'].data['x'] is ebax assert hg.edges['BA'].data['x'] is ebax
@parametrize_dtype
@pytest.mark.parametrize('fmt', ['coo', 'csr', 'csc'])
def test_adj_sparse(idtype, fmt):
if fmt == 'coo':
A = ssp.random(10, 10, 0.2).tocoo()
A.data = np.arange(20)
row = F.tensor(A.row, idtype)
col = F.tensor(A.col, idtype)
g = dgl.graph((row, col))
elif fmt == 'csr':
A = ssp.random(10, 10, 0.2).tocsr()
A.data = np.arange(20)
indptr = F.tensor(A.indptr, idtype)
indices = F.tensor(A.indices, idtype)
g = dgl.graph(('csr', (indptr, indices, [])))
with pytest.raises(DGLError):
g2 = dgl.graph(('csr', (indptr[:-1], indices, [])), num_nodes=10)
elif fmt == 'csc':
A = ssp.random(10, 10, 0.2).tocsc()
A.data = np.arange(20)
indptr = F.tensor(A.indptr, idtype)
indices = F.tensor(A.indices, idtype)
g = dgl.graph(('csc', (indptr, indices, [])))
with pytest.raises(DGLError):
g2 = dgl.graph(('csr', (indptr[:-1], indices, [])), num_nodes=10)
A_coo = A.tocoo()
A_csr = A.tocsr()
A_csc = A.tocsc()
row, col = g.adj_sparse('coo')
assert np.array_equal(F.asnumpy(row), A_coo.row)
assert np.array_equal(F.asnumpy(col), A_coo.col)
indptr, indices, eids = g.adj_sparse('csr')
assert np.array_equal(F.asnumpy(indptr), A_csr.indptr)
if fmt == 'csr':
assert len(eids) == 0
assert np.array_equal(F.asnumpy(indices), A_csr.indices)
else:
indices_sorted = F.zeros(len(indices), idtype)
indices_sorted = F.scatter_row(indices_sorted, eids, indices)
indices_sorted_np = np.zeros(len(indices), dtype=A_csr.indices.dtype)
indices_sorted_np[A_csr.data] = A_csr.indices
assert np.array_equal(F.asnumpy(indices_sorted), indices_sorted_np)
indptr, indices, eids = g.adj_sparse('csc')
assert np.array_equal(F.asnumpy(indptr), A_csc.indptr)
if fmt == 'csc':
assert len(eids) == 0
assert np.array_equal(F.asnumpy(indices), A_csc.indices)
else:
indices_sorted = F.zeros(len(indices), idtype)
indices_sorted = F.scatter_row(indices_sorted, eids, indices)
indices_sorted_np = np.zeros(len(indices), dtype=A_csc.indices.dtype)
indices_sorted_np[A_csc.data] = A_csc.indices
assert np.array_equal(F.asnumpy(indices_sorted), indices_sorted_np)
if __name__ == '__main__': if __name__ == '__main__':
# test_create() # test_create()
......
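test_adj_sparse above covers the feature end to end: build a graph straight from CSR or CSC arrays, then read any of the three layouts back with `adj_sparse` (it also checks that an `indptr` whose length does not match the number of nodes raises `DGLError`). A condensed sketch of the CSR path, assuming a PyTorch backend:

```python
import numpy as np
import scipy.sparse as ssp
import torch
import dgl

A = ssp.random(10, 10, 0.2).tocsr()
g = dgl.graph(('csr', (torch.from_numpy(A.indptr).long(),
                       torch.from_numpy(A.indices).long(),
                       [])))        # empty eids: IDs follow the CSR order

# Reading the CSR back returns the arrays the graph was built from.
indptr, indices, eids = g.adj_sparse('csr')
assert np.array_equal(indptr.numpy(), A.indptr)
assert np.array_equal(indices.numpy(), A.indices)

# The COO view matches SciPy's row-major conversion of the same matrix.
row, col = g.adj_sparse('coo')
A_coo = A.tocoo()
assert np.array_equal(row.numpy(), A_coo.row)
assert np.array_equal(col.numpy(), A_coo.col)
```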
...@@ -62,8 +62,8 @@ def test_sort_with_tag(idtype): ...@@ -62,8 +62,8 @@ def test_sort_with_tag(idtype):
assert(not check_sort(old_csr, tag)) # Check the original csr is not modified. assert(not check_sort(old_csr, tag)) # Check the original csr is not modified.
new_g = dgl.sort_in_edges(g, tag) new_g = dgl.sort_in_edges(g, tag)
old_csc = g.adjacency_matrix(transpose=False, scipy_fmt='csr') old_csc = g.adjacency_matrix(transpose=True, scipy_fmt='csr')
new_csc = new_g.adjacency_matrix(transpose=False, scipy_fmt='csr') new_csc = new_g.adjacency_matrix(transpose=True, scipy_fmt='csr')
assert(check_sort(new_csc, tag, new_g.ndata["_TAG_OFFSET"])) assert(check_sort(new_csc, tag, new_g.ndata["_TAG_OFFSET"]))
assert(not check_sort(old_csc, tag)) assert(not check_sort(old_csc, tag))
...@@ -83,8 +83,8 @@ def test_sort_with_tag_bipartite(idtype): ...@@ -83,8 +83,8 @@ def test_sort_with_tag_bipartite(idtype):
assert(not check_sort(old_csr, vtag)) assert(not check_sort(old_csr, vtag))
new_g = dgl.sort_in_edges(g, utag) new_g = dgl.sort_in_edges(g, utag)
old_csc = g.adjacency_matrix(transpose=False, scipy_fmt='csr') old_csc = g.adjacency_matrix(transpose=True, scipy_fmt='csr')
new_csc = new_g.adjacency_matrix(transpose=False, scipy_fmt='csr') new_csc = new_g.adjacency_matrix(transpose=True, scipy_fmt='csr')
assert(check_sort(new_csc, utag, new_g.nodes['_V'].data['_TAG_OFFSET'])) assert(check_sort(new_csc, utag, new_g.nodes['_V'].data['_TAG_OFFSET']))
assert(not check_sort(old_csc, utag)) assert(not check_sort(old_csc, utag))
......
...@@ -67,7 +67,7 @@ def test_topological_nodes(idtype, n=100): ...@@ -67,7 +67,7 @@ def test_topological_nodes(idtype, n=100):
layers_dgl = dgl.topological_nodes_generator(g) layers_dgl = dgl.topological_nodes_generator(g)
adjmat = g.adjacency_matrix(transpose=False) adjmat = g.adjacency_matrix(transpose=True)
def tensor_topo_traverse(): def tensor_topo_traverse():
n = g.number_of_nodes() n = g.number_of_nodes()
mask = F.copy_to(F.ones((n, 1)), F.cpu()) mask = F.copy_to(F.ones((n, 1)), F.cpu())
......
...@@ -134,7 +134,7 @@ def check_compute_func(worker_id, graph_name, return_dict): ...@@ -134,7 +134,7 @@ def check_compute_func(worker_id, graph_name, return_dict):
in_feats = g.nodes[0].data['feat'].shape[1] in_feats = g.nodes[0].data['feat'].shape[1]
# Test update all. # Test update all.
g.update_all(fn.copy_src(src='feat', out='m'), fn.sum(msg='m', out='preprocess')) g.update_all(fn.copy_src(src='feat', out='m'), fn.sum(msg='m', out='preprocess'))
adj = g.adjacency_matrix() adj = g.adjacency_matrix(transpose=True)
tmp = F.spmm(adj, g.nodes[:].data['feat']) tmp = F.spmm(adj, g.nodes[:].data['feat'])
assert_almost_equal(F.asnumpy(g.nodes[:].data['preprocess']), F.asnumpy(tmp)) assert_almost_equal(F.asnumpy(g.nodes[:].data['preprocess']), F.asnumpy(tmp))
g._sync_barrier(60) g._sync_barrier(60)
...@@ -261,13 +261,13 @@ def check_mem(gidx, cond_v, shared_v): ...@@ -261,13 +261,13 @@ def check_mem(gidx, cond_v, shared_v):
cond_v.release() cond_v.release()
gidx1 = dgl.graph_index.from_shared_mem_graph_index("test_graph5") gidx1 = dgl.graph_index.from_shared_mem_graph_index("test_graph5")
in_csr = gidx.adjacency_matrix_scipy(False, "csr") in_csr = gidx.adjacency_matrix_scipy(True, "csr")
out_csr = gidx.adjacency_matrix_scipy(True, "csr") out_csr = gidx.adjacency_matrix_scipy(False, "csr")
in_csr1 = gidx1.adjacency_matrix_scipy(False, "csr") in_csr1 = gidx1.adjacency_matrix_scipy(True, "csr")
assert_array_equal(in_csr.indptr, in_csr1.indptr) assert_array_equal(in_csr.indptr, in_csr1.indptr)
assert_array_equal(in_csr.indices, in_csr1.indices) assert_array_equal(in_csr.indices, in_csr1.indices)
out_csr1 = gidx1.adjacency_matrix_scipy(True, "csr") out_csr1 = gidx1.adjacency_matrix_scipy(False, "csr")
assert_array_equal(out_csr.indptr, out_csr1.indptr) assert_array_equal(out_csr.indptr, out_csr1.indptr)
assert_array_equal(out_csr.indices, out_csr1.indices) assert_array_equal(out_csr.indices, out_csr1.indices)
......
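In the multiprocessing and shared-memory checks above, `transpose=True` now selects the in-edge (destination-major) adjacency, which is what the SPMV in check_compute_func relies on: `spmm(adj, feat)` with `adj` indexed `[dst, src]` reproduces `copy_src` + `sum`. A short sketch of that identity, assuming a PyTorch backend:

```python
import torch
import dgl
import dgl.function as fn

g = dgl.graph((torch.tensor([0, 0, 1]), torch.tensor([1, 2, 2])))
feat = torch.randn(g.number_of_nodes(), 4)
g.ndata['feat'] = feat

# Message passing: sum incoming source features.
g.update_all(fn.copy_src(src='feat', out='m'), fn.sum(msg='m', out='h'))

# Same computation as an SPMV with the transposed (dst-major) adjacency.
adj_t = g.adjacency_matrix(transpose=True)
h = torch.sparse.mm(adj_t, feat)
assert torch.allclose(g.ndata['h'], h)
```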
...@@ -25,7 +25,7 @@ def test_graph_conv(idtype, out_dim): ...@@ -25,7 +25,7 @@ def test_graph_conv(idtype, out_dim):
g = dgl.from_networkx(nx.path_graph(3)) g = dgl.from_networkx(nx.path_graph(3))
g = g.astype(idtype).to(F.ctx()) g = g.astype(idtype).to(F.ctx())
ctx = F.ctx() ctx = F.ctx()
adj = g.adjacency_matrix(transpose=False, ctx=ctx) adj = g.adjacency_matrix(transpose=True, ctx=ctx)
conv = nn.GraphConv(5, out_dim, norm='none', bias=True) conv = nn.GraphConv(5, out_dim, norm='none', bias=True)
conv.initialize(ctx=ctx) conv.initialize(ctx=ctx)
...@@ -136,7 +136,7 @@ def _S2AXWb(A, N, X, W, b): ...@@ -136,7 +136,7 @@ def _S2AXWb(A, N, X, W, b):
def test_tagconv(out_dim): def test_tagconv(out_dim):
g = dgl.from_networkx(nx.path_graph(3)).to(F.ctx()) g = dgl.from_networkx(nx.path_graph(3)).to(F.ctx())
ctx = F.ctx() ctx = F.ctx()
adj = g.adjacency_matrix(transpose=False, ctx=ctx) adj = g.adjacency_matrix(transpose=True, ctx=ctx)
norm = mx.nd.power(g.in_degrees().astype('float32'), -0.5) norm = mx.nd.power(g.in_degrees().astype('float32'), -0.5)
conv = nn.TAGConv(5, out_dim, bias=True) conv = nn.TAGConv(5, out_dim, bias=True)
...@@ -317,7 +317,7 @@ def test_dense_cheb_conv(out_dim): ...@@ -317,7 +317,7 @@ def test_dense_cheb_conv(out_dim):
for k in range(1, 4): for k in range(1, 4):
ctx = F.ctx() ctx = F.ctx()
g = dgl.from_scipy(sp.sparse.random(100, 100, density=0.3)).to(F.ctx()) g = dgl.from_scipy(sp.sparse.random(100, 100, density=0.3)).to(F.ctx())
adj = g.adjacency_matrix(transpose=False, ctx=ctx).tostype('default') adj = g.adjacency_matrix(transpose=True, ctx=ctx).tostype('default')
cheb = nn.ChebConv(5, out_dim, k) cheb = nn.ChebConv(5, out_dim, k)
dense_cheb = nn.DenseChebConv(5, out_dim, k) dense_cheb = nn.DenseChebConv(5, out_dim, k)
cheb.initialize(ctx=ctx) cheb.initialize(ctx=ctx)
...@@ -342,7 +342,7 @@ def test_dense_cheb_conv(out_dim): ...@@ -342,7 +342,7 @@ def test_dense_cheb_conv(out_dim):
def test_dense_graph_conv(idtype, g, norm_type, out_dim): def test_dense_graph_conv(idtype, g, norm_type, out_dim):
g = g.astype(idtype).to(F.ctx()) g = g.astype(idtype).to(F.ctx())
ctx = F.ctx() ctx = F.ctx()
adj = g.adjacency_matrix(transpose=False, ctx=ctx).tostype('default') adj = g.adjacency_matrix(transpose=True, ctx=ctx).tostype('default')
conv = nn.GraphConv(5, out_dim, norm=norm_type, bias=True) conv = nn.GraphConv(5, out_dim, norm=norm_type, bias=True)
dense_conv = nn.DenseGraphConv(5, out_dim, norm=norm_type, bias=True) dense_conv = nn.DenseGraphConv(5, out_dim, norm=norm_type, bias=True)
conv.initialize(ctx=ctx) conv.initialize(ctx=ctx)
...@@ -362,7 +362,7 @@ def test_dense_graph_conv(idtype, g, norm_type, out_dim): ...@@ -362,7 +362,7 @@ def test_dense_graph_conv(idtype, g, norm_type, out_dim):
def test_dense_sage_conv(idtype, g, out_dim): def test_dense_sage_conv(idtype, g, out_dim):
g = g.astype(idtype).to(F.ctx()) g = g.astype(idtype).to(F.ctx())
ctx = F.ctx() ctx = F.ctx()
adj = g.adjacency_matrix(transpose=False, ctx=ctx).tostype('default') adj = g.adjacency_matrix(transpose=True, ctx=ctx).tostype('default')
sage = nn.SAGEConv(5, out_dim, 'gcn') sage = nn.SAGEConv(5, out_dim, 'gcn')
dense_sage = nn.DenseSAGEConv(5, out_dim) dense_sage = nn.DenseSAGEConv(5, out_dim)
sage.initialize(ctx=ctx) sage.initialize(ctx=ctx)
......
...@@ -24,7 +24,7 @@ def _AXWb(A, X, W, b): ...@@ -24,7 +24,7 @@ def _AXWb(A, X, W, b):
def test_graph_conv0(out_dim): def test_graph_conv0(out_dim):
g = dgl.DGLGraph(nx.path_graph(3)).to(F.ctx()) g = dgl.DGLGraph(nx.path_graph(3)).to(F.ctx())
ctx = F.ctx() ctx = F.ctx()
adj = g.adjacency_matrix(transpose=False, ctx=ctx) adj = g.adjacency_matrix(transpose=True, ctx=ctx)
conv = nn.GraphConv(5, out_dim, norm='none', bias=True) conv = nn.GraphConv(5, out_dim, norm='none', bias=True)
conv = conv.to(ctx) conv = conv.to(ctx)
...@@ -186,7 +186,7 @@ def test_tagconv(out_dim): ...@@ -186,7 +186,7 @@ def test_tagconv(out_dim):
g = dgl.DGLGraph(nx.path_graph(3)) g = dgl.DGLGraph(nx.path_graph(3))
g = g.to(F.ctx()) g = g.to(F.ctx())
ctx = F.ctx() ctx = F.ctx()
adj = g.adjacency_matrix(transpose=False, ctx=ctx) adj = g.adjacency_matrix(transpose=True, ctx=ctx)
norm = th.pow(g.in_degrees().float(), -0.5) norm = th.pow(g.in_degrees().float(), -0.5)
conv = nn.TAGConv(5, out_dim, bias=True) conv = nn.TAGConv(5, out_dim, bias=True)
...@@ -806,7 +806,7 @@ def test_dense_graph_conv(norm_type, g, idtype, out_dim): ...@@ -806,7 +806,7 @@ def test_dense_graph_conv(norm_type, g, idtype, out_dim):
g = g.astype(idtype).to(F.ctx()) g = g.astype(idtype).to(F.ctx())
ctx = F.ctx() ctx = F.ctx()
# TODO(minjie): enable the following option after #1385 # TODO(minjie): enable the following option after #1385
adj = g.adjacency_matrix(transpose=False, ctx=ctx).to_dense() adj = g.adjacency_matrix(transpose=True, ctx=ctx).to_dense()
conv = nn.GraphConv(5, out_dim, norm=norm_type, bias=True) conv = nn.GraphConv(5, out_dim, norm=norm_type, bias=True)
dense_conv = nn.DenseGraphConv(5, out_dim, norm=norm_type, bias=True) dense_conv = nn.DenseGraphConv(5, out_dim, norm=norm_type, bias=True)
dense_conv.weight.data = conv.weight.data dense_conv.weight.data = conv.weight.data
...@@ -824,7 +824,7 @@ def test_dense_graph_conv(norm_type, g, idtype, out_dim): ...@@ -824,7 +824,7 @@ def test_dense_graph_conv(norm_type, g, idtype, out_dim):
def test_dense_sage_conv(g, idtype, out_dim): def test_dense_sage_conv(g, idtype, out_dim):
g = g.astype(idtype).to(F.ctx()) g = g.astype(idtype).to(F.ctx())
ctx = F.ctx() ctx = F.ctx()
adj = g.adjacency_matrix(transpose=False, ctx=ctx).to_dense() adj = g.adjacency_matrix(transpose=True, ctx=ctx).to_dense()
sage = nn.SAGEConv(5, out_dim, 'gcn') sage = nn.SAGEConv(5, out_dim, 'gcn')
dense_sage = nn.DenseSAGEConv(5, out_dim) dense_sage = nn.DenseSAGEConv(5, out_dim)
dense_sage.fc.weight.data = sage.fc_neigh.weight.data dense_sage.fc.weight.data = sage.fc_neigh.weight.data
...@@ -911,7 +911,7 @@ def test_dense_cheb_conv(out_dim): ...@@ -911,7 +911,7 @@ def test_dense_cheb_conv(out_dim):
ctx = F.ctx() ctx = F.ctx()
g = dgl.DGLGraph(sp.sparse.random(100, 100, density=0.1), readonly=True) g = dgl.DGLGraph(sp.sparse.random(100, 100, density=0.1), readonly=True)
g = g.to(F.ctx()) g = g.to(F.ctx())
adj = g.adjacency_matrix(transpose=False, ctx=ctx).to_dense() adj = g.adjacency_matrix(transpose=True, ctx=ctx).to_dense()
cheb = nn.ChebConv(5, out_dim, k, None) cheb = nn.ChebConv(5, out_dim, k, None)
dense_cheb = nn.DenseChebConv(5, out_dim, k) dense_cheb = nn.DenseChebConv(5, out_dim, k)
#for i in range(len(cheb.fc)): #for i in range(len(cheb.fc)):
......
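The NN-module tests above all fetch the dense adjacency with `transpose=True` before comparing a sparse conv against its Dense* counterpart. A minimal sketch of that comparison for GraphConv/DenseGraphConv with `norm='none'`, assuming a PyTorch backend:

```python
import torch
import dgl
import dgl.nn.pytorch as dglnn

g = dgl.graph((torch.tensor([0, 1, 1, 2]), torch.tensor([1, 0, 2, 1])))
feat = torch.randn(g.number_of_nodes(), 5)

# Dense adjacency with rows indexed by destination nodes.
adj = g.adjacency_matrix(transpose=True).to_dense()

conv = dglnn.GraphConv(5, 2, norm='none', bias=True)
dense_conv = dglnn.DenseGraphConv(5, 2, norm='none', bias=True)
dense_conv.weight.data = conv.weight.data
dense_conv.bias.data = conv.bias.data

# Both paths compute the same neighbourhood sum, so the outputs should match.
assert torch.allclose(conv(g, feat), dense_conv(adj, feat), atol=1e-5)
```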