Unverified Commit 9e9a9488 authored by Da Zheng, committed by GitHub

[GraphIndex] Immutable graph index using MXNet CSRArray (#86)

* add immutable graph index.

* update ImmutableGraphIndex.

* add benchmarks of subgraph generation.

* fix node_subgraphs.

* use scipy synthetic data in immutable graph.

* create immutable_graph.

* use spmv in gcn.

* fix immutable graph index.

* update graph index.

* create ImmutableSubgraphIndex.

* return subgraph node and edge mapping.

* fix benchmark.

* add mapping to subgraph nid.

* test in_edges and out_edges.

* update benchmark.

* Fix immutable subgraph.

* complete in_edges and out_edges.

* fix benchmarks.

* add test on node_subgraphs

* add sse.

* construct subgraphs in parallel.

* add in/out degree(s).

* make immutable graph index backend-specific.

* implement has_edge(s) and edge_id(s).

* Revert "use spmv in gcn."

This reverts commit 9cfed5f5fb3dd2ed9b98745348b1c0e9731ed7f7.

* implement node_subgraphs in GraphIndex.

* fix sse.

* address comments.

* address comments.

* Update mxnet docker.
parent 0d6cd30b
......@@ -139,7 +139,7 @@ pipeline {
stage('CPU') {
agent {
docker {
image 'zhengda1936/dgl-mxnet-cpu:v2'
image 'zhengda1936/dgl-mxnet-cpu:v3'
}
}
stages {
......
"""
Learning Steady-States of Iterative Algorithms over Graphs
Paper: http://proceedings.mlr.press/v80/dai18a.html
"""
import argparse
import numpy as np
import time
import mxnet as mx
from mxnet import gluon
import dgl
import dgl.function as fn
from dgl import DGLGraph, utils
from dgl.data import register_data_args, load_data
def gcn_msg(src, edge):
# TODO should we use concat?
return {'m': mx.nd.concat(src['in'], src['h'], dim=1)}
def gcn_reduce(node, msgs):
return {'accum': mx.nd.sum(msgs['m'], 1)}
class NodeUpdate(gluon.Block):
def __init__(self, out_feats, activation=None, alpha=0.9):
super(NodeUpdate, self).__init__()
self.linear1 = gluon.nn.Dense(out_feats, activation=activation)
# TODO what is the dimension here?
self.linear2 = gluon.nn.Dense(out_feats)
self.alpha = alpha
def forward(self, node):
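# SSE steady-state update: h <- (1 - alpha) * h + alpha * f(input, accum).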
tmp = mx.nd.concat(node['in'], node['accum'], dim=1)
hidden = self.linear2(self.linear1(tmp))
return {'h': node['h'] * (1 - self.alpha) + self.alpha * hidden}
class SSEUpdateHidden(gluon.Block):
def __init__(self,
n_hidden,
activation,
dropout,
use_spmv):
super(SSEUpdateHidden, self).__init__()
self.layer = NodeUpdate(n_hidden, activation)
self.dropout = dropout
self.use_spmv = use_spmv
def forward(self, g, vertices):
if self.use_spmv:
feat = g.get_n_repr()['in']
h = g.get_n_repr()['h']
g.set_n_repr({'cat': mx.nd.concat(feat, h, dim=1)})
msg_func = fn.copy_src(src='cat', out='tmp')
reduce_func = fn.sum(msg='tmp', out='accum')
else:
msg_func = gcn_msg
reduce_func = gcn_reduce
if vertices is None:
g.update_all(msg_func, reduce_func, self.layer)
ret = g.get_n_repr()['h']
else:
# We don't need dropout for inference.
if self.dropout:
# TODO here we apply dropout on all vertex representation.
val = mx.nd.Dropout(g.get_n_repr()['h'], p=self.dropout)
g.set_n_repr({'h': val})
g.pull(vertices, msg_func, reduce_func, self.layer)
ctx = g.get_n_repr()['h'].context
ret = mx.nd.take(g.get_n_repr()['h'], vertices.tousertensor().as_in_context(ctx))
return ret
class SSEPredict(gluon.Block):
def __init__(self, update_hidden, out_feats, dropout):
super(SSEPredict, self).__init__()
self.linear1 = gluon.nn.Dense(out_feats, activation='relu')
self.linear2 = gluon.nn.Dense(out_feats)
self.update_hidden = update_hidden
self.dropout = dropout
def forward(self, g, vertices):
hidden = self.update_hidden(g, vertices)
if self.dropout:
hidden = mx.nd.Dropout(hidden, p=self.dropout)
return self.linear2(self.linear1(hidden))
def subgraph_gen(g, seed_vertices):
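# Expand each batch of seed vertices to its 1-hop in-neighborhood, then
# build all induced subgraphs with a single batched g.subgraphs() call.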
vertices = []
for seed in seed_vertices:
src, _, _ = g.in_edges(seed)
vs = np.concatenate((src.asnumpy(), seed.asnumpy()), axis=0)
vs = mx.nd.array(np.unique(vs), dtype=np.int64)
vertices.append(vs)
subgs = g.subgraphs(vertices)
nids = []
for i, subg in enumerate(subgs):
subg.copy_from_parent()
nids.append(subg.map_to_subgraph_nid(utils.toindex(seed_vertices[i])))
return subgs, nids
def main(args, data):
features = mx.nd.array(data.features)
labels = mx.nd.array(data.labels)
train_size = int(len(labels) * args.train_percent)
train_vs = np.arange(train_size, dtype='int64')
eval_vs = np.arange(train_size, len(labels), dtype='int64')
print("train size: " + str(len(train_vs)))
print("eval size: " + str(len(eval_vs)))
train_labels = mx.nd.array(data.labels[train_vs])
eval_labels = mx.nd.array(data.labels[eval_vs])
in_feats = features.shape[1]
n_classes = data.num_labels
n_edges = data.graph.number_of_edges()
if args.gpu < 0:
    cuda = False
    ctx = mx.cpu(0)
else:
    cuda = True
    ctx = mx.gpu(args.gpu)
    features = features.as_in_context(ctx)
    train_labels = train_labels.as_in_context(ctx)
    eval_labels = eval_labels.as_in_context(ctx)
# create the SSE model
try:
graph = data.graph.get_graph()
except AttributeError:
graph = data.graph
g = DGLGraph(graph, readonly=True)
g.set_n_repr({'in': features, 'h': mx.nd.random.normal(shape=(g.number_of_nodes(), args.n_hidden),
ctx=ctx)})
update_hidden = SSEUpdateHidden(args.n_hidden, 'relu', args.update_dropout, args.use_spmv)
model = SSEPredict(update_hidden, args.n_hidden, args.predict_dropout)
model.initialize(ctx=ctx)
# use optimizer
num_batches = int(g.number_of_nodes() / args.batch_size)
scheduler = mx.lr_scheduler.CosineScheduler(args.n_epochs * num_batches,
args.lr * 10, 0, 0, args.lr/5)
trainer = gluon.Trainer(model.collect_params(), 'adam', {'learning_rate': args.lr,
'lr_scheduler': scheduler})
# initialize graph
dur = []
for epoch in range(args.n_epochs):
# compute vertex embedding.
update_hidden(g, None)
t0 = time.time()
permute = np.random.permutation(len(train_vs))
randv = train_vs[permute]
rand_labels = train_labels[permute]
data_iter = mx.io.NDArrayIter(data=mx.nd.array(randv, dtype='int64'), label=rand_labels,
batch_size=args.batch_size)
train_loss = 0
# Don't shadow the dataset variables `data` and `labels` from main().
seed_batches = []
label_batches = []
for batch in data_iter:
    seed_batches.append(batch.data[0])
    label_batches.append(batch.label[0])
    if len(seed_batches) < args.num_parallel_subgraphs:
        continue

    subgs, seed_ids = subgraph_gen(g, seed_batches)
    for subg, seed_id, label, seeds in zip(subgs, seed_ids, label_batches, seed_batches):
        with mx.autograd.record():
            logits = model(subg, seed_id)
            loss = mx.nd.softmax_cross_entropy(logits, label)
        loss.backward()
        trainer.step(seeds.shape[0])
        train_loss += loss.asnumpy()[0]
    seed_batches = []
    label_batches = []
#logits = model(eval_vs)
#eval_loss = mx.nd.softmax_cross_entropy(logits, eval_labels)
#eval_loss = eval_loss.asnumpy()[0]
eval_loss = 0
dur.append(time.time() - t0)
print("Epoch {:05d} | Train Loss {:.4f} | Eval Loss {:.4f} | Time(s) {:.4f} | ETputs(KTEPS) {:.2f}".format(
epoch, train_loss, eval_loss, np.mean(dur), n_edges / np.mean(dur) / 1000))
if __name__ == '__main__':
parser = argparse.ArgumentParser(description='GCN')
register_data_args(parser)
parser.add_argument("--gpu", type=int, default=-1,
help="gpu")
parser.add_argument("--lr", type=float, default=1e-3,
help="learning rate")
parser.add_argument("--batch-size", type=int, default=128,
help="number of vertices in a batch")
parser.add_argument("--n-epochs", type=int, default=20,
help="number of training epochs")
parser.add_argument("--n-hidden", type=int, default=16,
help="number of hidden gcn units")
parser.add_argument("--warmup", type=int, default=10,
help="number of iterations to warm up with large learning rate")
parser.add_argument("--update-dropout", type=float, default=0.5,
help="the dropout rate for updating vertex embedding")
parser.add_argument("--predict-dropout", type=float, default=0.5,
help="the dropout rate for prediction")
parser.add_argument("--train_percent", type=float, default=0.5,
help="the percentage of data used for training")
parser.add_argument("--use-spmv", type=bool, default=False,
help="use SpMV for faster speed.")
parser.add_argument("--num-parallel-subgraphs", type=int, default=1,
help="the number of subgraphs to construct in parallel.")
args = parser.parse_args()
# load and preprocess dataset
data = load_data(args)
main(args, data)
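As an aside (a minimal sketch, not part of this commit), here is why the --use-spmv path above is equivalent to the explicit message-passing path: copying a feature along every edge and summing at each destination is the same as multiplying by the (dst x src) adjacency matrix.

import mxnet as mx
import numpy as np

n = 4
src, dst = [0, 1, 2], [1, 2, 3]
feat = mx.nd.random.normal(shape=(n, 2))

adj = mx.nd.zeros((n, n))              # row = destination, column = source
for s, d in zip(src, dst):
    adj[d, s] = 1.0

accum_spmv = mx.nd.dot(adj, feat)      # SpMV formulation
accum_mp = mx.nd.zeros_like(feat)      # explicit message passing
for s, d in zip(src, dst):
    accum_mp[d] = accum_mp[d] + feat[s]
assert np.allclose(accum_spmv.asnumpy(), accum_mp.asnumpy())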
......@@ -64,6 +64,33 @@ class GraphOp {
* \return a list of partitioned graphs
*/
static std::vector<Graph> DisjointPartitionBySizes(const Graph* graph, IdArray sizes);
/*!
* \brief Map vids in the parent graph to the vids in the subgraph.
*
* \param parent_vid_map An array that maps the vids in the parent graph to the
* subgraph. The elements store the vertex Ids in the parent graph, and the
* indices indicate the vertex Ids in the subgraph.
* \param query The vertex Ids in the parent graph.
* \return an Id array that contains the subgraph node Ids.
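*
* For example, if parent_vid_map is [2, 5, 8] (subgraph vertex 0 is parent
* vertex 2, and so on), then a query of [8, 2] returns [2, 0].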
*/
static IdArray MapParentIdToSubgraphId(IdArray parent_vid_map, IdArray query);
/*!
* \brief Expand an Id array based on the offset array.
*
* For example,
* ids: [0, 1, 2, 3, 4],
* offset: [0, 2, 2, 5, 6, 7],
* result: [0, 0, 2, 2, 2, 3, 4].
* The offset array has one more element than the ids array;
* [offset[i], offset[i+1]) gives the positions of ids[i] in the result array.
*
* \param ids An array that contains the node or edge Ids.
* \param offset An array that contains the offset after expansion.
* \return an expanded Id array.
*/
static IdArray ExpandIds(IdArray ids, IdArray offset);
};
} // namespace dgl
......
......@@ -5,9 +5,12 @@ import os
__backend__ = os.environ.get('DGLBACKEND', 'pytorch').lower()
if __backend__ == 'numpy':
from .numpy import *
create_immutable_graph_index = None
elif __backend__ == 'pytorch':
from .pytorch import *
create_immutable_graph_index = None
elif __backend__ == 'mxnet':
from .mxnet import *
from .mxnet_immutable_graph_index import create_immutable_graph_index
else:
raise Exception("Unsupported backend %s" % __backend__)
from __future__ import absolute_import
import ctypes
import numpy as np
import networkx as nx
import scipy.sparse as sp
import mxnet as mx
from .mxnet import to_context
class ImmutableGraphIndex(object):
"""Backend-specific graph index object on immutable graphs.
We can use a CSR matrix to represent a graph structure. For functionality,
one CSR matrix is sufficient. However, for efficient access
to in-edges and out-edges of a directed graph, we need to use two CSR matrices.
In these CSR matrices, both rows and columns represent vertices. In one CSR
matrix, a row stores the in-edges of a vertex: the column index of a non-zero
entry is the Id of a neighboring source vertex, and the stored value is the
corresponding edge Id. The other CSR matrix stores the out-edges in the same
fashion.
Parameters
----------
in_csr : a csr array that stores in-edges.
MXNet CSRArray
out_csr : a csr array that stores out-edges.
MXNet CSRArray
"""
def __init__(self, in_csr, out_csr):
self._in_csr = in_csr
self._out_csr = out_csr
def number_of_nodes(self):
"""Return the number of nodes.
Returns
-------
int
The number of nodes
"""
return len(self._in_csr)
def number_of_edges(self):
"""Return the number of edges.
Returns
-------
int
The number of edges
"""
return self._in_csr.indices.shape[0]
def has_edges(self, u, v):
"""Return true if the edge exists.
Parameters
----------
u : NDArray
The src nodes.
v : NDArray
The dst nodes.
Returns
-------
NDArray
0-1 array indicating existence
"""
ids = mx.nd.contrib.edge_id(self._in_csr, v, u)
return ids >= 0
def edge_ids(self, u, v):
"""Return the edge ids.
Parameters
----------
u : NDArray
The src nodes.
v : NDArray
The dst nodes.
Returns
-------
NDArray
The edge id array (only ids of edges that exist; other pairs are dropped).
"""
ids = mx.nd.contrib.edge_id(self._in_csr, v, u)
ids = ids.asnumpy()
return ids[ids >= 0]
def predecessors(self, v, radius=1):
"""Return the predecessors of the node.
Parameters
----------
v : int
The node.
radius : int, optional
The radius of the neighborhood.
Returns
-------
NDArray
Array of predecessors
"""
if radius > 1:
raise Exception('Immutable graph doesn\'t support predecessors with radius > 1 for now.')
return self._in_csr[v].indices
def successors(self, v, radius=1):
"""Return the successors of the node.
Parameters
----------
v : int
The node.
radius : int, optional
The radius of the neighborhood.
Returns
-------
NDArray
Array of successors
"""
if radius > 1:
raise Exception('Immutable graph doesn\'t support successors with radius > 1 for now.')
return self._out_csr[v].indices
def in_edges(self, v):
"""Return the in edges of the node(s).
Parameters
----------
v : NDArray
The node(s).
Returns
-------
NDArray
The CSR index pointers (row offsets).
NDArray
The src nodes.
NDArray
The edge ids.
"""
rows = mx.nd.take(self._in_csr, v)
return rows.indptr, rows.indices, rows.data
def out_edges(self, v):
"""Return the out edges of the node(s).
Parameters
----------
v : NDArray
The node(s).
Returns
-------
NDArray
The CSR index pointers (row offsets).
NDArray
The dst nodes.
NDArray
The edge ids.
"""
rows = mx.nd.take(self._out_csr, v)
return rows.indptr, rows.indices, rows.data
def edges(self, sorted=False):
"""Return all the edges
Parameters
----------
sorted : bool
True if the returned edges are sorted by their src and dst ids.
Returns
-------
NDArray
The src nodes.
NDArray
The dst nodes.
NDArray
The edge ids.
"""
#TODO(zhengda) we need to return NDArray directly
# We don't need to take care of the sorted flag because the vertex Ids
# are already sorted.
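# In the in-CSR, rows are destinations and columns are sources,
# so src = coo.col and dst = coo.row.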
coo = self._in_csr.asscipy().tocoo()
return coo.col, coo.row, coo.data
def get_in_degree(self):
"""Return the in degrees of all nodes.
Returns
-------
NDArray
degrees
"""
return mx.nd.contrib.getnnz(self._in_csr, axis=1)
def get_out_degree(self):
"""Return the out degrees of all nodes.
Returns
-------
NDArray
degrees
"""
return mx.nd.contrib.getnnz(self._out_csr, axis=1)
def node_subgraph(self, v):
"""Return the induced node subgraph.
Parameters
----------
v : NDArray
The nodes.
Returns
-------
ImmutableGraphIndex
The subgraph index.
NDArray
Induced nodes
NDArray
Induced edges
"""
v = mx.nd.sort(v)
# when return_mapping is turned on, dgl_subgraph returns another CSRArray that
# stores the edge Ids of the original graph.
csr = mx.nd.contrib.dgl_subgraph(self._in_csr, v, return_mapping=True)
induced_nodes = v
induced_edges = csr[1].data
return ImmutableGraphIndex(csr[0], None), induced_nodes, induced_edges
def node_subgraphs(self, vs_arr):
"""Return the induced node subgraphs.
Parameters
----------
vs_arr : a vector of utils.Index
The nodes.
Returns
-------
a vector of ImmutableGraphIndex
The subgraph index.
a vector of NDArrays
Induced nodes of subgraphs.
a vector of NDArrays
Induced edges of subgraphs.
"""
vs_arr = [mx.nd.sort(v) for v in vs_arr]
res = mx.nd.contrib.dgl_subgraph(self._in_csr, *vs_arr, return_mapping=True)
in_csrs = res[0:len(vs_arr)]
induced_nodes = vs_arr
induced_edges = [e.data for e in res[len(vs_arr):]]
assert len(in_csrs) == len(induced_nodes)
assert len(in_csrs) == len(induced_edges)
gis = []
induced_ns = []
induced_es = []
for in_csr, induced_n, induced_e in zip(in_csrs, induced_nodes, induced_edges):
gis.append(ImmutableGraphIndex(in_csr, None))
induced_ns.append(induced_n)
induced_es.append(induced_e)
return gis, induced_ns, induced_es
def adjacency_matrix(self, transpose=False):
"""Return the adjacency matrix representation of this graph.
By default, a row of the returned adjacency matrix represents the destination
of an edge and a column represents the source.
When transpose is True, a row represents the source and a column represents
a destination.
Parameters
----------
transpose : bool
A flag to transpose the returned adjacency matrix.
Returns
-------
NDArray
The adjacency matrix as an MXNet sparse CSR NDArray.
"""
if transpose:
mat = self._out_csr
else:
mat = self._in_csr
indices = mat.indices
indptr = mat.indptr
data = mx.nd.ones(indices.shape, dtype=np.float32)
return mx.nd.sparse.csr_matrix((data, indices, indptr), shape=mat.shape)
def from_coo_matrix(self, out_coo):
"""construct the graph index from a SciPy coo matrix.
Parameters
----------
out_coo : SciPy coo matrix
The non-zero entries indicate out-edges of the graph.
"""
edge_ids = mx.nd.arange(0, len(out_coo.data), step=1, repeat=1, dtype=np.int32)
src = mx.nd.array(out_coo.row, dtype=np.int64)
dst = mx.nd.array(out_coo.col, dtype=np.int64)
# TODO we can't generate a csr_matrix with np.int64 directly.
self.__init__(mx.nd.sparse.csr_matrix((edge_ids, (dst, src)), shape=out_coo.shape).astype(np.int64),
mx.nd.sparse.csr_matrix((edge_ids, (src, dst)), shape=out_coo.shape).astype(np.int64))
def create_immutable_graph_index():
""" Create an empty backend-specific immutable graph index.
Returns
-------
ImmutableGraphIndex
The backend-specific immutable graph index.
"""
return ImmutableGraphIndex(None, None)
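For reference, a minimal usage sketch of this backend index (assuming an MXNet build that ships the mx.nd.contrib CSR operators used above; not part of the diff):

import scipy.sparse as spsp

idx = create_immutable_graph_index()
idx.from_coo_matrix(spsp.random(10, 10, density=0.1, format='coo'))
print(idx.number_of_nodes(), idx.number_of_edges())
print(idx.get_in_degree().asnumpy())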
......@@ -33,14 +33,18 @@ class DGLGraph(object):
Edge feature storage.
multigraph : bool, optional
Whether the graph would be a multigraph (default: False)
readonly : bool, optional
Whether the graph structure is read-only (default: False).
"""
def __init__(self,
graph_data=None,
node_frame=None,
edge_frame=None,
multigraph=False):
multigraph=False,
readonly=False):
# graph
self._graph = create_graph_index(graph_data, multigraph)
self._readonly = readonly
self._graph = create_graph_index(graph_data, multigraph, readonly)
# frame
self._node_frame = node_frame if node_frame is not None else FrameRef()
self._edge_frame = edge_frame if edge_frame is not None else FrameRef()
......@@ -1386,7 +1390,26 @@ class DGLGraph(object):
"""
induced_nodes = utils.toindex(nodes)
sgi = self._graph.node_subgraph(induced_nodes)
return dgl.DGLSubGraph(self, sgi.induced_nodes, sgi.induced_edges, sgi)
return dgl.DGLSubGraph(self, sgi.induced_nodes, sgi.induced_edges,
sgi, readonly=self._readonly)
def subgraphs(self, nodes):
"""Generate the subgraphs among the given nodes.
Parameters
----------
nodes : a list of lists or iterables
A list of node sets, one per subgraph to construct.
Returns
-------
G : A list of DGLSubGraph
The subgraphs.
"""
induced_nodes = [utils.toindex(n) for n in nodes]
sgis = self._graph.node_subgraphs(induced_nodes)
return [dgl.DGLSubGraph(self, sgi.induced_nodes, sgi.induced_edges,
sgi, readonly=self._readonly) for sgi in sgis]
def edge_subgraph(self, edges):
"""Generate the subgraph among the given edges.
......
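A short usage sketch of the new batched API (synthetic data; assumes the MXNet backend, which is currently the only one providing a read-only graph index; not part of the diff):

import scipy.sparse as spsp
import dgl

g = dgl.DGLGraph(spsp.random(100, 100, density=0.05, format='coo'), readonly=True)
subgs = g.subgraphs([[0, 1, 2], [3, 4], [5, 6, 7, 8]])  # one DGLSubGraph per node set
for sg in subgs:
    print(sg.number_of_nodes(), sg.number_of_edges())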
......@@ -9,6 +9,7 @@ from ._ffi.base import c_array
from ._ffi.function import _init_api
from . import backend as F
from . import utils
from .immutable_graph_index import create_immutable_graph_index
GraphIndexHandle = ctypes.c_void_p
......@@ -432,6 +433,24 @@ class GraphIndex(object):
induced_edges = utils.toindex(rst(2))
return SubgraphIndex(rst(0), self, v, induced_edges)
def node_subgraphs(self, vs_arr):
"""Return the induced node subgraphs.
Parameters
----------
vs_arr : a list of utils.Index
The nodes.
Returns
-------
a vector of SubgraphIndex
The subgraph index.
"""
gis = []
for v in vs_arr:
gis.append(self.node_subgraph(v))
return gis
def edge_subgraph(self, e):
"""Return the induced edge subgraph.
......@@ -451,9 +470,20 @@ class GraphIndex(object):
induced_nodes = utils.toindex(rst(1))
return SubgraphIndex(rst(0), self, induced_nodes, e)
def adjacency_matrix(self):
def adjacency_matrix(self, transpose=False):
"""Return the adjacency matrix representation of this graph.
By default, a row of the returned adjacency matrix represents the destination
of an edge and a column represents the source.
When transpose is True, a row represents the source and a column represents
a destination.
Parameters
----------
transpose : bool
A flag to transpose the returned adjacency matrix.
Returns
-------
utils.CtxCachedObject
......@@ -463,6 +493,9 @@ class GraphIndex(object):
src, dst, _ = self.edges(sorted=False)
src = F.unsqueeze(src.tousertensor(), 0)
dst = F.unsqueeze(dst.tousertensor(), 0)
if transpose:
idx = F.pack([src, dst])
else:
idx = F.pack([dst, src])
n = self.number_of_nodes()
dat = F.ones((self.number_of_edges(),))
......@@ -658,6 +691,25 @@ class SubgraphIndex(GraphIndex):
"""
return self._induced_edges
def map_to_subgraph_nid(subgraph, parent_nids):
"""Map parent node Ids to the subgraph node Ids.
Parameters
----------
subgraph: SubgraphIndex or ImmutableSubgraphIndex
the graph index of a subgraph
parent_nids: utils.Index
Node Ids in the parent graph.
Returns
-------
utils.Index
Node Ids in the subgraph.
"""
return utils.toindex(_CAPI_DGLMapSubgraphNID(subgraph.induced_nodes.todgltensor(),
parent_nids.todgltensor()))
def disjoint_union(graphs):
"""Return a disjoint union of the input graphs.
......@@ -716,7 +768,7 @@ def disjoint_partition(graph, num_or_size_splits):
graphs.append(GraphIndex(handle))
return graphs
def create_graph_index(graph_data=None, multigraph=False):
def create_graph_index(graph_data=None, multigraph=False, readonly=False):
"""Create a graph index object.
Parameters
......@@ -729,6 +781,12 @@ def create_graph_index(graph_data=None, multigraph=False):
if isinstance(graph_data, GraphIndex):
return graph_data
if readonly and graph_data is not None:
gi = create_immutable_graph_index(graph_data)
# If we can't create an immutable graph index, we'll have to fall back.
if gi is not None:
return gi
handle = _CAPI_DGLGraphCreate(multigraph)
gi = GraphIndex(handle)
......
from __future__ import absolute_import
import ctypes
import numpy as np
import networkx as nx
import scipy.sparse as sp
from ._ffi.function import _init_api
from . import backend as F
from . import utils
class ImmutableGraphIndex(object):
"""Graph index object on immutable graphs.
Parameters
----------
backend_sparse : a sparse matrix container provided by the backend framework.
"""
def __init__(self, backend_sparse):
self._sparse = backend_sparse
self._num_nodes = None
self._num_edges = None
self._in_deg = None
self._out_deg = None
self._cache = {}
def add_nodes(self, num):
"""Add nodes.
Parameters
----------
num : int
Number of nodes to be added.
"""
raise Exception('Immutable graph doesn\'t support adding nodes')
def add_edge(self, u, v):
"""Add one edge.
Parameters
----------
u : int
The src node.
v : int
The dst node.
"""
raise Exception('Immutable graph doesn\'t support adding an edge')
def add_edges(self, u, v):
"""Add many edges.
Parameters
----------
u : utils.Index
The src nodes.
v : utils.Index
The dst nodes.
"""
raise Exception('Immutable graph doesn\'t support adding edges')
def clear(self):
"""Clear the graph."""
raise Exception('Immutable graph doesn\'t support clearing up')
def number_of_nodes(self):
"""Return the number of nodes.
Returns
-------
int
The number of nodes
"""
if self._num_nodes is None:
self._num_nodes = self._sparse.number_of_nodes()
return self._num_nodes
def number_of_edges(self):
"""Return the number of edges.
Returns
-------
int
The number of edges
"""
if self._num_edges is None:
self._num_edges = self._sparse.number_of_edges()
return self._num_edges
def has_node(self, vid):
"""Return true if the node exists.
Parameters
----------
vid : int
The nodes
Returns
-------
bool
True if the node exists
"""
return 0 <= vid < self.number_of_nodes()
def has_nodes(self, vids):
"""Return true if the nodes exist.
Parameters
----------
vids : utils.Index
The nodes
Returns
-------
utils.Index
0-1 array indicating existence
"""
vid_array = vids.tousertensor()
return utils.toindex(vid_array < self.number_of_nodes())
def has_edge_between(self, u, v):
"""Return true if the edge exists.
Parameters
----------
u : int
The src node.
v : int
The dst node.
Returns
-------
bool
True if the edge exists
"""
u = F.tensor([u])
v = F.tensor([v])
return self._sparse.has_edges(u, v).asnumpy()[0]
def has_edges_between(self, u, v):
"""Return true if the edge exists.
Parameters
----------
u : utils.Index
The src nodes.
v : utils.Index
The dst nodes.
Returns
-------
utils.Index
0-1 array indicating existence
"""
return utils.toindex(self._sparse.has_edges(u.tousertensor(), v.tousertensor()))
def predecessors(self, v, radius=1):
"""Return the predecessors of the node.
Parameters
----------
v : int
The node.
radius : int, optional
The radius of the neighborhood.
Returns
-------
utils.Index
Array of predecessors
"""
pred = self._sparse.predecessors(v, radius)
return utils.toindex(pred)
def successors(self, v, radius=1):
"""Return the successors of the node.
Parameters
----------
v : int
The node.
radius : int, optional
The radius of the neighborhood.
Returns
-------
utils.Index
Array of successors
"""
succ = self._sparse.successors(v, radius)
return utils.toindex(succ)
def edge_id(self, u, v):
"""Return the id of the edge.
Parameters
----------
u : int
The src node.
v : int
The dst node.
Returns
-------
utils.Index
The edge id array.
"""
u = F.tensor([u])
v = F.tensor([v])
eid = self._sparse.edge_ids(u, v)
return utils.toindex(eid)
def edge_ids(self, u, v):
"""Return the edge ids.
Parameters
----------
u : utils.Index
The src nodes.
v : utils.Index
The dst nodes.
Returns
-------
utils.Index
The edge id array.
"""
u = u.tousertensor()
v = v.tousertensor()
ids = self._sparse.edge_ids(u, v)
return utils.toindex(ids)
def in_edges(self, v):
"""Return the in edges of the node(s).
Parameters
----------
v : utils.Index
The node(s).
Returns
-------
utils.Index
The src nodes.
utils.Index
The dst nodes.
utils.Index
The edge ids.
"""
dst = v.tousertensor()
indptr, src, edges = self._sparse.in_edges(dst)
off = utils.toindex(indptr)
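# Expand each queried vertex id once per in-edge so dst aligns
# element-wise with the src and edge id arrays.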
dst = _CAPI_DGLExpandIds(v.todgltensor(), off.todgltensor())
return utils.toindex(src), utils.toindex(dst), utils.toindex(edges)
def out_edges(self, v):
"""Return the out edges of the node(s).
Parameters
----------
v : utils.Index
The node(s).
Returns
-------
utils.Index
The src nodes.
utils.Index
The dst nodes.
utils.Index
The edge ids.
"""
src = v.tousertensor()
indptr, dst, edges = self._sparse.out_edges(src)
off = utils.toindex(indptr)
src = _CAPI_DGLExpandIds(v.todgltensor(), off.todgltensor())
return utils.toindex(src), utils.toindex(dst), utils.toindex(edges)
def edges(self, sorted=False):
"""Return all the edges
Parameters
----------
sorted : bool
True if the returned edges are sorted by their src and dst ids.
Returns
-------
utils.Index
The src nodes.
utils.Index
The dst nodes.
utils.Index
The edge ids.
"""
if "all_edges" in self._cache:
return self._cache["all_edges"]
src, dst, edges = self._sparse.edges(sorted)
self._cache["all_edges"] = (utils.toindex(src), utils.toindex(dst), utils.toindex(edges))
return self._cache["all_edges"]
def _get_in_degree(self):
if 'in_deg' not in self._cache:
self._cache['in_deg'] = self._sparse.get_in_degree()
return self._cache['in_deg']
def _get_out_degree(self):
if 'out_deg' not in self._cache:
self._cache['out_deg'] = self._sparse.get_out_degree()
return self._cache['out_deg']
def in_degree(self, v):
"""Return the in degree of the node.
Parameters
----------
v : int
The node.
Returns
-------
int
The in degree.
"""
deg = self._get_in_degree()
return deg[v]
def in_degrees(self, v):
"""Return the in degrees of the nodes.
Parameters
----------
v : utils.Index
The nodes.
Returns
-------
utils.Index
The in degree array.
"""
v_array = v.tousertensor()
deg = self._get_in_degree()
return utils.toindex(F.gather_row(deg, v_array))
def out_degree(self, v):
"""Return the out degree of the node.
Parameters
----------
v : int
The node.
Returns
-------
int
The out degree.
"""
deg = self._get_out_degree()
return deg[v]
def out_degrees(self, v):
"""Return the out degrees of the nodes.
Parameters
----------
v : utils.Index
The nodes.
Returns
-------
utils.Index
The out degree array.
"""
v_array = v.tousertensor()
deg = self._get_out_degree()
return utils.toindex(F.gather_row(deg, v_array))
def node_subgraph(self, v):
"""Return the induced node subgraph.
Parameters
----------
v : utils.Index
The nodes.
Returns
-------
ImmutableSubgraphIndex
The subgraph index.
"""
v = v.tousertensor()
gi, induced_n, induced_e = self._sparse.node_subgraph(v)
induced_nodes = utils.toindex(induced_n)
induced_edges = utils.toindex(induced_e)
return ImmutableSubgraphIndex(gi, self, induced_nodes, induced_edges)
def node_subgraphs(self, vs_arr):
"""Return the induced node subgraphs.
Parameters
----------
vs_arr : a vector of utils.Index
The nodes.
Returns
-------
a vector of ImmutableSubgraphIndex
The subgraph index.
"""
vs_arr = [v.tousertensor() for v in vs_arr]
gis, induced_nodes, induced_edges = self._sparse.node_subgraphs(vs_arr)
induced_nodes = [utils.toindex(v) for v in induced_nodes]
induced_edges = [utils.toindex(e) for e in induced_edges]
return [ImmutableSubgraphIndex(gi, self, induced_n,
induced_e) for gi, induced_n, induced_e in zip(gis, induced_nodes, induced_edges)]
def adjacency_matrix(self, transpose=False):
"""Return the adjacency matrix representation of this graph.
By default, a row of the returned adjacency matrix represents the destination
of an edge and a column represents the source.
When transpose is True, a row represents the source and a column represents
a destination.
Parameters
----------
transpose : bool
A flag to transpose the returned adjacency matrix.
Returns
-------
utils.CtxCachedObject
An object that returns tensor given context.
"""
def get_adj(ctx):
new_mat = self._sparse.adjacency_matrix(transpose)
return F.to_context(new_mat, ctx)
key = 'out_adj' if transpose else 'in_adj'
if key not in self._cache:
    # Cache the context-cached adjacency so repeated calls reuse it.
    self._cache[key] = utils.CtxCachedObject(lambda ctx: get_adj(ctx))
return self._cache[key]
def incidence_matrix(self, oriented=False):
"""Return the incidence matrix representation of this graph.
Parameters
----------
oriented : bool, optional (default=False)
Whether the returned incidence matrix is oriented.
Returns
-------
utils.CtxCachedObject
An object that returns tensor given context.
"""
raise Exception('immutable graph doesn\'t support incidence_matrix for now.')
def to_networkx(self):
"""Convert to networkx graph.
The edge id will be saved as the 'id' edge attribute.
Returns
-------
networkx.DiGraph
The nx graph
"""
src, dst, eid = self.edges()
ret = nx.DiGraph()
for u, v, id in zip(src, dst, eid):
ret.add_edge(u, v, id=id)
return ret
def from_networkx(self, nx_graph):
"""Convert from networkx graph.
If the 'id' edge attribute exists, edges are added following
the edge id order. Otherwise, the order is undefined.
Parameters
----------
nx_graph : networkx.DiGraph
The nx graph
"""
assert isinstance(nx_graph, nx.DiGraph), "The input graph has to be a NetworkX DiGraph."
# We store edge Ids as an edge attribute.
out_mat = nx.convert_matrix.to_scipy_sparse_matrix(nx_graph, format='coo')
self._sparse.from_coo_matrix(out_mat)
def from_scipy_sparse_matrix(self, adj):
"""Convert from scipy sparse matrix.
Parameters
----------
adj : scipy sparse matrix
"""
assert isinstance(adj, sp.csr_matrix) or isinstance(adj, sp.coo_matrix), \
"The input matrix has to be a SciPy sparse matrix."
out_mat = adj.tocoo()
self._sparse.from_coo_matrix(out_mat)
def line_graph(self, backtracking=True):
"""Return the line graph of this graph.
Parameters
----------
backtracking : bool, optional (default=True)
Whether (i, j) ~ (j, i) in L(G).
(i, j) ~ (j, i) is the behavior of networkx.line_graph.
Returns
-------
ImmutableGraphIndex
The line graph of this graph.
"""
raise Exception('immutable graph doesn\'t support line_graph')
class ImmutableSubgraphIndex(ImmutableGraphIndex):
def __init__(self, backend_sparse, parent, induced_nodes, induced_edges):
super(ImmutableSubgraphIndex, self).__init__(backend_sparse)
self._parent = parent
self._induced_nodes = induced_nodes
self._induced_edges = induced_edges
@property
def induced_edges(self):
return self._induced_edges
@property
def induced_nodes(self):
return self._induced_nodes
def create_immutable_graph_index(graph_data=None):
"""Create a graph index object.
Parameters
----------
graph_data : graph data, optional
Data to initialize graph. Same as networkx's semantics.
"""
if isinstance(graph_data, ImmutableGraphIndex):
return graph_data
assert F.create_immutable_graph_index is not None, \
"The selected backend doesn't support read-only graph!"
gi = ImmutableGraphIndex(F.create_immutable_graph_index())
if graph_data is None:
return gi
# scipy format
if isinstance(graph_data, sp.spmatrix):
try:
gi.from_scipy_sparse_matrix(graph_data)
return gi
except Exception:
raise Exception('Graph data is not a valid scipy sparse matrix.')
# networkx - any format
try:
gi.from_networkx(graph_data)
except Exception:
raise Exception('Error while creating graph from input of type "%s".'
% type(graph_data))
return gi
_init_api("dgl.immutable_graph_index")
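For intuition, the expansion that _CAPI_DGLExpandIds performs for in_edges/out_edges above can be sketched in NumPy (an illustration only, not the actual binding):

import numpy as np

def expand_ids(ids, offsets):
    # offsets has len(ids) + 1 entries; ids[i] fills
    # result[offsets[i]:offsets[i+1]], i.e. once per incident edge.
    return np.repeat(ids, np.diff(offsets))

assert expand_ids([0, 1, 2, 3, 4], [0, 2, 2, 5, 6, 7]).tolist() == [0, 0, 2, 2, 2, 3, 4]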
......@@ -7,6 +7,7 @@ from . import backend as F
from .frame import Frame, FrameRef
from .graph import DGLGraph
from . import utils
from .graph_index import map_to_subgraph_nid
class DGLSubGraph(DGLGraph):
"""The subgraph class.
......@@ -42,9 +43,11 @@ class DGLSubGraph(DGLGraph):
The graph index.
shared : bool, optional
Whether the subgraph shares node/edge features with the parent graph.
readonly : bool, optional
Whether the graph structure is read-only (default: False).
"""
def __init__(self, parent, parent_nid, parent_eid, graph_idx, shared=False):
super(DGLSubGraph, self).__init__(graph_data=graph_idx)
def __init__(self, parent, parent_nid, parent_eid, graph_idx, shared=False, readonly=False):
super(DGLSubGraph, self).__init__(graph_data=graph_idx, readonly=readonly)
self._parent = parent
self._parent_nid = parent_nid
self._parent_eid = parent_eid
......@@ -114,3 +117,6 @@ class DGLSubGraph(DGLGraph):
if self._parent._edge_frame.num_rows != 0:
self._edge_frame = FrameRef(Frame(
self._parent._edge_frame[self._parent_eid]))
def map_to_subgraph_nid(self, parent_vids):
return map_to_subgraph_nid(self._graph, parent_vids)
......@@ -140,6 +140,20 @@ TVM_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphHasVertices")
*rv = gptr->HasVertices(vids);
});
TVM_REGISTER_GLOBAL("graph_index._CAPI_DGLMapSubgraphNID")
.set_body([] (TVMArgs args, TVMRetValue* rv) {
const IdArray parent_vids = IdArray::FromDLPack(CreateTmpDLManagedTensor(args[0]));
const IdArray query = IdArray::FromDLPack(CreateTmpDLManagedTensor(args[1]));
*rv = GraphOp::MapParentIdToSubgraphId(parent_vids, query);
});
TVM_REGISTER_GLOBAL("immutable_graph_index._CAPI_DGLExpandIds")
.set_body([] (TVMArgs args, TVMRetValue* rv) {
const IdArray ids = IdArray::FromDLPack(CreateTmpDLManagedTensor(args[0]));
const IdArray offsets = IdArray::FromDLPack(CreateTmpDLManagedTensor(args[1]));
*rv = GraphOp::ExpandIds(ids, offsets);
});
TVM_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphHasEdgeBetween")
.set_body([] (TVMArgs args, TVMRetValue* rv) {
GraphHandle ghandle = args[0];
......
......@@ -7,6 +7,12 @@
#include <algorithm>
namespace dgl {
namespace {
inline bool IsValidIdArray(const IdArray& arr) {
return arr->ctx.device_type == kDLCPU && arr->ndim == 1
&& arr->dtype.code == kDLInt && arr->dtype.bits == 64;
}
} // namespace
Graph GraphOp::LineGraph(const Graph* g, bool backtracking) {
typedef std::pair<dgl_id_t, dgl_id_t> entry;
......@@ -124,4 +130,56 @@ std::vector<Graph> GraphOp::DisjointPartitionBySizes(const Graph* graph, IdArray
return rst;
}
IdArray GraphOp::MapParentIdToSubgraphId(IdArray parent_vids, IdArray query) {
CHECK(IsValidIdArray(parent_vids)) << "Invalid parent id array.";
CHECK(IsValidIdArray(query)) << "Invalid query id array.";
const auto parent_len = parent_vids->shape[0];
const auto query_len = query->shape[0];
const dgl_id_t* parent_data = static_cast<dgl_id_t*>(parent_vids->data);
const dgl_id_t* query_data = static_cast<dgl_id_t*>(query->data);
IdArray rst = IdArray::Empty({query_len}, DLDataType{kDLInt, 64, 1}, DLContext{kDLCPU, 0});
dgl_id_t* rst_data = static_cast<dgl_id_t*>(rst->data);
const bool is_sorted = std::is_sorted(parent_data, parent_data + parent_len);
if (is_sorted) {
  for (int64_t i = 0; i < query_len; i++) {
    const dgl_id_t id = query_data[i];
    // The parent Ids are sorted, so use binary search instead of a linear scan.
    const auto it = std::lower_bound(parent_data, parent_data + parent_len, id);
    CHECK(it != parent_data + parent_len && *it == id)
        << id << " doesn't exist in the parent Ids";
    rst_data[i] = it - parent_data;
  }
} else {
std::unordered_map<dgl_id_t, dgl_id_t> parent_map;
for (int64_t i = 0; i < parent_len; i++) {
const dgl_id_t id = parent_data[i];
parent_map[id] = i;
}
for (int64_t i = 0; i < query_len; i++) {
const dgl_id_t id = query_data[i];
auto it = parent_map.find(id);
CHECK(it != parent_map.end()) << id << " doesn't exist in the parent Ids";
rst_data[i] = it->second;
}
}
return rst;
}
IdArray GraphOp::ExpandIds(IdArray ids, IdArray offset) {
const auto id_len = ids->shape[0];
const auto off_len = offset->shape[0];
CHECK_EQ(id_len + 1, off_len);
const dgl_id_t *id_data = static_cast<dgl_id_t*>(ids->data);
const dgl_id_t *off_data = static_cast<dgl_id_t*>(offset->data);
const int64_t len = off_data[off_len - 1];
IdArray rst = IdArray::Empty({len}, DLDataType{kDLInt, 64, 1}, DLContext{kDLCPU, 0});
dgl_id_t *rst_data = static_cast<dgl_id_t*>(rst->data);
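// Write ids[i] into result positions [off_data[i], off_data[i+1]).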
for (int64_t i = 0; i < id_len; i++) {
const int64_t local_len = off_data[i + 1] - off_data[i];
for (int64_t j = 0; j < local_len; j++) {
rst_data[off_data[i] + j] = id_data[i];
}
}
return rst;
}
} // namespace dgl
import os
os.environ['DGLBACKEND'] = 'mxnet'
import mxnet as mx
import numpy as np
import scipy as sp
import scipy.sparse  # make sp.sparse available on all SciPy versions
from dgl.graph import GraphIndex, create_graph_index
from dgl.graph_index import map_to_subgraph_nid
import dgl.backend as F
from dgl import utils
def generate_graph():
g = create_graph_index()
g.add_nodes(10) # 10 nodes.
# create a graph where 0 is the source and 9 is the sink
for i in range(1, 9):
g.add_edge(0, i)
g.add_edge(i, 9)
# add a back flow from 9 to 0
g.add_edge(9, 0)
ig = create_graph_index(g.to_networkx(), readonly=True)
return g, ig
def generate_rand_graph(n):
arr = (sp.sparse.random(n, n, density=0.1, format='coo') != 0).astype(np.int64)
g = create_graph_index(arr)
ig = create_graph_index(arr, readonly=True)
return g, ig
def check_graph_equal(g1, g2):
ctx = F.get_context(mx.nd.array([1]))
adj1 = g1.adjacency_matrix().get(ctx) != 0
adj2 = g2.adjacency_matrix().get(ctx) != 0
assert mx.nd.sum(adj1 - adj2).asnumpy() == 0
def test_graph_gen():
g, ig = generate_rand_graph(10)
check_graph_equal(g, ig)
def check_basics(g, ig):
assert g.number_of_nodes() == ig.number_of_nodes()
assert g.number_of_edges() == ig.number_of_edges()
edges = g.edges()
iedges = ig.edges()
for i in range(g.number_of_nodes()):
assert g.has_node(i) == ig.has_node(i)
for i in range(g.number_of_nodes()):
assert mx.nd.sum(g.predecessors(i).tousertensor()).asnumpy() == mx.nd.sum(ig.predecessors(i).tousertensor()).asnumpy()
assert mx.nd.sum(g.successors(i).tousertensor()).asnumpy() == mx.nd.sum(ig.successors(i).tousertensor()).asnumpy()
randv = np.random.randint(0, g.number_of_nodes(), 10)
randv = utils.toindex(randv)
in_src1, in_dst1, in_eids1 = g.in_edges(randv)
in_src2, in_dst2, in_eids2 = ig.in_edges(randv)
nnz = in_src2.tousertensor().shape[0]
assert mx.nd.sum(in_src1.tousertensor() == in_src2.tousertensor()).asnumpy() == nnz
assert mx.nd.sum(in_dst1.tousertensor() == in_dst2.tousertensor()).asnumpy() == nnz
assert mx.nd.sum(in_eids1.tousertensor() == in_eids2.tousertensor()).asnumpy() == nnz
out_src1, out_dst1, out_eids1 = g.out_edges(randv)
out_src2, out_dst2, out_eids2 = ig.out_edges(randv)
nnz = out_dst2.tousertensor().shape[0]
assert mx.nd.sum(out_dst1.tousertensor() == out_dst2.tousertensor()).asnumpy() == nnz
assert mx.nd.sum(out_src1.tousertensor() == out_src2.tousertensor()).asnumpy() == nnz
assert mx.nd.sum(out_eids1.tousertensor() == out_eids2.tousertensor()).asnumpy() == nnz
num_v = len(randv)
assert mx.nd.sum(g.in_degrees(randv).tousertensor() == ig.in_degrees(randv).tousertensor()).asnumpy() == num_v
assert mx.nd.sum(g.out_degrees(randv).tousertensor() == ig.out_degrees(randv).tousertensor()).asnumpy() == num_v
randv = randv.tousertensor()
for v in randv.asnumpy():
assert g.in_degree(v) == ig.in_degree(v)
assert g.out_degree(v) == ig.out_degree(v)
for u in randv.asnumpy():
for v in randv.asnumpy():
if len(g.edge_id(u, v).tolist()) == 1:
assert g.edge_id(u, v).tolist() == ig.edge_id(u, v).tolist()
assert g.has_edge_between(u, v) == ig.has_edge_between(u, v)
randv = utils.toindex(randv)
ids = g.edge_ids(randv, randv)[2].tolist()
assert ig.edge_ids(randv, randv).tolist() == ids
assert g.has_edges_between(randv, randv).tolist() == ig.has_edges_between(randv, randv).tolist()
def test_basics():
g, ig = generate_rand_graph(100)
check_basics(g, ig)
def test_node_subgraph():
num_vertices = 100
g, ig = generate_rand_graph(num_vertices)
# node_subgraph
randv1 = np.random.randint(0, num_vertices, 20)
randv = np.unique(randv1)
subg = g.node_subgraph(utils.toindex(randv))
subig = ig.node_subgraph(utils.toindex(randv))
check_graph_equal(subg, subig)
assert mx.nd.sum(map_to_subgraph_nid(subg, utils.toindex(randv1[0:10])).tousertensor()
== map_to_subgraph_nid(subig, utils.toindex(randv1[0:10])).tousertensor()) == 10
# node_subgraphs
randvs = []
subgs = []
for i in range(4):
randv = np.unique(np.random.randint(0, num_vertices, 20))
randvs.append(utils.toindex(randv))
subgs.append(g.node_subgraph(utils.toindex(randv)))
subigs = ig.node_subgraphs(randvs)
for i in range(4):
check_graph_equal(subgs[i], subigs[i])
if __name__ == '__main__':
test_basics()
test_graph_gen()
test_node_subgraph()