Unverified commit 01a4cc5b, authored by Minjie Wang, committed by GitHub
Browse files

[Graph] Add API to convert graph to simple graph (#587)

* to simple

* WIP: multigraph flag

* graph index refactor; pass basic testing

* graph index refactor; pass basic testing

* fix bug in to_simple; pass torch test

* fix mx utest

* fix example

* fix lint

* fix ci

* poke ci

* poke ci

* WIP

* poke ci

* poke ci

* poke ci

* change ci workspace

* poke ci

* poke ci

* poke ci

* poke ci

* delete ci

* use enum for multigraph flag
parent 372203f0
......@@ -12,10 +12,9 @@ class GraphData:
num_nodes = csr.shape[0]
num_edges = mx.nd.contrib.getnnz(csr).asnumpy()[0]
edge_ids = np.arange(0, num_edges, step=1, dtype=np.int64)
self.graph = dgl.graph_index.GraphIndex(multigraph=False, readonly=True)
self.graph.from_csr_matrix(dgl.utils.toindex(csr.indptr),
dgl.utils.toindex(csr.indices), "in",
dgl.contrib.graph_store._get_graph_path(graph_name))
self.graph = dgl.graph_index.from_csr_matrix(
dgl.utils.toindex(csr.indptr), dgl.utils.toindex(csr.indices), False,
"in", dgl.contrib.graph_store._get_graph_path(graph_name))
self.features = mx.nd.random.normal(shape=(csr.shape[0], num_feats))
self.num_labels = 10
self.labels = mx.nd.floor(mx.nd.random.uniform(low=0, high=self.num_labels,
......
......@@ -137,6 +137,13 @@ class GraphOp {
* \return a expanded Id array.
*/
static IdArray ExpandIds(IdArray ids, IdArray offset);
/*!
* \brief Convert the graph to a simple graph.
*
* Repeated (src, dst) edges of the input are collapsed into a single edge.
* Edge ids of the result are assigned consecutively starting from 0, so they
* do not correspond to the edge ids of the input graph.
*
* \param graph The input graph.
* \return a new immutable simple graph with no multi-edge.
*/
static ImmutableGraph ToSimpleGraph(const GraphInterface* graph);
};
} // namespace dgl
......
......@@ -34,9 +34,17 @@ class CSR : public GraphInterface {
// that has the given number of verts and edges.
CSR(const std::string &shared_mem_name,
int64_t num_vertices, int64_t num_edges, bool is_multigraph);
// Create a csr graph that shares the given indptr and indices.
CSR(IdArray indptr, IdArray indices, IdArray edge_ids);
CSR(IdArray indptr, IdArray indices, IdArray edge_ids, bool is_multigraph);
// Create a csr graph by data iterator
template <typename IndptrIter, typename IndicesIter, typename EdgeIdIter>
CSR(int64_t num_vertices, int64_t num_edges,
IndptrIter indptr_begin, IndicesIter indices_begin, EdgeIdIter edge_ids_begin,
bool is_multigraph);
// Create a csr graph whose memory is stored in the shared memory
// and the structure is given by the indptr and indcies.
CSR(IdArray indptr, IdArray indices, IdArray edge_ids,
......@@ -892,6 +900,26 @@ class ImmutableGraph: public GraphInterface {
COOPtr coo_;
};
// inline implementations
// Build a CSR by copying its three arrays out of caller-supplied iterators.
//   indptr_begin   must yield num_vertices + 1 values,
//   indices_begin  must yield num_edges values,
//   edge_ids_begin must yield num_edges values.
// Each iterator is advanced exactly once per element read, so single-pass
// (input) iterators are sufficient.
template <typename IndptrIter, typename IndicesIter, typename EdgeIdIter>
CSR::CSR(int64_t num_vertices, int64_t num_edges,
         IndptrIter indptr_begin, IndicesIter indices_begin, EdgeIdIter edge_ids_begin,
         bool is_multigraph): is_multigraph_(is_multigraph) {
  indptr_ = NewIdArray(num_vertices + 1);
  indices_ = NewIdArray(num_edges);
  edge_ids_ = NewIdArray(num_edges);
  dgl_id_t* const indptr_out = static_cast<dgl_id_t*>(indptr_->data);
  dgl_id_t* const indices_out = static_cast<dgl_id_t*>(indices_->data);
  dgl_id_t* const edge_ids_out = static_cast<dgl_id_t*>(edge_ids_->data);
  // The index pointer array has one more entry than there are vertices.
  const int64_t indptr_len = num_vertices + 1;
  for (int64_t pos = 0; pos < indptr_len; ++pos, ++indptr_begin) {
    indptr_out[pos] = *indptr_begin;
  }
  // Column indices and edge ids are parallel arrays; fill them in lockstep.
  for (int64_t pos = 0; pos < num_edges; ++pos, ++indices_begin, ++edge_ids_begin) {
    indices_out[pos] = *indices_begin;
    edge_ids_out[pos] = *edge_ids_begin;
  }
}
} // namespace dgl
#endif // DGL_IMMUTABLE_GRAPH_H_
......@@ -13,7 +13,7 @@ from ..base import ALL, is_all, DGLError, dgl_warning
from .. import backend as F
from ..graph import DGLGraph
from .. import utils
from ..graph_index import GraphIndex, create_graph_index
from ..graph_index import GraphIndex, create_graph_index, from_csr, from_shared_mem_csr_matrix
from .._ffi.ndarray import empty_shared_mem
from .._ffi.function import _init_api
from .. import ndarray as nd
......@@ -309,10 +309,9 @@ class SharedMemoryStoreServer(object):
if isinstance(graph_data, GraphIndex):
graph_idx = graph_data
else:
graph_idx = GraphIndex(multigraph=multigraph, readonly=True)
indptr, indices = _to_csr(graph_data, edge_dir, multigraph)
graph_idx.from_csr_matrix(utils.toindex(indptr), utils.toindex(indices),
edge_dir, _get_graph_path(graph_name))
graph_idx = from_csr(utils.toindex(indptr), utils.toindex(indices),
multigraph, edge_dir, _get_graph_path(graph_name))
self._graph = DGLGraph(graph_idx, multigraph=multigraph, readonly=True)
self._num_workers = num_workers
......@@ -541,8 +540,8 @@ class SharedMemoryDGLGraph(BaseGraphStore):
num_nodes, num_edges, multigraph, edge_dir = self.proxy.get_graph_info(graph_name)
num_nodes, num_edges = int(num_nodes), int(num_edges)
graph_idx = GraphIndex(multigraph=multigraph, readonly=True)
graph_idx.from_shared_mem_csr_matrix(_get_graph_path(graph_name), num_nodes, num_edges, edge_dir)
graph_idx = from_shared_mem_csr_matrix(_get_graph_path(graph_name),
num_nodes, num_edges, edge_dir, multigraph)
super(SharedMemoryDGLGraph, self).__init__(graph_idx, multigraph=multigraph)
self._init_manager = InitializerManager()
......
......@@ -9,7 +9,7 @@ from .base import ALL, is_all, DGLError
from . import backend as F
from . import init
from .frame import FrameRef, Frame, Scheme
from .graph_index import create_graph_index
from . import graph_index
from .runtime import ir, scheduler, Runtime
from . import utils
from .view import NodeView, EdgeView
......@@ -765,7 +765,8 @@ class DGLGraph(DGLBaseGraph):
edge_frame : FrameRef, optional
Edge feature storage.
multigraph : bool, optional
Whether the graph would be a multigraph (default: False)
Whether the graph would be a multigraph. If none, the flag will be determined
by scanning the whole graph. (default: None)
readonly : bool, optional
Whether the graph structure is read-only (default: False).
......@@ -894,10 +895,11 @@ class DGLGraph(DGLBaseGraph):
graph_data=None,
node_frame=None,
edge_frame=None,
multigraph=False,
multigraph=None,
readonly=False):
# graph
super(DGLGraph, self).__init__(create_graph_index(graph_data, multigraph, readonly))
gidx = graph_index.create_graph_index(graph_data, multigraph, readonly)
super(DGLGraph, self).__init__(gidx)
# node and edge frame
if node_frame is None:
......@@ -1225,7 +1227,7 @@ class DGLGraph(DGLBaseGraph):
nx_graph = nx_graph.to_directed()
self.clear()
self._graph.from_networkx(nx_graph)
self._graph = graph_index.from_networkx(nx_graph, self.is_readonly)
self._node_frame.add_rows(self.number_of_nodes())
self._edge_frame.add_rows(self.number_of_edges())
self._msg_frame.add_rows(self.number_of_edges())
......@@ -1291,7 +1293,7 @@ class DGLGraph(DGLBaseGraph):
>>> g.from_scipy_sparse_matrix(a)
"""
self.clear()
self._graph.from_scipy_sparse_matrix(spmat)
self._graph = graph_index.from_scipy_sparse_matrix(spmat, self.is_readonly)
self._node_frame.add_rows(self.number_of_nodes())
self._edge_frame.add_rows(self.number_of_edges())
self._msg_frame.add_rows(self.number_of_edges())
......
......@@ -14,6 +14,12 @@ from . import utils
GraphIndexHandle = ctypes.c_void_p
class BoolFlag(object):
"""Bool flag with unknown value.

The integer values are what the conversion functions in this module pass to
the C API as the multigraph flag; BOOL_UNKNOWN stands for a flag that the
caller did not specify.
"""
BOOL_UNKNOWN = -1
BOOL_FALSE = 0
BOOL_TRUE = 1
class GraphIndex(object):
"""Graph index object.
......@@ -22,10 +28,10 @@ class GraphIndex(object):
handle : GraphIndexHandle
Handler
"""
def __init__(self, handle=None, multigraph=None, readonly=None):
def __init__(self, handle):
self._handle = handle
self._multigraph = multigraph
self._readonly = readonly
self._multigraph = None # python-side cache of the flag
self._readonly = None # python-side cache of the flag
self._cache = {}
def __del__(self):
......@@ -36,6 +42,7 @@ class GraphIndex(object):
def __getstate__(self):
src, dst, _ = self.edges()
n_nodes = self.number_of_nodes()
# TODO(minjie): should try to avoid calling is_multigraph
multigraph = self.is_multigraph()
readonly = self.is_readonly()
......@@ -45,28 +52,24 @@ class GraphIndex(object):
"""The pickle state of GraphIndex is defined as a triplet
(number_of_nodes, multigraph, readonly, src_nodes, dst_nodes)
"""
n_nodes, multigraph, readonly, src, dst = state
num_nodes, multigraph, readonly, src, dst = state
self._cache = {}
self._multigraph = multigraph
self._readonly = readonly
if readonly:
self._init(src, dst, n_nodes)
else:
self._handle = _CAPI_DGLGraphCreateMutable(multigraph)
self.clear()
self.add_nodes(n_nodes)
self.add_edges(src, dst)
def _init(self, src_ids, dst_ids, num_nodes):
"""The actual init function"""
assert len(src_ids) == len(dst_ids)
if multigraph is None:
multigraph = BoolFlag.BOOL_UNKNOWN
self._handle = _CAPI_DGLGraphCreate(
src_ids.todgltensor(),
dst_ids.todgltensor(),
self._multigraph,
src.todgltensor(),
dst.todgltensor(),
int(multigraph),
int(num_nodes),
self._readonly)
readonly)
@property
def handle(self):
"""Get the CAPI handle."""
return self._handle
def add_nodes(self, num):
"""Add nodes.
......@@ -741,141 +744,6 @@ class GraphIndex(object):
ret.add_edge(u, v, id=e)
return ret
def from_networkx(self, nx_graph):
"""Convert from networkx graph.
If 'id' edge attribute exists, the edge will be added follows
the edge id order. Otherwise, order is undefined.
Parameters
----------
nx_graph : networkx.DiGraph
The nx graph
"""
if not isinstance(nx_graph, nx.Graph):
nx_graph = (nx.MultiDiGraph(nx_graph) if self.is_multigraph()
else nx.DiGraph(nx_graph))
else:
if not nx_graph.is_directed():
# to_directed creates a deep copy of the networkx graph even if
# the original graph is already directed and we do not want to do it.
nx_graph = nx_graph.to_directed()
num_nodes = nx_graph.number_of_nodes()
if not self.is_readonly():
self.clear()
self.add_nodes(num_nodes)
if nx_graph.number_of_edges() == 0:
if self.is_readonly():
raise Exception("can't create an empty immutable graph")
return
# nx_graph.edges(data=True) returns src, dst, attr_dict
has_edge_id = 'id' in next(iter(nx_graph.edges(data=True)))[-1]
if has_edge_id:
num_edges = nx_graph.number_of_edges()
src = np.zeros((num_edges,), dtype=np.int64)
dst = np.zeros((num_edges,), dtype=np.int64)
for u, v, attr in nx_graph.edges(data=True):
eid = attr['id']
src[eid] = u
dst[eid] = v
else:
src = []
dst = []
for e in nx_graph.edges:
src.append(e[0])
dst.append(e[1])
num_nodes = nx_graph.number_of_nodes()
# We store edge Ids as an edge attribute.
src = utils.toindex(src)
dst = utils.toindex(dst)
self._init(src, dst, num_nodes)
def from_scipy_sparse_matrix(self, adj):
"""Convert from scipy sparse matrix.
Parameters
----------
adj : scipy sparse matrix
"""
if not self.is_readonly():
self.clear()
num_nodes = max(adj.shape[0], adj.shape[1])
adj_coo = adj.tocoo()
src = utils.toindex(adj_coo.row)
dst = utils.toindex(adj_coo.col)
self._init(src, dst, num_nodes)
def from_csr_matrix(self, indptr, indices, edge_dir, shared_mem_name=""):
"""Load a graph from the CSR matrix.
Parameters
----------
indptr : utils.Index
index pointer in the CSR format
indices : utils.Index
column index array in the CSR format
edge_dir : string
the edge direction. The supported option is "in" and "out".
shared_mem_name : string
the name of shared memory
"""
assert self.is_readonly()
self._handle = _CAPI_DGLGraphCSRCreate(
indptr.todgltensor(),
indices.todgltensor(),
shared_mem_name,
self._multigraph,
edge_dir)
def from_shared_mem_csr_matrix(self, shared_mem_name,
num_nodes, num_edges, edge_dir):
"""Load a graph from the shared memory in the CSR format.
Parameters
----------
shared_mem_name : string
the name of shared memory
num_nodes : int
the number of nodes
num_edges : int
the number of edges
edge_dir : string
the edge direction. The supported option is "in" and "out".
"""
assert self.is_readonly()
self._handle = _CAPI_DGLGraphCSRCreateMMap(
shared_mem_name,
int(num_nodes), int(num_edges),
self._multigraph,
edge_dir)
def from_edge_list(self, elist):
"""Convert from an edge list.
Parameters
---------
elist : list
List of (u, v) edge tuple.
"""
if not self.is_readonly():
self.clear()
src, dst = zip(*elist)
src = np.array(src)
dst = np.array(dst)
src_ids = utils.toindex(src)
dst_ids = utils.toindex(dst)
num_nodes = max(src.max(), dst.max()) + 1
min_nodes = min(src.min(), dst.min())
if min_nodes != 0:
raise DGLError('Invalid edge list. Nodes must start from 0.')
self._init(src_ids, dst_ids, num_nodes)
def line_graph(self, backtracking=True):
"""Return the line graph of this graph.
......@@ -908,7 +776,7 @@ class SubgraphIndex(GraphIndex):
The parent edge ids in this subgraph.
"""
def __init__(self, handle, parent, induced_nodes, induced_edges):
super(SubgraphIndex, self).__init__(handle, parent.is_multigraph(), parent.is_readonly())
super(SubgraphIndex, self).__init__(handle)
self._parent = parent
self._induced_nodes = induced_nodes
self._induced_edges = induced_edges
......@@ -955,6 +823,196 @@ class SubgraphIndex(GraphIndex):
raise NotImplementedError(
"SubgraphIndex unpickling is not supported yet.")
###############################################################
# Conversion functions
###############################################################
def from_coo(num_nodes, src, dst, is_multigraph, readonly):
    """Convert from coo arrays.

    Parameters
    ----------
    num_nodes : int
        Number of nodes.
    src : Tensor
        Src end nodes of the edges.
    dst : Tensor
        Dst end nodes of the edges.
    is_multigraph : bool or None
        True if the graph is a multigraph. None means determined by data.
        For a mutable graph (``readonly`` is False) an unspecified flag is
        currently treated as False.
    readonly : bool
        True if the returned graph is readonly.

    Returns
    -------
    GraphIndex
        The graph index.
    """
    src = utils.toindex(src)
    dst = utils.toindex(dst)
    if is_multigraph is None:
        is_multigraph = BoolFlag.BOOL_UNKNOWN
    if readonly:
        handle = _CAPI_DGLGraphCreate(
            src.todgltensor(),
            dst.todgltensor(),
            int(is_multigraph),
            int(num_nodes),
            readonly)
        gidx = GraphIndex(handle)
    else:
        # Compare by value: BOOL_UNKNOWN is a plain int, so an identity test
        # would only work by accident of small-integer caching.
        if is_multigraph == BoolFlag.BOOL_UNKNOWN:
            # TODO(minjie): better behavior in the future
            is_multigraph = BoolFlag.BOOL_FALSE
        handle = _CAPI_DGLGraphCreateMutable(bool(is_multigraph))
        gidx = GraphIndex(handle)
        gidx.add_nodes(num_nodes)
        gidx.add_edges(src, dst)
    return gidx
def from_csr(indptr, indices, is_multigraph,
             direction, shared_mem_name=""):
    """Build a graph index from the two arrays of a CSR matrix.

    Parameters
    ----------
    indptr : Tensor
        Index pointer array of the CSR format.
    indices : Tensor
        Column index array of the CSR format.
    is_multigraph : bool or None
        True if the graph is a multigraph. None leaves the flag unspecified
        (it is sent to the C API as ``BoolFlag.BOOL_UNKNOWN``).
    direction : str
        The edge direction. Either "in" or "out".
    shared_mem_name : str
        The name of shared memory.

    Returns
    -------
    GraphIndex
        The graph index.
    """
    indptr_idx = utils.toindex(indptr)
    indices_idx = utils.toindex(indices)
    flag = BoolFlag.BOOL_UNKNOWN if is_multigraph is None else is_multigraph
    return GraphIndex(_CAPI_DGLGraphCSRCreate(
        indptr_idx.todgltensor(),
        indices_idx.todgltensor(),
        shared_mem_name,
        int(flag),
        direction))
def from_shared_mem_csr_matrix(shared_mem_name,
                               num_nodes, num_edges, edge_dir,
                               is_multigraph):
    """Attach to a CSR graph that already lives in shared memory.

    Parameters
    ----------
    shared_mem_name : string
        The name of shared memory.
    num_nodes : int
        The number of nodes.
    num_edges : int
        The number of edges.
    edge_dir : string
        The edge direction. The supported option is "in" and "out".
    is_multigraph : bool
        True if the graph is a multigraph. The value is handed to the C API
        unchanged.

    Returns
    -------
    GraphIndex
        The graph index.
    """
    node_count = int(num_nodes)
    edge_count = int(num_edges)
    return GraphIndex(_CAPI_DGLGraphCSRCreateMMap(
        shared_mem_name, node_count, edge_count, is_multigraph, edge_dir))
def from_networkx(nx_graph, readonly):
    """Convert from networkx graph.

    If 'id' edge attribute exists, the edge will be added follows
    the edge id order. Otherwise, order is undefined.

    Parameters
    ----------
    nx_graph : networkx.DiGraph
        The nx graph or any graph that can be converted to nx.DiGraph
    readonly : bool
        True if the returned graph is readonly.

    Returns
    -------
    GraphIndex
        The graph index.

    Raises
    ------
    DGLError
        If the graph has no edges and ``readonly`` is True.
    """
    if not isinstance(nx_graph, nx.Graph):
        nx_graph = nx.DiGraph(nx_graph)
    elif not nx_graph.is_directed():
        # to_directed creates a deep copy of the networkx graph even if
        # the original graph is already directed and we do not want to do it.
        nx_graph = nx_graph.to_directed()

    is_multigraph = isinstance(nx_graph, nx.MultiDiGraph)
    num_nodes = nx_graph.number_of_nodes()
    num_edges = nx_graph.number_of_edges()

    if num_edges == 0:
        # Peeking at the first edge below would raise StopIteration on a
        # graph that only has nodes, so handle that case explicitly.
        if readonly:
            raise DGLError("can't create an empty immutable graph")
        has_edge_id = False
    else:
        # nx_graph.edges(data=True) returns src, dst, attr_dict
        has_edge_id = 'id' in next(iter(nx_graph.edges(data=True)))[-1]

    if has_edge_id:
        # Place every edge at the position given by its 'id' attribute.
        src = np.zeros((num_edges,), dtype=np.int64)
        dst = np.zeros((num_edges,), dtype=np.int64)
        for u, v, attr in nx_graph.edges(data=True):
            eid = attr['id']
            src[eid] = u
            dst[eid] = v
    else:
        src = []
        dst = []
        for e in nx_graph.edges:
            src.append(e[0])
            dst.append(e[1])
    # We store edge Ids as an edge attribute.
    src = utils.toindex(src)
    dst = utils.toindex(dst)
    return from_coo(num_nodes, src, dst, is_multigraph, readonly)
def from_scipy_sparse_matrix(adj, readonly):
    """Build a graph index from a scipy sparse adjacency matrix.

    A CSR matrix requested as a readonly graph is loaded directly from its
    ``indptr``/``indices`` arrays; every other combination goes through the
    COO representation.

    Parameters
    ----------
    adj : scipy sparse matrix
    readonly : bool
        True if the returned graph is readonly.

    Returns
    -------
    GraphIndex
        The graph index.
    """
    if adj.getformat() == 'csr' and readonly:
        return from_csr(adj.indptr, adj.indices, False, "out")
    # The matrix may be non-square; use the larger dimension as node count.
    node_count = max(adj.shape[0], adj.shape[1])
    coo = adj.tocoo()
    return from_coo(node_count, coo.row, coo.col, False, readonly)
def from_edge_list(elist, is_multigraph, readonly):
    """Build a graph index from a list of edges.

    Parameters
    ----------
    elist : list
        List of (u, v) edge tuple.
    is_multigraph : bool or None
        Multigraph flag, forwarded to ``from_coo``.
    readonly : bool
        True if the returned graph is readonly.

    Returns
    -------
    GraphIndex
        The graph index.

    Raises
    ------
    DGLError
        If the smallest node id that appears in the list is not 0.
    """
    src_seq, dst_seq = zip(*elist)
    src_arr = np.array(src_seq)
    dst_arr = np.array(dst_seq)
    src_ids = utils.toindex(src_arr)
    dst_ids = utils.toindex(dst_arr)
    # Node ids are expected to be 0-based; the largest id fixes the node count.
    largest = max(src_arr.max(), dst_arr.max())
    smallest = min(src_arr.min(), dst_arr.min())
    if smallest != 0:
        raise DGLError('Invalid edge list. Nodes must start from 0.')
    return from_coo(largest + 1, src_ids, dst_ids, is_multigraph, readonly)
def map_to_subgraph_nid(subgraph, parent_nids):
"""Map parent node Ids to the subgraph node Ids.
......@@ -1050,56 +1108,43 @@ def disjoint_partition(graph, num_or_size_splits):
graphs.append(GraphIndex(handle))
return graphs
def create_graph_index(graph_data=None, multigraph=False, readonly=False):
def create_graph_index(graph_data, multigraph, readonly):
"""Create a graph index object.
Parameters
----------
graph_data : graph data, optional
graph_data : graph data
Data to initialize graph. Same as networkx's semantics.
multigraph : bool, optional
Whether the graph is multigraph (default is False)
multigraph : bool
Whether the graph would be a multigraph. If none, the flag will be determined
by the data.
readonly : bool
Whether the graph structure is read-only.
"""
if isinstance(graph_data, GraphIndex):
# FIXME(minjie): this return is not correct for mutable graph index
return graph_data
if graph_data is None:
if readonly:
# FIXME(zhengda): we should construct a C graph index before constructing GraphIndex.
gidx = GraphIndex(None, multigraph, readonly)
else:
handle = _CAPI_DGLGraphCreateMutable(multigraph)
gidx = GraphIndex(handle, multigraph, readonly)
if graph_data is None and readonly:
raise Exception("can't create an empty immutable graph")
elif graph_data is None:
return gidx
if multigraph is None:
multigraph = False
handle = _CAPI_DGLGraphCreateMutable(multigraph)
return GraphIndex(handle)
elif isinstance(graph_data, (list, tuple)):
# edge list
if isinstance(graph_data, (list, tuple)):
try:
gidx.from_edge_list(graph_data)
return gidx
except Exception: # pylint: disable=broad-except
raise DGLError('Graph data is not a valid edge list.')
return from_edge_list(graph_data, multigraph, readonly)
elif isinstance(graph_data, scipy.sparse.spmatrix):
# scipy format
if isinstance(graph_data, scipy.sparse.spmatrix):
try:
gidx.from_scipy_sparse_matrix(graph_data)
return gidx
except Exception: # pylint: disable=broad-except
raise DGLError('Graph data is not a valid scipy sparse matrix.')
return from_scipy_sparse_matrix(graph_data, readonly)
else:
# networkx - any format
try:
gidx.from_networkx(graph_data)
gidx = from_networkx(graph_data, readonly)
except Exception: # pylint: disable=broad-except
raise DGLError('Error while creating graph from input of type "%s".'
% type(graph_data))
return gidx
_init_api("dgl.graph_index")
"""Module for graph transformation methods."""
from ._ffi.function import _init_api
from .graph import DGLGraph
from .graph_index import GraphIndex
from .batched_graph import BatchedDGLGraph
__all__ = ['line_graph', 'reverse']
__all__ = ['line_graph', 'reverse', 'to_simple_graph']
def line_graph(g, backtracking=True, shared=False):
......@@ -103,3 +105,23 @@ def reverse(g, share_ndata=False, share_edata=False):
if share_edata:
g_reversed._edge_frame = g._edge_frame
return g_reversed
def to_simple_graph(g):
    """Return a copy of the graph in which multi-edges are removed.

    The result is a new *readonly* graph; node and edge features of the
    input are not carried over.

    Parameters
    ----------
    g : DGLGraph
        The input graph.

    Returns
    -------
    DGLGraph
        A simple graph.
    """
    simple_handle = _CAPI_DGLToSimpleGraph(g._graph.handle)
    simple_gidx = GraphIndex(simple_handle)
    return DGLGraph(simple_gidx, readonly=True)
_init_api("dgl.transform")
......@@ -20,6 +20,13 @@ typedef void* GraphHandle;
// Communicator handler type
typedef void* CommunicatorHandle;
/*!
* \brief Enum type for bool value with unknown.
*
* The enumerators are compared against plain int arguments received through
* the C API, so their numeric values are part of the interface.
*/
enum BoolFlag {
kBoolUnknown = -1,  // the caller did not specify the flag
kBoolFalse = 0,
kBoolTrue = 1
};
/*!
* \brief Convert a vector of NDArray to PackedFunc.
*/
......
......@@ -127,22 +127,29 @@ DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphCreateMutable")
*rv = ghandle;
});
DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphCreate")
.set_body([] (DGLArgs args, DGLRetValue* rv) {
const IdArray src_ids = args[0];
const IdArray dst_ids = args[1];
const bool multigraph = static_cast<bool>(args[2]);
const int64_t num_nodes = static_cast<int64_t>(args[3]);
const bool readonly = static_cast<bool>(args[4]);
const int multigraph = args[2];
const int64_t num_nodes = args[3];
const bool readonly = args[4];
GraphHandle ghandle;
if (readonly) {
// TODO(minjie): The array copy here is unnecessary and adds extra overhead.
// However, with MXNet backend, the memory would be corrupted if we directly
// save the passed-in ndarrays into DGL's graph object. We hope MXNet team
// could help look into this.
if (multigraph == kBoolUnknown) {
COOPtr coo(new COO(num_nodes, Clone(src_ids), Clone(dst_ids)));
ghandle = new ImmutableGraph(coo);
} else {
COOPtr coo(new COO(num_nodes, Clone(src_ids), Clone(dst_ids), multigraph));
ghandle = new ImmutableGraph(coo);
}
} else {
CHECK_NE(multigraph, kBoolUnknown);
ghandle = new Graph(src_ids, dst_ids, num_nodes, multigraph);
}
*rv = ghandle;
......@@ -153,7 +160,7 @@ DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphCSRCreate")
const IdArray indptr = args[0];
const IdArray indices = args[1];
const std::string shared_mem_name = args[2];
const bool multigraph = static_cast<bool>(args[3]);
const int multigraph = args[3];
const std::string edge_dir = args[4];
CSRPtr csr;
......@@ -162,14 +169,23 @@ DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphCSRCreate")
int64_t *edge_data = static_cast<int64_t *>(edge_ids->data);
for (size_t i = 0; i < edge_ids->shape[0]; i++)
edge_data[i] = i;
if (shared_mem_name.empty())
if (shared_mem_name.empty()) {
// TODO(minjie): The array copy here is unnecessary and adds extra overhead.
// However, with MXNet backend, the memory would be corrupted if we directly
// save the passed-in ndarrays into DGL's graph object. We hope MXNet team
// could help look into this.
if (multigraph == kBoolUnknown) {
csr.reset(new CSR(Clone(indptr), Clone(indices), Clone(edge_ids)));
} else {
csr.reset(new CSR(Clone(indptr), Clone(indices), Clone(edge_ids), multigraph));
else
}
} else {
if (multigraph == kBoolUnknown) {
csr.reset(new CSR(indptr, indices, edge_ids, shared_mem_name));
} else {
csr.reset(new CSR(indptr, indices, edge_ids, multigraph, shared_mem_name));
}
}
GraphHandle ghandle;
if (edge_dir == "in")
......@@ -507,4 +523,12 @@ DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphGetAdj")
*rv = ConvertAdjToPackedFunc(res);
});
// C API entry point: args[0] is the handle of the input graph; the return
// value is the handle of a newly created simple graph.
DGL_REGISTER_GLOBAL("transform._CAPI_DGLToSimpleGraph")
.set_body([] (DGLArgs args, DGLRetValue* rv) {
    GraphHandle input_handle = args[0];
    const GraphInterface* input_graph =
      static_cast<const GraphInterface*>(input_handle);
    // NOTE(review): Reset() presumably hands ownership of the result to the
    // returned handle -- confirm against ImmutableGraph.
    GraphHandle output_handle = GraphOp::ToSimpleGraph(input_graph).Reset();
    *rv = output_handle;
  });
} // namespace dgl
......@@ -10,10 +10,34 @@
namespace dgl {
namespace {
// An id array must be a one-dimensional array of 64-bit integers on CPU.
inline bool IsValidIdArray(const IdArray& arr) {
  const bool on_cpu = (arr->ctx.device_type == kDLCPU);
  const bool is_vector = (arr->ndim == 1);
  const bool is_int64 = (arr->dtype.code == kDLInt) && (arr->dtype.bits == 64);
  return on_cpu && is_vector && is_int64;
}
// Input iterator that generates consecutive dgl ids, starting from the value
// given to the constructor. Dereferencing returns the current id by value.
class RangeIter {
 public:
  // Iterator traits are spelled out as member types rather than inherited
  // from std::iterator, which is deprecated since C++17. The types are the
  // ones std::iterator<std::input_iterator_tag, dgl_id_t> used to provide.
  using iterator_category = std::input_iterator_tag;
  using value_type = dgl_id_t;
  using difference_type = std::ptrdiff_t;
  using pointer = dgl_id_t*;
  using reference = dgl_id_t&;

  explicit RangeIter(dgl_id_t from): cur_(from) {}

  // Pre-increment: advance to the next id.
  RangeIter& operator++() {
    ++cur_;
    return *this;
  }
  // Post-increment: advance, but return the iterator as it was before.
  RangeIter operator++(int) {
    RangeIter retval = *this;
    ++cur_;
    return retval;
  }
  bool operator==(RangeIter other) const {
    return cur_ == other.cur_;
  }
  bool operator!=(RangeIter other) const {
    return cur_ != other.cur_;
  }
  dgl_id_t operator*() const {
    return cur_;
  }

 private:
  // The id produced by the next dereference.
  dgl_id_t cur_;
};
} // namespace
Graph GraphOp::LineGraph(const Graph* g, bool backtracking) {
......@@ -212,8 +236,8 @@ std::vector<ImmutableGraph> GraphOp::DisjointPartitionBySizes(const ImmutableGra
}
IdArray GraphOp::MapParentIdToSubgraphId(IdArray parent_vids, IdArray query) {
CHECK(dgl::IsValidIdArray(parent_vids)) << "Invalid parent id array.";
CHECK(dgl::IsValidIdArray(query)) << "Invalid query id array.";
CHECK(IsValidIdArray(parent_vids)) << "Invalid parent id array.";
CHECK(IsValidIdArray(query)) << "Invalid query id array.";
const auto parent_len = parent_vids->shape[0];
const auto query_len = query->shape[0];
const dgl_id_t* parent_data = static_cast<dgl_id_t*>(parent_vids->data);
......@@ -273,4 +297,22 @@ IdArray GraphOp::ExpandIds(IdArray ids, IdArray offset) {
return rst;
}
// Build a CSR in which every (src, dst) pair of the input appears once.
// Successors keep the order of their first occurrence, and edge ids of the
// result are simply 0, 1, 2, ... in CSR order.
ImmutableGraph GraphOp::ToSimpleGraph(const GraphInterface* graph) {
  const auto num_nodes = graph->NumVertices();
  std::vector<dgl_id_t> row_offsets(num_nodes + 1), columns;
  row_offsets[0] = 0;
  for (dgl_id_t u = 0; u < num_nodes; ++u) {
    // Destinations already emitted for this source vertex.
    std::unordered_set<dgl_id_t> seen;
    for (const dgl_id_t v : graph->SuccVec(u)) {
      // insert() reports whether v is new, which saves a separate lookup.
      if (seen.insert(v).second) {
        columns.push_back(v);
      }
    }
    row_offsets[u + 1] = columns.size();
  }
  CSRPtr csr(new CSR(num_nodes, columns.size(),
                     row_offsets.begin(), columns.begin(), RangeIter(0), false));
  return ImmutableGraph(csr);
}
} // namespace dgl
......@@ -12,20 +12,20 @@ def generate_from_networkx():
edges = [[2, 3], [2, 5], [3, 0], [1, 0], [4, 3], [4, 5]]
nx_graph = nx.DiGraph()
nx_graph.add_edges_from(edges)
g = create_graph_index(nx_graph)
ig = create_graph_index(nx_graph, readonly=True)
g = create_graph_index(nx_graph, multigraph=False, readonly=False)
ig = create_graph_index(nx_graph, multigraph=False, readonly=True)
return g, ig
def generate_from_edgelist():
edges = [[2, 3], [2, 5], [3, 0], [6, 10], [10, 3], [10, 15]]
g = create_graph_index(edges)
ig = create_graph_index(edges, readonly=True)
g = create_graph_index(edges, multigraph=False, readonly=False)
ig = create_graph_index(edges, multigraph=False, readonly=True)
return g, ig
def generate_rand_graph(n):
arr = (sp.sparse.random(n, n, density=0.1, format='coo') != 0).astype(np.int64)
g = create_graph_index(arr)
ig = create_graph_index(arr, readonly=True)
g = create_graph_index(arr, multigraph=False, readonly=False)
ig = create_graph_index(arr, multigraph=False, readonly=True)
return g, ig
def check_graph_equal(g1, g2):
......@@ -160,8 +160,8 @@ def test_load_csr():
csr = (sp.sparse.random(n, n, density=0.1, format='csr') != 0).astype(np.int64)
# Load CSR normally.
idx = dgl.graph_index.GraphIndex(multigraph=False, readonly=True)
idx.from_csr_matrix(utils.toindex(csr.indptr), utils.toindex(csr.indices), 'out')
idx = dgl.graph_index.from_csr(
utils.toindex(csr.indptr), utils.toindex(csr.indices), False, 'out')
assert idx.number_of_nodes() == n
assert idx.number_of_edges() == csr.nnz
src, dst, eid = idx.edges()
......@@ -173,9 +173,9 @@ def test_load_csr():
# Load CSR to shared memory.
# Shared memory isn't supported in Windows.
if os.name is not 'nt':
idx = dgl.graph_index.GraphIndex(multigraph=False, readonly=True)
idx.from_csr_matrix(utils.toindex(csr.indptr), utils.toindex(csr.indices),
'out', '/test_graph_struct')
idx = dgl.graph_index.from_csr(
utils.toindex(csr.indptr), utils.toindex(csr.indices),
False, 'out', '/test_graph_struct')
assert idx.number_of_nodes() == n
assert idx.number_of_edges() == csr.nnz
src, dst, eid = idx.edges()
......
......@@ -80,7 +80,7 @@ def test_pickling_index():
_assert_is_identical_index(i, i2)
def test_pickling_graph_index():
gi = create_graph_index()
gi = create_graph_index(None, False, False)
gi.add_nodes(3)
src_idx = toindex([0, 0])
dst_idx = toindex([1, 2])
......
......@@ -84,9 +84,20 @@ def test_reverse_shared_frames():
rg.update_all(src_msg, sum_reduce)
assert F.allclose(g.ndata['h'], rg.ndata['h'])
def test_simple_graph():
    """to_simple_graph must drop the duplicated (0, 1) edge and nothing else."""
    edge_list = [(0, 1), (0, 2), (1, 2), (0, 1)]
    multi_g = dgl.DGLGraph(edge_list, readonly=True)
    assert multi_g.is_multigraph

    simple_g = dgl.to_simple_graph(multi_g)
    assert not simple_g.is_multigraph
    assert simple_g.number_of_edges() == 3

    # Compare as sets: the edge order of the result is not checked here.
    src, dst = simple_g.edges()
    src_list = list(F.asnumpy(src))
    dst_list = list(F.asnumpy(dst))
    assert set(zip(src_list, dst_list)) == set(edge_list)
if __name__ == '__main__':
test_line_graph()
test_no_backtracking()
test_reverse()
test_reverse_shared_frames()
test_simple_graph()
from dgl import DGLError
from dgl.utils import toindex
from dgl.graph_index import create_graph_index
import networkx as nx
def test_edge_id():
gi = create_graph_index(multigraph=False)
assert not gi.is_multigraph()
gi = create_graph_index(multigraph=True)
gi.add_nodes(4)
gi.add_edge(0, 1)
eid = gi.edge_id(0, 1).tonumpy()
assert len(eid) == 1
assert eid[0] == 0
assert gi.is_multigraph()
# multiedges
gi.add_edge(0, 1)
eid = gi.edge_id(0, 1).tonumpy()
assert len(eid) == 2
assert eid[0] == 0
assert eid[1] == 1
gi.add_edges(toindex([0, 1, 1, 2]), toindex([2, 2, 2, 3]))
src, dst, eid = gi.edge_ids(toindex([0, 0, 2, 1]), toindex([2, 1, 3, 2]))
eid_answer = [2, 0, 1, 5, 3, 4]
assert len(eid) == 6
assert all(e == ea for e, ea in zip(eid, eid_answer))
# find edges
src, dst, eid = gi.find_edges(toindex([1, 3, 5]))
assert len(src) == len(dst) == len(eid) == 3
assert src[0] == 0 and src[1] == 1 and src[2] == 2
assert dst[0] == 1 and dst[1] == 2 and dst[2] == 3
assert eid[0] == 1 and eid[1] == 3 and eid[2] == 5
# source broadcasting
src, dst, eid = gi.edge_ids(toindex([0]), toindex([1, 2]))
eid_answer = [0, 1, 2]
assert len(eid) == 3
assert all(e == ea for e, ea in zip(eid, eid_answer))
# destination broadcasting
src, dst, eid = gi.edge_ids(toindex([1, 0]), toindex([2]))
eid_answer = [3, 4, 2]
assert len(eid) == 3
assert all(e == ea for e, ea in zip(eid, eid_answer))
gi.clear()
# the following assumes that grabbing nonexistent edge will throw an error
try:
gi.edge_id(0, 1)
fail = True
except DGLError:
fail = False
finally:
assert not fail
gi.add_nodes(4)
gi.add_edge(0, 1)
eid = gi.edge_id(0, 1).tonumpy()
assert len(eid) == 1
assert eid[0] == 0
def test_nx():
gi = create_graph_index(multigraph=True)
gi.add_nodes(2)
gi.add_edge(0, 1)
nxg = gi.to_networkx()
assert len(nxg.nodes) == 2
assert len(nxg.edges(0, 1)) == 1
gi.add_edge(0, 1)
nxg = gi.to_networkx()
assert len(nxg.edges(0, 1)) == 2
nxg = nx.DiGraph()
nxg.add_edge(0, 1)
gi = create_graph_index(nxg)
assert not gi.is_multigraph()
assert gi.number_of_nodes() == 2
assert gi.number_of_edges() == 1
assert gi.edge_id(0, 1)[0] == 0
nxg = nx.MultiDiGraph()
nxg.add_edge(0, 1)
nxg.add_edge(0, 1)
gi = create_graph_index(nxg, True)
assert gi.is_multigraph()
assert gi.number_of_nodes() == 2
assert gi.number_of_edges() == 2
assert 0 in gi.edge_id(0, 1)
assert 1 in gi.edge_id(0, 1)
nxg = nx.DiGraph()
nxg.add_nodes_from(range(3))
gi = create_graph_index(nxg)
assert gi.number_of_nodes() == 3
assert gi.number_of_edges() == 0
gi = create_graph_index()
gi.add_nodes(3)
nxg = gi.to_networkx()
assert len(nxg.nodes) == 3
assert len(nxg.edges) == 0
nxg = nx.DiGraph()
nxg.add_edge(0, 1, id=0)
nxg.add_edge(1, 2, id=1)
gi = create_graph_index(nxg)
assert 0 in gi.edge_id(0, 1)
assert 1 in gi.edge_id(1, 2)
assert gi.number_of_edges() == 2
assert gi.number_of_nodes() == 3
def test_predsucc():
gi = create_graph_index(multigraph=True)
gi.add_nodes(4)
gi.add_edge(0, 1)
gi.add_edge(0, 1)
gi.add_edge(0, 2)
gi.add_edge(2, 0)
gi.add_edge(3, 0)
gi.add_edge(0, 0)
gi.add_edge(0, 0)
pred = gi.predecessors(0)
assert len(pred) == 3
assert 2 in pred
assert 3 in pred
assert 0 in pred
succ = gi.successors(0)
assert len(succ) == 3
assert 1 in succ
assert 2 in succ
assert 0 in succ
def test_create_from_elist():
elist = [(2, 1), (1, 0), (2, 0), (3, 0), (0, 2)]
g = create_graph_index(elist)
for i, (u, v) in enumerate(elist):
assert g.edge_id(u, v)[0] == i
# immutable graph
# TODO: disabled due to torch support
#g = create_graph_index(elist, readonly=True)
#for i, (u, v) in enumerate(elist):
# print(u, v, g.edge_id(u, v)[0])
# assert g.edge_id(u, v)[0] == i
def test_edges():
gi = create_graph_index()
gi.add_nodes(10)
gi.add_edges(toindex([5,5,5,5]), toindex([6,7,8,9]))
gi.add_edges(toindex([0,0,0,0]), toindex([1,2,3,4]))
gi.add_edges(toindex([1,1,1,1]), toindex([2,3,4,5]))
src, dst, eid = gi.edges()
src0, dst0, eid0 = src.tonumpy(), dst.tonumpy(), eid.tonumpy()
gi.readonly()
src, dst, eid = gi.edges()
src, dst, eid = src.tonumpy(), dst.tonumpy(), eid.tonumpy()
import numpy as np
assert np.array_equal(src, src0)
assert np.array_equal(dst, dst0)
assert np.array_equal(eid, eid0)
if __name__ == '__main__':
test_edge_id()
test_nx()
test_predsucc()
test_create_from_elist()
test_edges()
......@@ -3,7 +3,7 @@ from dgl.utils import toindex
from dgl.graph_index import create_graph_index
def test_node_subgraph():
gi = create_graph_index()
gi = create_graph_index(None, True, False)
gi.add_nodes(4)
gi.add_edge(0, 1)
gi.add_edge(0, 2)
......@@ -18,7 +18,7 @@ def test_node_subgraph():
sgi.induced_nodes[s], sgi.induced_nodes[d])
def test_edge_subgraph():
gi = create_graph_index()
gi = create_graph_index(None, True, False)
gi.add_nodes(4)
gi.add_edge(0, 1)
gi.add_edge(0, 1)
......@@ -33,7 +33,7 @@ def test_edge_subgraph():
sgi.induced_nodes[s], sgi.induced_nodes[d])
def test_immutable_edge_subgraph():
gi = create_graph_index()
gi = create_graph_index(None, True, False)
gi.add_nodes(4)
gi.add_edge(0, 1)
gi.add_edge(0, 1)
......
#!/bin/bash
# Run the compiled unit tests from the build directory and report their result.

echo "$PWD"
ls -lh

# Stop here if the build directory is missing; otherwise the test binary
# would be looked up in the wrong directory.
pushd build || exit 1
./runUnitTests
# Remember the test result: without this the script would exit with the
# status of popd and hide test failures.
status=$?
popd
exit "$status"
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment