Commit d6c80e0c authored by Gan Quan's avatar Gan Quan Committed by Minjie Wang
Browse files

[API] bfs with edges (#132)

* bfs with edges

* dfs toy test case

* clean up

* style fix

* bugfix

* updating docs for bfs_edges and readouts
parent a36d0841
......@@ -9,3 +9,7 @@ BatchedDGLGraph
batch
unbatch
sum_nodes
sum_edges
mean_nodes
mean_edges
......@@ -7,6 +7,7 @@ Graph Traversal
:toctree: ../../generated/
bfs_nodes_generator
bfs_edges_generator
topological_nodes_generator
dfs_edges_generator
dfs_labeled_edges_generator
......@@ -179,6 +179,15 @@ class Graph {
*/
EdgeArray EdgeIds(IdArray src, IdArray dst) const;
/*!
 * \brief Look up the two endpoints of a single edge.
 *
 * The edge ID is used directly as an index into the per-edge source and
 * destination arrays; this function performs no explicit range check itself.
 *
 * \param eid The edge ID
 * \return a pair whose first element is the source node ID and whose second
 *         element is the destination node ID.
 */
std::pair<dgl_id_t, dgl_id_t> FindEdge(dgl_id_t eid) const {
  const auto& src = all_edges_src_[eid];
  const auto& dst = all_edges_dst_[eid];
  return std::pair<dgl_id_t, dgl_id_t>(src, dst);
}
/*!
* \brief Find the edge IDs and return their source and target node IDs.
* \param eids The edge ID array.
......
......@@ -5,7 +5,8 @@ from ._ffi.function import _init_api
from . import backend as F
from . import utils
__all__ = ['bfs_nodes_generator', 'topological_nodes_generator',
__all__ = ['bfs_nodes_generator', 'bfs_edges_generator',
'topological_nodes_generator',
'dfs_edges_generator', 'dfs_labeled_edges_generator',]
def bfs_nodes_generator(graph, source, reversed=False):
......@@ -17,7 +18,7 @@ def bfs_nodes_generator(graph, source, reversed=False):
The graph object.
source : list, tensor of nodes
Source nodes.
reversed : bool, optional
reversed : bool, default False
If true, traverse following the in-edge direction.
Returns
......@@ -44,7 +45,48 @@ def bfs_nodes_generator(graph, source, reversed=False):
all_nodes = utils.toindex(ret(0)).tousertensor()
# TODO(minjie): how to support directly creating python list
sections = utils.toindex(ret(1)).tousertensor().tolist()
return F.split(all_nodes, sections, dim=0)
node_frontiers = F.split(all_nodes, sections, dim=0)
return node_frontiers
def bfs_edges_generator(graph, source, reversed=False):
    r"""Edges frontiers generator using breadth-first search.

    Parameters
    ----------
    graph : DGLGraph
        The graph object.
    source : list, tensor of nodes
        Source nodes.
    reversed : bool, default False
        If true, traverse following the in-edge direction.

    Returns
    -------
    list of edge frontiers
        Each edge frontier is a list, tensor of edges.

    Examples
    --------
    Given a graph (directed, edges from small node id to large, sorted
    in lexicographical order of source-destination node id tuple):
    ::

              2 - 4
             / \
        0 - 1 - 3 - 5

    >>> g = ... # the graph above
    >>> list(dgl.bfs_edges_generator(g, 0))
    [tensor([0]), tensor([1, 2]), tensor([4, 5])]
    """
    handle = graph._graph._handle
    seeds = utils.toindex(source).todgltensor()
    result = _CAPI_DGLBFSEdges(handle, seeds, reversed)
    # result(0) holds all visited edge IDs concatenated; result(1) holds the
    # length of each frontier, used below to cut the flat array apart.
    edge_ids = utils.toindex(result(0)).tousertensor()
    # TODO(minjie): how to support directly creating python list
    frontier_sizes = utils.toindex(result(1)).tousertensor().tolist()
    return F.split(edge_ids, frontier_sizes, dim=0)
def topological_nodes_generator(graph, reversed=False):
"""Node frontiers generator using topological traversal.
......
......@@ -4,6 +4,7 @@
* \brief Graph traversal implementation
*/
#include <algorithm>
#include <queue>
#include "./traversal.h"
#include "../c_api_common.h"
......@@ -104,10 +105,10 @@ IdArray ComputeMergedSections(
* An optional tag can be specified on each node/edge (represented by an int value).
*/
struct Frontiers {
/*!\brief a vector store for the edges in all the fronties */
/*!\brief a vector store for the nodes/edges in all the frontiers */
std::vector<dgl_id_t> ids;
/*!\brief a vector store for edge tags. The vector is empty is no tags. */
/*!\brief a vector store for node/edge tags. Empty if no tags are requested */
std::vector<int64_t> tags;
/*!\brief a section vector to indicate each frontier */
......@@ -140,6 +141,37 @@ TVM_REGISTER_GLOBAL("traversal._CAPI_DGLBFSNodes")
*rv = ConvertNDArrayVectorToPackedFunc({node_ids, sections});
});
/*!
 * \brief Run BFSEdges and collect the visited edge IDs grouped into frontiers.
 *
 * \param graph The graph.
 * \param source Source nodes.
 * \param reversed If true, BFS follows the in-edge direction.
 * \return Frontiers whose `ids` holds the visited edge IDs in visit order and
 *         whose `sections` holds one entry per non-empty frontier.
 */
Frontiers BFSEdgesFrontiers(const Graph& graph, IdArray source, bool reversed) {
  Frontiers result;
  // BFSEdges calls queue->top(), which std::queue does not provide, so a
  // vector-backed queue wrapper is used instead.
  std::vector<dgl_id_t> node_buffer;
  VectorQueueWrapper<dgl_id_t> node_queue(&node_buffer);

  auto on_edge = [&] (const dgl_id_t e) { result.ids.push_back(e); };

  // The first frontier notification happens right after the source nodes are
  // enqueued, before any edge has been visited, so it must not emit a section.
  bool awaiting_first_call = true;
  auto on_frontier = [&] {
    if (awaiting_first_call) {
      awaiting_first_call = false;
      return;
    }
    if (node_queue.empty()) {
      // do not push zero-length frontier
      return;
    }
    // One node is enqueued per visited edge, so the number of pending nodes
    // is recorded as the length of the edge frontier.
    result.sections.push_back(node_queue.size());
  };

  BFSEdges(graph, source, reversed, &node_queue, on_edge, on_frontier);
  return result;
}
// C API entry point for BFS edge frontiers.
// Arguments: [0] graph handle, [1] source node IDs, [2] reversed flag.
// Returns a packed function exposing {edge IDs, section lengths}.
TVM_REGISTER_GLOBAL("traversal._CAPI_DGLBFSEdges")
.set_body([] (TVMArgs args, TVMRetValue* rv) {
    GraphHandle handle = args[0];
    const Graph* graph_ptr = static_cast<Graph*>(handle);
    const IdArray sources = IdArray::FromDLPack(CreateTmpDLManagedTensor(args[1]));
    bool follow_in_edges = args[2];
    const Frontiers frontiers = BFSEdgesFrontiers(*graph_ptr, sources, follow_in_edges);
    IdArray edge_ids = CopyVectorToNDArray(frontiers.ids);
    IdArray section_lengths = CopyVectorToNDArray(frontiers.sections);
    *rv = ConvertNDArrayVectorToPackedFunc({edge_ids, section_lengths});
  });
Frontiers TopologicalNodesFrontiers(const Graph& graph, bool reversed) {
Frontiers front;
VectorQueueWrapper<dgl_id_t> queue(&front.ids);
......
......@@ -81,6 +81,71 @@ void BFSNodes(const Graph& graph,
}
}
/*!
 * \brief Traverse the graph in a breadth-first-search (BFS) order, reporting
 *        the edges of the BFS tree.
 *
 * The queue object must satisfy the following interface:
 * Members:
 * void push(dgl_id_t);  // push one node
 * dgl_id_t top();       // get the first node
 * void pop();           // pop one node
 * bool empty();         // return true if the queue is empty
 * size_t size();        // return the size of the queue
 * Note that std::queue does not qualify as is, because it exposes front()
 * rather than top().
 *
 * The visit function must be compatible with the following interface:
 * void (*visit)(dgl_id_t );
 *
 * The frontier function must be compatible with the following interface:
 * void (*make_frontier)(void);
 *
 * \param graph The graph.
 * \param source Source nodes. Each ID is used directly as an index into the
 *               visited table; no range check is done in this function.
 * \param reversed If true, BFS follows the in-edge direction
 * \param queue The queue used to do bfs.
 * \param visit The function to call when an edge leading to a not-yet-visited
 *              node is found. The argument is the edge ID.
 * \param make_frontier The function to indicate that a new frontier can be made.
 *                      It is called once after the source nodes are enqueued and
 *                      once after each BFS level has been expanded.
 */
template<typename Queue, typename VisitFn, typename FrontierFn>
void BFSEdges(const Graph& graph,
              IdArray source,
              bool reversed,
              Queue* queue,
              VisitFn visit,
              FrontierFn make_frontier) {
  const int64_t num_sources = source->shape[0];
  const int64_t* first_source = static_cast<int64_t*>(source->data);
  std::vector<bool> seen(graph.NumVertices());

  // Seed the queue with every source node and mark it as already reached.
  for (const int64_t* it = first_source; it != first_source + num_sources; ++it) {
    const dgl_id_t u = *it;
    seen[u] = true;
    queue->push(u);
  }
  make_frontier();

  const auto incident_edges = reversed? &Graph::InEdgeVec : &Graph::OutEdgeVec;
  while (!queue->empty()) {
    // Only the nodes present at the start of this round belong to the current
    // level; nodes pushed while expanding them form the next level.
    for (size_t remaining = queue->size(); remaining > 0; --remaining) {
      const dgl_id_t u = queue->top();
      queue->pop();
      for (auto e : (graph.*incident_edges)(u)) {
        const auto endpoints = graph.FindEdge(e);
        // The neighbor is the far end of the edge relative to u.
        const dgl_id_t v = (reversed ? endpoints.first : endpoints.second);
        if (seen[v]) {
          continue;
        }
        seen[v] = true;
        visit(e);
        queue->push(v);
      }
    }
    make_frontier();
  }
}
/*!
* \brief Traverse the graph in topological order.
*
......
......@@ -9,8 +9,13 @@ import scipy.sparse as sp
import torch as th
import utils as U
import itertools
np.random.seed(42)
def toset(x):
    """Return the elements of ``x`` as a set, going through ``x.tolist()``."""
    values = x.tolist()
    return set(values)
def test_bfs_nodes(n=1000):
g = dgl.DGLGraph()
a = sp.random(n, n, 10 / n, data_rvs=lambda n: np.ones(n))
......@@ -21,21 +26,31 @@ def test_bfs_nodes(n=1000):
edges = nx.bfs_edges(g_nx, src)
layers_nx = [set([src])]
edges_nx = []
frontier = set()
edge_frontier = set()
for u, v in edges:
if u in layers_nx[-1]:
frontier.add(v)
edge_frontier.add(g.edge_id(u, v))
else:
layers_nx.append(frontier)
edges_nx.append(edge_frontier)
frontier = set([v])
edge_frontier = set([g.edge_id(u, v)])
layers_nx.append(frontier)
edges_nx.append(edge_frontier)
layers_dgl = dgl.bfs_nodes_generator(g, src)
toset = lambda x: set(x.tolist())
assert len(layers_dgl) == len(layers_nx)
assert all(toset(x) == y for x, y in zip(layers_dgl, layers_nx))
edges_dgl = dgl.bfs_edges_generator(g, src)
assert len(edges_dgl) == len(edges_nx)
assert all(toset(x) == y for x, y in zip(edges_dgl, edges_nx))
def test_topological_nodes(n=1000):
g = dgl.DGLGraph()
a = sp.random(n, n, 10 / n, data_rvs=lambda n: np.ones(n))
......@@ -59,29 +74,45 @@ def test_topological_nodes(n=1000):
layers_spmv = list(tensor_topo_traverse())
toset = lambda x: set(x.tolist())
assert len(layers_dgl) == len(layers_spmv)
assert all(toset(x) == toset(y) for x, y in zip(layers_dgl, layers_spmv))
DFS_LABEL_NAMES = ['forward', 'reverse', 'nontree']
def test_dfs_labeled_edges(n=1000, example=False):
nx_g = nx.DiGraph()
nx_g.add_edges_from([(0, 1), (1, 2), (0, 2)])
nx_rst = list(nx.dfs_labeled_edges(nx_g, 0))[1:-1] # the first and the last are not edges
dgl_g = dgl.DGLGraph()
dgl_g.add_nodes(3)
dgl_g.add_edge(0, 1)
dgl_g.add_edge(1, 2)
dgl_g.add_edge(0, 2)
dgl_g.add_nodes(6)
dgl_g.add_edges([0, 1, 0, 3, 3], [1, 2, 2, 4, 5])
dgl_edges, dgl_labels = dgl.dfs_labeled_edges_generator(
dgl_g, 0, has_reverse_edge=True, has_nontree_edge=True)
dgl_edges = [dgl_g.find_edges(e) for e in dgl_edges]
dgl_u = [int(u) for u, v in dgl_edges]
dgl_v = [int(v) for u, v in dgl_edges]
dgl_labels = [DFS_LABEL_NAMES[l] for l in dgl_labels]
dgl_rst = list(zip(dgl_u, dgl_v, dgl_labels))
assert nx_rst == dgl_rst
dgl_g, [0, 3], has_reverse_edge=True, has_nontree_edge=True)
dgl_edges = [toset(t) for t in dgl_edges]
dgl_labels = [toset(t) for t in dgl_labels]
g1_solutions = [
# edges labels
[[0, 1, 1, 0, 2], [0, 0, 1, 1, 2]],
[[2, 2, 0, 1, 0], [0, 1, 0, 2, 1]],
]
g2_solutions = [
# edges labels
[[3, 3, 4, 4], [0, 1, 0, 1]],
[[4, 4, 3, 3], [0, 1, 0, 1]],
]
def combine_frontiers(sol):
es, ls = zip(*sol)
es = [set(i for i in t if i is not None)
for t in itertools.zip_longest(*es)]
ls = [set(i for i in t if i is not None)
for t in itertools.zip_longest(*ls)]
return es, ls
for sol_set in itertools.product(g1_solutions, g2_solutions):
es, ls = combine_frontiers(sol_set)
if es == dgl_edges and ls == dgl_labels:
break
else:
assert False
if __name__ == '__main__':
test_bfs_nodes()
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment