Commit 23191674 authored by GaiYu0's avatar GaiYu0 Committed by Minjie Wang
Browse files

[API] Graph traversal (#103)

* bfs, dfs and topological traversal

* dfs and test cases

* Conflicts:
	python/dgl/graph.py
	src/graph/graph.cc
	src/graph/graph_op.cc

* documentation

* requested changes

* Conflicts:
	Jenkinsfile
	examples/pytorch/gcn/gcn.py
	examples/pytorch/gcn/gcn_spmv.py
	python/dgl/graph.py
	python/dgl/graph_index.py
	src/graph/graph.cc
	src/graph/graph_op.cc

* Conflicts:
	Jenkinsfile
	python/dgl/graph_index.py

* fix lint errors

* fix lint errors

* fix lint errors

* fix test cases

* requested changes

* traversal interface

* [Bug] fixed a typo that caused syntax error (#120)

* WIP

* bfs nodes generator works

* topological traversal

* WIP: dfs_edges

* dfs edges

* dfs labeled edges

* utest for traversal

* fix lint

* fix utest

* code clean

* changes as requested
parent 79ceccef
......@@ -108,9 +108,7 @@ class TreeLSTM(nn.Module):
g.set_n_repr({'x' : x, 'h' : h, 'c' : c, 'h_tild' : h_tild, 'c_tild' : c_tild})
# TODO(minjie): potential bottleneck
if iterator is None:
for frontier in topological_traverse(g):
#print('frontier', frontier)
g.pull(frontier)
g.propagate('topo')
else:
for frontier in iterator:
g.pull(frontier)
......
......@@ -8,6 +8,8 @@
#include <vector>
#include <cstdint>
#include <utility>
#include <tuple>
#include "runtime/ndarray.h"
namespace dgl {
......@@ -16,6 +18,7 @@ typedef uint64_t dgl_id_t;
typedef tvm::runtime::NDArray IdArray;
typedef tvm::runtime::NDArray DegreeArray;
typedef tvm::runtime::NDArray BoolArray;
typedef tvm::runtime::NDArray IntArray;
class Graph;
class GraphOp;
......@@ -301,6 +304,42 @@ class Graph {
*/
Graph Reverse() const;
/*!
 * \brief Get the successor list of a vertex.
 * \param vid The vertex id; used directly as an index into the adjacency list.
 * \return A const reference to the `succ` vector of the vertex's adjacency entry.
 */
const std::vector<dgl_id_t>& SuccVec(dgl_id_t vid) const {
  const auto& entry = adjlist_[vid];
  return entry.succ;
}
/*!
 * \brief Get the out-edge id list of a vertex.
 * \param vid The vertex id; used directly as an index into the adjacency list.
 * \return A const reference to the `edge_id` vector of the vertex's adjacency entry.
 */
const std::vector<dgl_id_t>& OutEdgeVec(dgl_id_t vid) const {
  const auto& entry = adjlist_[vid];
  return entry.edge_id;
}
/*!
 * \brief Get the predecessor list of a vertex.
 * \param vid The vertex id; used directly as an index into the reverse adjacency list.
 * \return A const reference to the `succ` vector of the vertex's reverse adjacency entry.
 */
const std::vector<dgl_id_t>& PredVec(dgl_id_t vid) const {
  const auto& entry = reverse_adjlist_[vid];
  return entry.succ;
}
/*!
 * \brief Get the in-edge id list of a vertex.
 * \param vid The vertex id; used directly as an index into the reverse adjacency list.
 * \return A const reference to the `edge_id` vector of the vertex's reverse adjacency entry.
 */
const std::vector<dgl_id_t>& InEdgeVec(dgl_id_t vid) const {
  const auto& entry = reverse_adjlist_[vid];
  return entry.edge_id;
}
protected:
friend class GraphOp;
/*! \brief Internal edge list type */
......
......@@ -25,6 +25,7 @@ class GraphOp {
* \return the line graph
*/
static Graph LineGraph(const Graph* graph, bool backtracking);
/*!
* \brief Return a disjoint union of the input graphs.
*
......
......@@ -11,4 +11,5 @@ from .base import ALL
from .batched_graph import *
from .graph import DGLGraph
from .subgraph import DGLSubGraph
from .traversal import *
from .udf import NodeBatch, EdgeBatch
"""Base graph class specialized for neural networks on graphs.
"""
"""Base graph class specialized for neural networks on graphs."""
from __future__ import absolute_import
import networkx as nx
......@@ -336,7 +335,7 @@ class DGLGraph(object):
tensor, tensor
The source and destination node IDs.
"""
eid = utils.toindex(u)
eid = utils.toindex(eid)
src, dst, _ = self._graph.find_edges(eid)
return src.tousertensor(), dst.tousertensor()
......@@ -1207,39 +1206,59 @@ class DGLGraph(object):
self.send(ALL, message_func)
self.recv(ALL, reduce_func, apply_node_func)
def propagate(self,
traverser='topo',
def prop_nodes(self,
nodes_generator,
message_func="default",
reduce_func="default",
apply_node_func="default",
**kwargs):
"""Propagate messages and update nodes using graph traversal.
apply_node_func="default"):
"""Propagate messages using graph traversal by triggering pull() on nodes.
A convenient function for passing messages and updating
nodes according to the traverser. The traverser can be
any of the pre-defined traverser (e.g. 'topo'). User can also provide custom
traverser that generates the edges and nodes.
The traversal order is specified by the ``nodes_generator``. It generates
node frontiers, which is a list or a tensor of nodes. The nodes in the
same frontier will be triggered together, while nodes in different frontiers
will be triggered according to the generating order.
Parameters
----------
traverser : str or generator of edges.
The traverser of the graph.
message_func : str or callable
node_generators : generator
The generator of node frontiers.
message_func : str or callable, optional
The message function.
reduce_func : str or callable
reduce_func : str or callable, optional
The reduce function.
apply_node_func : str or callable
apply_node_func : str or callable, optional
The update function.
kwargs : keyword arguments, optional
Arguments for pre-defined iterators.
"""
if isinstance(traverser, str):
# TODO(minjie): Call pre-defined routine to unroll the computation.
raise RuntimeError('Not implemented.')
else:
# NOTE: the iteration can return multiple edges at each step.
for u, v in traverser:
self.send_and_recv((u, v),
for node_frontier in nodes_generator:
self.pull(node_frontier,
message_func, reduce_func, apply_node_func)
def prop_edges(self,
               edge_generator,
               message_func="default",
               reduce_func="default",
               apply_node_func="default"):
    """Propagate messages by calling send_and_recv() once per edge frontier.

    The traversal order is dictated by ``edge_generator``. Each item it
    yields is one edge frontier (a list or a tensor of edge ids or end
    points). All edges of a frontier are triggered together; frontiers are
    processed one after another in the order they are generated.

    Parameters
    ----------
    edge_generator : generator
        The generator of edge frontiers.
    message_func : str or callable, optional
        The message function.
    reduce_func : str or callable, optional
        The reduce function.
    apply_node_func : str or callable, optional
        The update function.
    """
    for frontier in edge_generator:
        # One send_and_recv round per frontier, in generation order.
        self.send_and_recv(frontier, message_func, reduce_func, apply_node_func)
def subgraph(self, nodes):
......@@ -1337,7 +1356,7 @@ class DGLGraph(object):
self._edge_frame.num_rows,
reduce_func)
def adjacency_matrix(self, ctx=None):
def adjacency_matrix(self, ctx=F.cpu()):
"""Return the adjacency matrix representation of this graph.
Parameters
......@@ -1352,7 +1371,7 @@ class DGLGraph(object):
"""
return self._graph.adjacency_matrix().get(ctx)
def incidence_matrix(self, oriented=False, ctx=None):
def incidence_matrix(self, oriented=False, ctx=F.cpu()):
"""Return the incidence matrix representation of this graph.
Parameters
......
"""Module for graph traversal methods."""
from __future__ import absolute_import
from ._ffi.function import _init_api
from . import backend as F
from . import utils
__all__ = ['bfs_nodes_generator', 'topological_nodes_generator',
'dfs_edges_generator', 'dfs_labeled_edges_generator',]
def bfs_nodes_generator(graph, source, reversed=False):
    """Generate node frontiers in breadth-first-search (BFS) order.

    Parameters
    ----------
    graph : DGLGraph
        The graph object.
    source : list, tensor of nodes
        Source nodes.
    reversed : bool, optional
        If true, traverse following the in-edge direction.

    Returns
    -------
    list of node frontiers
        Each node frontier is a list, tensor of nodes.
    """
    handle = graph._graph._handle
    src_array = utils.toindex(source).todgltensor()
    result = _CAPI_DGLBFSNodes(handle, src_array, reversed)
    # result(0): ids of all frontiers concatenated; result(1): size of each frontier.
    node_ids = utils.toindex(result(0)).tousertensor()
    # TODO(minjie): how to support directly creating python list
    frontier_sizes = utils.toindex(result(1)).tousertensor().tolist()
    return F.split(node_ids, frontier_sizes, dim=0)
def topological_nodes_generator(graph, reversed=False):
    """Generate node frontiers in topological order.

    Parameters
    ----------
    graph : DGLGraph
        The graph object.
    reversed : bool, optional
        If true, traverse following the in-edge direction.

    Returns
    -------
    list of node frontiers
        Each node frontier is a list, tensor of nodes.
    """
    handle = graph._graph._handle
    result = _CAPI_DGLTopologicalNodes(handle, reversed)
    # result(0): ids of all frontiers concatenated; result(1): size of each frontier.
    node_ids = utils.toindex(result(0)).tousertensor()
    # TODO(minjie): how to support directly creating python list
    frontier_sizes = utils.toindex(result(1)).tousertensor().tolist()
    return F.split(node_ids, frontier_sizes, dim=0)
def dfs_edges_generator(graph, source, reversed=False):
    """Generate edge frontiers in depth-first-search (DFS) order.

    Several source nodes may be given to start the DFS traversal. The caller
    must make sure that every source node lies in a different connected
    component, so that the frontiers can be merged easily. Otherwise, the
    behavior is undefined.

    Parameters
    ----------
    graph : DGLGraph
        The graph object.
    source : list, tensor of nodes
        Source nodes.
    reversed : bool, optional
        If true, traverse following the in-edge direction.

    Returns
    -------
    list of edge frontiers
        Each edge frontier is a list, tensor of edges.
    """
    handle = graph._graph._handle
    src_array = utils.toindex(source).todgltensor()
    result = _CAPI_DGLDFSEdges(handle, src_array, reversed)
    # result(0): ids of all frontiers concatenated; result(1): size of each frontier.
    edge_ids = utils.toindex(result(0)).tousertensor()
    # TODO(minjie): how to support directly creating python list
    frontier_sizes = utils.toindex(result(1)).tousertensor().tolist()
    return F.split(edge_ids, frontier_sizes, dim=0)
def dfs_labeled_edges_generator(
        graph,
        source,
        reversed=False,
        has_reverse_edge=False,
        has_nontree_edge=False,
        return_labels=True):
    """Produce edges in depth-first-search (DFS) order, labeled by type.

    There are three labels: FORWARD(0), REVERSE(1), NONTREE(2)

    A FORWARD edge is one in which `u` has been visited but `v` has not. A
    REVERSE edge is one in which both `u` and `v` have been visited and the
    edge is in the DFS tree. A NONTREE edge is one in which both `u` and `v`
    have been visited but the edge is NOT in the DFS tree.

    Several source nodes may be given to start the DFS traversal. The caller
    must make sure that every source node lies in a different connected
    component, so that the frontiers can be merged easily. Otherwise, the
    behavior is undefined.

    Parameters
    ----------
    graph : DGLGraph
        The graph object.
    source : list, tensor of nodes
        Source nodes.
    reversed : bool, optional
        If true, traverse following the in-edge direction.
    has_reverse_edge : bool, optional
        True to include reverse edges.
    has_nontree_edge : bool, optional
        True to include nontree edges.
    return_labels : bool, optional
        True to return the labels of each edge.

    Returns
    -------
    list of edge frontiers
        Each edge frontier is a list, tensor of edges.
    list of list of int
        Label of each edge, organized in the same way as the edge frontiers.
        Only returned when ``return_labels`` is true.
    """
    handle = graph._graph._handle
    src_array = utils.toindex(source).todgltensor()
    result = _CAPI_DGLDFSLabeledEdges(
        handle, src_array, reversed, has_reverse_edge, has_nontree_edge, return_labels)
    edge_ids = utils.toindex(result(0)).tousertensor()
    # TODO(minjie): how to support directly creating python list
    if not return_labels:
        # Without labels the sections array is the second returned item.
        frontier_sizes = utils.toindex(result(1)).tousertensor().tolist()
        return F.split(edge_ids, frontier_sizes, dim=0)
    # With labels the layout is (edge ids, labels, sections).
    edge_labels = utils.toindex(result(1)).tousertensor()
    frontier_sizes = utils.toindex(result(2)).tousertensor().tolist()
    return (F.split(edge_ids, frontier_sizes, dim=0),
            F.split(edge_labels, frontier_sizes, dim=0))
_init_api("dgl.traversal")
......@@ -24,7 +24,7 @@ DLManagedTensor* CreateTmpDLManagedTensor(const TVMArgValue& arg) {
PackedFunc ConvertNDArrayVectorToPackedFunc(const std::vector<NDArray>& vec) {
auto body = [vec](TVMArgs args, TVMRetValue* rv) {
int which = args[0];
const size_t which = args[0];
if (which >= vec.size()) {
LOG(FATAL) << "invalid choice";
} else {
......
......@@ -9,6 +9,7 @@
#include <dgl/runtime/ndarray.h>
#include <dgl/runtime/packed_func.h>
#include <dgl/runtime/registry.h>
#include <algorithm>
#include <vector>
namespace dgl {
......@@ -30,6 +31,27 @@ DLManagedTensor* CreateTmpDLManagedTensor(
tvm::runtime::PackedFunc ConvertNDArrayVectorToPackedFunc(
const std::vector<tvm::runtime::NDArray>& vec);
/*!
 * \brief Check whether the array can be used as an id array.
 *
 * The array must live on CPU, be one-dimensional, and have dtype code kDLInt
 * with 64 bits.
 */
inline bool IsValidIdArray(const tvm::runtime::NDArray& arr) {
  if (arr->ctx.device_type != kDLCPU) {
    return false;
  }
  if (arr->ndim != 1) {
    return false;
  }
  if (arr->dtype.code != kDLInt) {
    return false;
  }
  return arr->dtype.bits == 64;
}
/*!
 * \brief Create a 1D int64_t CPU NDArray holding a copy of the vector's elements.
 *
 * Each element is converted to int64_t while copying, so the element type of
 * the vector must be convertible to int64_t.
 *
 * \param vec The vector to copy from.
 * \return A newly allocated NDArray whose length equals vec.size().
 */
template<typename DType>
tvm::runtime::NDArray CopyVectorToNDArray(
    const std::vector<DType>& vec) {
  using tvm::runtime::NDArray;
  const int64_t num_elems = vec.size();
  const DLDataType dtype{kDLInt, 64, 1};
  const DLContext ctx{kDLCPU, 0};
  NDArray out = NDArray::Empty({num_elems}, dtype, ctx);
  int64_t* dst = static_cast<int64_t*>(out->data);
  std::copy(vec.begin(), vec.end(), dst);
  return out;
}
} // namespace dgl
#endif // DGL_C_API_COMMON_H_
......@@ -8,15 +8,10 @@
#include <unordered_map>
#include <set>
#include <functional>
#include <tuple>
#include "../c_api_common.h"
namespace dgl {
namespace {
// Return true iff `arr` lives on CPU, is one-dimensional and has dtype
// code kDLInt with 64 bits.
inline bool IsValidIdArray(const IdArray& arr) {
return arr->ctx.device_type == kDLCPU && arr->ndim == 1
&& arr->dtype.code == kDLInt && arr->dtype.bits == 64;
}
} // namespace
void Graph::AddVertices(uint64_t num_vertices) {
CHECK(!read_only_) << "Graph is read-only. Mutations are not allowed.";
adjlist_.resize(adjlist_.size() + num_vertices);
......
......@@ -366,4 +366,5 @@ TVM_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphLineGraph")
GraphHandle lghandle = lgptr;
*rv = lghandle;
});
} // namespace dgl
/*!
* Copyright (c) 2018 by Contributors
* \file graph/traversal.cc
* \brief Graph traversal implementation
*/
#include <algorithm>
#include "./traversal.h"
#include "../c_api_common.h"
using tvm::runtime::TVMArgs;
using tvm::runtime::TVMArgValue;
using tvm::runtime::TVMRetValue;
using tvm::runtime::PackedFunc;
using tvm::runtime::NDArray;
namespace dgl {
namespace traverse {
namespace {
// A utility view class for a range of data in a vector.
//
// The view covers the half-open index range [range_start, range_end) of
// `*vec`. It does not own the vector. A freshly constructed view is empty.
//
// NOTE: begin()/end() return iterators of the underlying std::vector, so they
// are invalidated whenever the vector reallocates (e.g. on push_back).
template<typename DType>
struct VectorView {
  // The viewed vector (not owned).
  const std::vector<DType>* vec;
  // Range bounds; zero-initialized so that size()/begin()/end() are well
  // defined even before the caller assigns a range.
  size_t range_start = 0, range_end = 0;

  explicit VectorView(const std::vector<DType>* vec): vec(vec) {}

  // Iterator to the first element of the range.
  auto begin() const -> decltype(vec->begin()) {
    return vec->begin() + range_start;
  }

  // Iterator to one past the last element of the range.
  auto end() const -> decltype(vec->end()) {
    return vec->begin() + range_end;
  }

  // Number of elements in the range.
  size_t size() const { return range_end - range_start; }
};
// Internal function to interleave multiple traversal traces into one ndarray.
// The output lists the first element of every trace (in trace order), then the
// second element of every trace that has one, and so on -- similar to zipping
// the vectors together while skipping traces that are already exhausted.
template<typename DType>
IdArray MergeMultipleTraversals(
    const std::vector<std::vector<DType>>& traces) {
  int64_t longest = 0, total = 0;
  for (const auto& trace : traces) {
    const int64_t n = trace.size();
    longest = std::max(longest, n);
    total += n;
  }
  IdArray merged = IdArray::Empty({total}, DLDataType{kDLInt, 64, 1}, DLContext{kDLCPU, 0});
  int64_t* out = static_cast<int64_t*>(merged->data);
  for (int64_t step = 0; step < longest; ++step) {
    for (const auto& trace : traces) {
      const int64_t n = trace.size();
      if (step < n) {
        *out = trace[step];
        ++out;
      }
    }
  }
  return merged;
}
// Internal function to compute the section lengths that go with an ndarray
// in which multiple traversal traces were merged. Section `i` counts how many
// traces have more than `i` elements; there is one section per position of
// the longest trace.
template<typename DType>
IdArray ComputeMergedSections(
    const std::vector<std::vector<DType>>& traces) {
  int64_t longest = 0;
  for (const auto& trace : traces) {
    const int64_t n = trace.size();
    longest = std::max(longest, n);
  }
  IdArray sections = IdArray::Empty({longest}, DLDataType{kDLInt, 64, 1}, DLContext{kDLCPU, 0});
  int64_t* out = static_cast<int64_t*>(sections->data);
  for (int64_t step = 0; step < longest; ++step) {
    int64_t alive = 0;
    for (const auto& trace : traces) {
      const int64_t n = trace.size();
      if (step < n) {
        ++alive;
      }
    }
    out[step] = alive;
  }
  return sections;
}
} // namespace
/*!
* \brief Class for representing frontiers.
*
* Each frontier is a list of nodes/edges (specified by their ids).
* An optional tag can be specified on each node/edge (represented by an int value).
* All frontiers are stored back to back in `ids`; `sections` records the length
* of each frontier so the flat storage can be split again.
*/
struct Frontiers {
/*!\brief a vector storing the node/edge ids of all the frontiers */
std::vector<dgl_id_t> ids;
/*!\brief a vector storing the tags. The vector is empty if there are no tags. */
std::vector<int64_t> tags;
/*!\brief a section vector holding the length of each frontier */
std::vector<int64_t> sections;
};
// Run BFSNodes and collect the visited node ids together with the length of
// every non-empty frontier.
Frontiers BFSNodesFrontiers(const Graph& graph, IdArray source, bool reversed) {
  Frontiers result;
  VectorView<dgl_id_t> view(&result.ids);
  // Index in result.ids at which the frontier under construction starts.
  size_t frontier_begin = 0;
  auto on_visit = [&] (const dgl_id_t v) { result.ids.push_back(v); };
  auto close_frontier = [&] () {
    const size_t frontier_end = result.ids.size();
    view.range_start = frontier_begin;
    view.range_end = frontier_end;
    if (frontier_end != frontier_begin) {
      // do not push zero-length frontier
      result.sections.push_back(frontier_end - frontier_begin);
    }
    frontier_begin = frontier_end;
    return view;
  };
  BFSNodes(graph, source, reversed, on_visit, close_frontier);
  return result;
}
// C API: BFS node traversal.
// Arguments: (graph handle, source node id array, reversed flag).
// Returns a packed function; index 0 yields the ids of all frontiers
// concatenated, index 1 yields the length of each frontier.
TVM_REGISTER_GLOBAL("traversal._CAPI_DGLBFSNodes")
.set_body([] (TVMArgs args, TVMRetValue* rv) {
    GraphHandle ghandle = args[0];
    const Graph* gptr = static_cast<Graph*>(ghandle);
    const IdArray src = IdArray::FromDLPack(CreateTmpDLManagedTensor(args[1]));
    bool reversed = args[2];
    // BFSNodes reads the source buffer as a flat int64_t array, so reject
    // anything else up front (same check as the DFS entry points).
    CHECK(IsValidIdArray(src)) << "Invalid source node id array.";
    const auto& front = BFSNodesFrontiers(*gptr, src, reversed);
    IdArray node_ids = CopyVectorToNDArray(front.ids);
    IdArray sections = CopyVectorToNDArray(front.sections);
    *rv = ConvertNDArrayVectorToPackedFunc({node_ids, sections});
  });
// Run TopologicalNodes and collect the visited node ids together with the
// length of every non-empty frontier.
Frontiers TopologicalNodesFrontiers(const Graph& graph, bool reversed) {
  Frontiers result;
  VectorView<dgl_id_t> view(&result.ids);
  // Index in result.ids at which the frontier under construction starts.
  size_t frontier_begin = 0;
  auto on_visit = [&] (const dgl_id_t v) { result.ids.push_back(v); };
  auto close_frontier = [&] () {
    const size_t frontier_end = result.ids.size();
    view.range_start = frontier_begin;
    view.range_end = frontier_end;
    if (frontier_end != frontier_begin) {
      // do not push zero-length frontier
      result.sections.push_back(frontier_end - frontier_begin);
    }
    frontier_begin = frontier_end;
    return view;
  };
  TopologicalNodes(graph, reversed, on_visit, close_frontier);
  return result;
}
// C API: topological node traversal.
// Arguments: (graph handle, reversed flag).
// Returns a packed function; index 0 yields the ids of all frontiers
// concatenated, index 1 yields the length of each frontier.
TVM_REGISTER_GLOBAL("traversal._CAPI_DGLTopologicalNodes")
.set_body([] (TVMArgs args, TVMRetValue* rv) {
    GraphHandle handle = args[0];
    const Graph* graph = static_cast<Graph*>(handle);
    bool reversed = args[1];
    const auto& frontiers = TopologicalNodesFrontiers(*graph, reversed);
    IdArray ids = CopyVectorToNDArray(frontiers.ids);
    IdArray lengths = CopyVectorToNDArray(frontiers.sections);
    *rv = ConvertNDArrayVectorToPackedFunc({ids, lengths});
  });
// C API: DFS edge traversal from one or more source nodes.
// Arguments: (graph handle, source node id array, reversed flag).
// One DFS is run per source; only FORWARD edges are requested. The per-source
// traces are interleaved into a single id array plus a sections array.
TVM_REGISTER_GLOBAL("traversal._CAPI_DGLDFSEdges")
.set_body([] (TVMArgs args, TVMRetValue* rv) {
    GraphHandle handle = args[0];
    const Graph* graph = static_cast<Graph*>(handle);
    const IdArray source = args[1];
    const bool reversed = args[2];
    CHECK(IsValidIdArray(source)) << "Invalid source node id array.";
    const int64_t num_sources = source->shape[0];
    const int64_t* source_ids = static_cast<int64_t*>(source->data);
    std::vector<std::vector<dgl_id_t>> traces(num_sources);
    for (int64_t k = 0; k < num_sources; ++k) {
      std::vector<dgl_id_t>& trace = traces[k];
      // The tag is ignored: only the edge ids are collected.
      auto collect = [&trace] (dgl_id_t e, int) { trace.push_back(e); };
      DFSLabeledEdges(*graph, source_ids[k], reversed, false, false, collect);
    }
    IdArray ids = MergeMultipleTraversals(traces);
    IdArray sections = ComputeMergedSections(traces);
    *rv = ConvertNDArrayVectorToPackedFunc({ids, sections});
  });
// C API: labeled DFS edge traversal from one or more source nodes.
// Arguments: (graph handle, source node id array, reversed flag,
//             has_reverse_edge, has_nontree_edge, return_labels).
// Returns a packed function yielding {ids, labels, sections} when labels are
// requested and {ids, sections} otherwise.
TVM_REGISTER_GLOBAL("traversal._CAPI_DGLDFSLabeledEdges")
.set_body([] (TVMArgs args, TVMRetValue* rv) {
    GraphHandle handle = args[0];
    const Graph* graph = static_cast<Graph*>(handle);
    const IdArray source = args[1];
    const bool reversed = args[2];
    const bool has_reverse_edge = args[3];
    const bool has_nontree_edge = args[4];
    const bool return_labels = args[5];

    CHECK(IsValidIdArray(source)) << "Invalid source node id array.";
    const int64_t num_sources = source->shape[0];
    const int64_t* source_ids = static_cast<int64_t*>(source->data);

    std::vector<std::vector<dgl_id_t>> edge_traces(num_sources);
    // Tag traces are only allocated when the caller asked for labels.
    std::vector<std::vector<int64_t>> tag_traces(return_labels ? num_sources : 0);

    for (int64_t k = 0; k < num_sources; ++k) {
      auto record = [&] (dgl_id_t e, int tag) {
        edge_traces[k].push_back(e);
        if (return_labels) {
          tag_traces[k].push_back(tag);
        }
      };
      DFSLabeledEdges(*graph, source_ids[k], reversed,
                      has_reverse_edge, has_nontree_edge, record);
    }

    IdArray ids = MergeMultipleTraversals(edge_traces);
    IdArray sections = ComputeMergedSections(edge_traces);
    if (!return_labels) {
      *rv = ConvertNDArrayVectorToPackedFunc({ids, sections});
    } else {
      IdArray labels = MergeMultipleTraversals(tag_traces);
      *rv = ConvertNDArrayVectorToPackedFunc({ids, labels, sections});
    }
  });
} // namespace traverse
} // namespace dgl
/*!
* Copyright (c) 2018 by Contributors
* \file graph/traversal.h
* \brief Graph traversal routines.
*
* Traversal routines generate frontiers. Frontiers can be node frontiers or edge
* frontiers depending on the traversal function. Each frontier is a
* list of nodes/edges (specified by their ids). An optional tag can be specified
* for each node/edge (represented by an int value).
*/
#ifndef DGL_GRAPH_TRAVERSAL_H_
#define DGL_GRAPH_TRAVERSAL_H_
#include <dgl/graph.h>
#include <stack>
#include <tuple>
#include <vector>
namespace dgl {
namespace traverse {
/*!
* \brief Traverse the graph in a breadth-first-search (BFS) order.
*
* \param sources Source nodes.
* \param reversed If true, BFS follows the in-edge direction
* \param visit The function to call when a node is visited; the node id will be
* given as its only argument.
* \param make_frontier The function to make a new froniter; the function should return a
* node iterator to the just created frontier.
*/
template<typename VisitFn, typename FrontierFn>
void BFSNodes(const Graph& graph,
IdArray source,
bool reversed,
VisitFn visit,
FrontierFn make_frontier) {
const int64_t len = source->shape[0];
const int64_t* src_data = static_cast<int64_t*>(source->data);
std::vector<bool> visited(graph.NumVertices());
for (int64_t i = 0; i < len; ++i) {
visited[src_data[i]] = true;
visit(src_data[i]);
}
auto frontier = make_frontier();
const auto neighbor_iter = reversed? &Graph::PredVec : &Graph::SuccVec;
while (frontier.size() != 0) {
for (const dgl_id_t u : frontier) {
for (auto v : (graph.*neighbor_iter)(u)) {
if (!visited[v]) {
visit(v);
visited[v] = true;
}
}
}
frontier = make_frontier();
}
}
/*!
* \brief Traverse the graph in topological order.
*
* \param reversed If true, follows the in-edge direction
* \param visit The function to call when a node is visited; the node id will be
* given as its only argument.
* \param make_frontier The function to make a new froniter; the function should return a
* node iterator to the just created frontier.
*/
template<typename VisitFn, typename FrontierFn>
void TopologicalNodes(const Graph& graph,
bool reversed,
VisitFn visit,
FrontierFn make_frontier) {
const auto get_degree = reversed? &Graph::OutDegree : &Graph::InDegree;
const auto neighbor_iter = reversed? &Graph::PredVec : &Graph::SuccVec;
uint64_t num_visited_nodes = 0;
std::vector<uint64_t> degrees(graph.NumVertices(), 0);
for (dgl_id_t vid = 0; vid < graph.NumVertices(); ++vid) {
degrees[vid] = (graph.*get_degree)(vid);
if (degrees[vid] == 0) {
visit(vid);
++num_visited_nodes;
}
}
auto frontier = make_frontier();
while (frontier.size() != 0) {
for (const dgl_id_t u : frontier) {
for (auto v : (graph.*neighbor_iter)(u)) {
if (--(degrees[v]) == 0) {
visit(v);
++num_visited_nodes;
}
}
}
// new node frointer
frontier = make_frontier();
}
if (num_visited_nodes != graph.NumVertices()) {
LOG(FATAL) << "Error in topological traversal: loop detected in the given graph.";
}
}
/*!
 * \brief Edge tags passed to the visit callback of ``DFSLabeledEdges``.
 */
enum DFSEdgeTag {
  kForward = 0,  // FORWARD edge
  kReverse = 1,  // REVERSE edge
  kNonTree = 2,  // NONTREE edge
};
/*!
* \brief Traverse the graph in a depth-first-search (DFS) order.
*
* The traversal visits edges in DFS order. Edges have three tags:
* FORWARD(0), REVERSE(1), NONTREE(2)
*
* A FORWARD edge is one in which `u` has been visited but `v` has not.
* A REVERSE edge is one in which both `u` and `v` have been visited and the edge
* is in the DFS tree.
* A NONTREE edge is one in which both `u` and `v` have been visited but the edge
* is NOT in the DFS tree.
*
* The traversal is iterative: an explicit stack replaces recursion. Nothing is
* visited if the source has no edges in the traversal direction.
*
* \param graph The graph to traverse.
* \param source Source node.
* \param reversed If true, DFS follows the in-edge direction
* \param has_reverse_edge If true, REVERSE edges are included
* \param has_nontree_edge If true, NONTREE edges are included
* \param visit The function to call when an edge is visited; the edge id and its
* tag will be given as the arguments.
*/
template<typename VisitFn>
void DFSLabeledEdges(const Graph& graph,
dgl_id_t source,
bool reversed,
bool has_reverse_edge,
bool has_nontree_edge,
VisitFn visit) {
// Neighbor and edge-id accessors for the chosen traversal direction.
const auto succ = reversed? &Graph::PredVec : &Graph::SuccVec;
const auto out_edge = reversed? &Graph::InEdgeVec : &Graph::OutEdgeVec;
if ((graph.*succ)(source).size() == 0) {
// no out-going edges from the source node
return;
}
// Stack entry: (node u, index i of the edge of u being explored, flag telling
// whether that edge has already been reported as a FORWARD (tree) edge).
typedef std::tuple<dgl_id_t, size_t, bool> StackEntry;
std::stack<StackEntry> stack;
// NOTE(review): `source` is used as an index without a range check; callers
// are presumably expected to pass a valid node id -- confirm.
std::vector<bool> visited(graph.NumVertices());
visited[source] = true;
stack.push(std::make_tuple(source, 0, false));
dgl_id_t u = 0;
size_t i = 0;
bool on_tree = false;
// Only nodes with at least one edge in the traversal direction are pushed,
// so indexing the neighbor list with `i` below is always in range.
while (!stack.empty()) {
std::tie(u, i, on_tree) = stack.top();
const dgl_id_t v = (graph.*succ)(u)[i];
const dgl_id_t uv = (graph.*out_edge)(u)[i];
if (visited[v]) {
// Either the subtree below the tree edge uv is finished (on_tree is set), or
// v had been reached before this edge was examined (on_tree is not set).
if (!on_tree && has_nontree_edge) {
visit(uv, kNonTree);
} else if (on_tree && has_reverse_edge) {
visit(uv, kReverse);
}
stack.pop();
// find next one.
if (i < (graph.*succ)(u).size() - 1) {
stack.push(std::make_tuple(u, i+1, false));
}
} else {
visited[v] = true;
// Mark the entry so that the edge is reported as REVERSE (not NONTREE) when
// this entry is seen on top of the stack again.
std::get<2>(stack.top()) = true;
visit(uv, kForward);
// expand
if ((graph.*succ)(v).size() > 0) {
stack.push(std::make_tuple(v, 0, false));
}
}
}
}
} // namespace traverse
} // namespace dgl
#endif // DGL_GRAPH_TRAVERSAL_H_
......@@ -233,7 +233,7 @@ def test_reduce_0deg():
new_repr = g.ndata['h']
assert th.allclose(new_repr[1:], old_repr[1:])
assert th.allclose(new_repr[0], old_repr.sum(0))
assert th.allclose(new_repr[0], old_repr.sum(0), rtol=1e-3, atol=1e-3)
def test_pull_0deg():
g = DGLGraph()
......
......@@ -120,7 +120,7 @@ def test_batch_propagate():
v = [0, 0, 1 + 5, 1 + 5]
order.append((u, v))
bg.propagate(traverser=order)
bg.prop_edges(order)
t1, t2 = dgl.unbatch(bg)
assert t1.ndata['h'][0] == 9
......
import random
import sys
import time
import dgl
import networkx as nx
import numpy as np
import scipy.sparse as sp
import torch as th
np.random.seed(42)
def test_bfs_nodes(n=100):
    """Check dgl.bfs_nodes_generator against layers rebuilt from networkx BFS edges."""
    g = dgl.DGLGraph()
    a = sp.random(n, n, 10 / n, data_rvs=lambda n: np.ones(n))
    g.from_scipy_sparse_matrix(a)
    g_nx = g.to_networkx()

    src = random.choice(range(n))
    layers_dgl = dgl.bfs_nodes_generator(g, src)

    # Rebuild the BFS layers from the (u, v) tree edges yielded by networkx:
    # an edge whose tail lies in the last finished layer extends the layer
    # being built; any other edge starts the next layer.
    edges = nx.bfs_edges(g_nx, src)
    layers_nx = [set([src])]
    frontier = set()
    for u, v in edges:
        if u in layers_nx[-1]:
            frontier.add(v)
        else:
            layers_nx.append(frontier)
            frontier = set([v])
    # The trailing frontier is empty when the source has no reachable
    # neighbors. DGL never emits zero-length frontiers, so an empty layer
    # must not be recorded here either.
    if frontier:
        layers_nx.append(frontier)

    toset = lambda x: set(x.tolist())
    assert len(layers_dgl) == len(layers_nx)
    assert all(toset(x) == y for x, y in zip(layers_dgl, layers_nx))
def test_topological_nodes(n=100):
    """Check dgl.topological_nodes_generator against a sparse-matmul based traversal."""
    g = dgl.DGLGraph()
    a = sp.random(n, n, 10 / n, data_rvs=lambda n: np.ones(n))
    # Keep only the strictly lower triangular part of the random matrix.
    b = sp.tril(a, -1).tocoo()
    g.from_scipy_sparse_matrix(b)

    layers_dgl = dgl.topological_nodes_generator(g)

    adjmat = g.adjacency_matrix()

    def tensor_topo_traverse():
        """Yield one frontier per round: unvisited nodes whose degree is zero."""
        num_nodes = g.number_of_nodes()
        remaining = th.ones((num_nodes, 1))
        degree = th.spmm(adjmat, remaining)
        while th.sum(remaining) != 0.:
            ready = (degree == 0.).float() * remaining
            remaining = remaining - ready
            yield th.squeeze(th.squeeze(ready).nonzero(), 1)
            degree -= th.spmm(adjmat, ready)

    layers_spmv = list(tensor_topo_traverse())

    def toset(x):
        return set(x.tolist())

    assert len(layers_dgl) == len(layers_spmv)
    assert all(toset(x) == toset(y) for x, y in zip(layers_dgl, layers_spmv))
DFS_LABEL_NAMES = ['forward', 'reverse', 'nontree']
def test_dfs_labeled_edges(n=1000, example=False):
    """Compare dgl.dfs_labeled_edges_generator with networkx on a 3-node graph."""
    edge_list = [(0, 1), (1, 2), (0, 2)]

    nx_g = nx.DiGraph()
    nx_g.add_edges_from(edge_list)
    # the first and the last are not edges
    nx_rst = list(nx.dfs_labeled_edges(nx_g, 0))[1:-1]

    dgl_g = dgl.DGLGraph()
    dgl_g.add_nodes(3)
    for tail, head in edge_list:
        dgl_g.add_edge(tail, head)

    edge_frontiers, label_frontiers = dgl.dfs_labeled_edges_generator(
        dgl_g, 0, has_reverse_edge=True, has_nontree_edge=True)
    # Translate edge ids to end points and numeric labels to their names.
    endpoints = [dgl_g.find_edges(e) for e in edge_frontiers]
    tails = [int(pair[0]) for pair in endpoints]
    heads = [int(pair[1]) for pair in endpoints]
    names = [DFS_LABEL_NAMES[l] for l in label_frontiers]
    dgl_rst = list(zip(tails, heads, names))
    assert nx_rst == dgl_rst
if __name__ == '__main__':
    # Run every traversal test when the file is executed as a script.
    for run_test in (test_bfs_nodes, test_topological_nodes, test_dfs_labeled_edges):
        run_test()
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment