Merge pull request #90 from jermainewang/cpp

[GraphIndex] Graph index and many related changes

Merge pull request #90 from jermainewang/cpp
[GraphIndex] Graph index and many related changes
00add9f2 · Minjie Wang · GitHub · ec4216dd · dce1f44d · 00add9f2
Unverified Commit 00add9f2 authored Oct 18, 2018 by Minjie Wang Committed by GitHub Oct 18, 2018
20 changed files
--- a/python/dgl/function/message.py
+++ b/python/dgl/function/message.py
@@ -4,17 +4,25 @@ from __future__ import absolute_import
 import operator
 import dgl.backend as F

-__all__ = ["MessageFunction", "src_mul_edge", "copy_src", "copy_edge"]
+__all__ = ["src_mul_edge", "copy_src", "copy_edge"]


 class MessageFunction(object):
+    """Base builtin message function class."""
+
    def __call__(self, src, edge):
+        """Regular computation of this builtin.
+
+        This will be used when optimization is not available.
+        """
        raise NotImplementedError

    def name(self):
+        """Return the name of this builtin function."""
        raise NotImplementedError

    def is_spmv_supported(self, g):
+        """Return whether the SPMV optimization is supported."""
        raise NotImplementedError


@@ -22,12 +30,6 @@ class BundledMessageFunction(MessageFunction):
    def __init__(self, fn_list):
        if not isinstance(fn_list, (list, tuple)):
            fn_list = [fn_list]
-        else:
-            # sanity check on out field
-            for fn in fn_list:
-                # cannot perform check for udf
-                if isinstance(fn, MessageFunction) and fn.out_field is None:
-                    raise RuntimeError("Not specifying out field for multiple message is ambiguous")
        self.fn_list = fn_list

    def is_spmv_supported(self, g):
@@ -43,11 +45,8 @@ class BundledMessageFunction(MessageFunction):
            if ret is None:
                ret = msg
            else:
-                try:
-                    # ret and msg must be dict
-                    ret.update(msg)
-                except:
-                    raise RuntimeError("Must specify out field for multiple message")
+                # ret and msg must be dict
+                ret.update(msg)
        return ret

    def name(self):
@@ -55,25 +54,26 @@ class BundledMessageFunction(MessageFunction):


 def _is_spmv_supported_node_feat(g, field):
-    if field is None:
-        feat = g.get_n_repr()
-    else:
-        feat = g.get_n_repr()[field]
+    """Return whether the node feature shape supports SPMV optimization.
+
+    Only scalar and vector features are supported currently.
+    """
+    feat = g.get_n_repr()[field]
    shape = F.shape(feat)
    return len(shape) == 1 or len(shape) == 2

 def _is_spmv_supported_edge_feat(g, field):
-    # check shape, only scalar edge feature can be optimized at the moment
-    if field is None:
-        feat = g.get_e_repr()
-    else:
-        feat = g.get_e_repr()[field]
+    """Return whether the edge feature shape supports SPMV optimization.
+
+    Only scalar feature is supported currently.
+    """
+    feat = g.get_e_repr()[field]
    shape = F.shape(feat)
    return len(shape) == 1 or (len(shape) == 2 and shape[1] == 1)


 class SrcMulEdgeMessageFunction(MessageFunction):
-    def __init__(self, mul_op, src_field=None, edge_field=None, out_field=None):
+    def __init__(self, mul_op, src_field, edge_field, out_field):
        self.mul_op = mul_op
        self.src_field = src_field
        self.edge_field = edge_field
@@ -84,21 +84,14 @@ class SrcMulEdgeMessageFunction(MessageFunction):
                and _is_spmv_supported_edge_feat(g, self.edge_field)

    def __call__(self, src, edge):
-        if self.src_field is not None:
-            src = src[self.src_field]
-        if self.edge_field is not None:
-            edge = edge[self.edge_field]
-        ret = self.mul_op(src, edge)
-        if self.out_field is None:
-            return ret
-        else:
-            return {self.out_field : ret}
+        ret = self.mul_op(src[self.src_field], edge[self.edge_field])
+        return {self.out_field : ret}

    def name(self):
        return "src_mul_edge"

 class CopySrcMessageFunction(MessageFunction):
-    def __init__(self, src_field=None, out_field=None):
+    def __init__(self, src_field, out_field):
        self.src_field = src_field
        self.out_field = out_field

@@ -106,14 +99,7 @@ class CopySrcMessageFunction(MessageFunction):
        return _is_spmv_supported_node_feat(g, self.src_field)

    def __call__(self, src, edge):
-        if self.src_field is not None:
-            ret = src[self.src_field]
-        else:
-            ret = src
-        if self.out_field is None:
-            return ret
-        else:
-            return {self.out_field : ret}
+        return {self.out_field : src[self.src_field]}

    def name(self):
        return "copy_src"
@@ -142,14 +128,41 @@ class CopyEdgeMessageFunction(MessageFunction):
        return "copy_edge"


-def src_mul_edge(src=None, edge=None, out=None):
-    """TODO(minjie): docstring """
+def src_mul_edge(src, edge, out):
+    """Builtin message function that computes message by multiplying source node features
+    with edge features.
+
+    Parameters
+    ----------
+    src : str
+        The source feature name.
+    edge : str
+        The edge feature name.
+    out : str
+        The output message name.
+    """
    return SrcMulEdgeMessageFunction(operator.mul, src, edge, out)

-def copy_src(src=None, out=None):
-    """TODO(minjie): docstring """
+def copy_src(src, out):
+    """Builtin message function that computes message using source node feature.
+
+    Parameters
+    ----------
+    src : str
+        The source feature name.
+    out : str
+        The output message name.
+    """
    return CopySrcMessageFunction(src, out)

-def copy_edge(edge=None, out=None):
-    """TODO(minjie): docstring """
+def copy_edge(edge, out):
+    """Builtin message function that computes message using edge feature.
+
+    Parameters
+    ----------
+    edge : str
+        The edge feature name.
+    out : str
+        The output message name.
+    """
    return CopyEdgeMessageFunction(edge, out)
--- a/python/dgl/function/reducer.py
+++ b/python/dgl/function/reducer.py
 """Built-in reducer function."""
 from __future__ import absolute_import

-import dgl.backend as F
+from .. import backend as F

-__all__ = ["ReduceFunction", "sum", "max"]
+__all__ = ["sum", "max"]

 class ReduceFunction(object):
+    """Base builtin reduce function class."""
+
    def __call__(self, node, msgs):
+        """Regular computation of this builtin.
+
+        This will be used when optimization is not available.
+        """
        raise NotImplementedError

    def name(self):
+        """Return the name of this builtin function."""
        raise NotImplementedError

    def is_spmv_supported(self):
+        """Return whether the SPMV optimization is supported."""
        raise NotImplementedError

 class BundledReduceFunction(ReduceFunction):
    def __init__(self, fn_list):
        if not isinstance(fn_list, (list, tuple)):
            fn_list = [fn_list]
-        else:
-            # sanity check on out field
-            for fn in fn_list:
-                if isinstance(fn, ReduceFunction) and fn.out_field is None:
-                    raise RuntimeError("Not specifying out field for multiple reduce is ambiguous")
        self.fn_list = fn_list

    def is_spmv_supported(self):
@@ -39,51 +42,50 @@ class BundledReduceFunction(ReduceFunction):
            if ret is None:
                ret = rpr
            else:
-                try:
-                    # ret and rpr must be dict
-                    ret.update(rpr)
-                except:
-                    raise RuntimeError("Must specify out field for multiple reudce")
+                # ret and rpr must be dict
+                ret.update(rpr)
        return ret

    def name(self):
        return "bundled"

 class ReducerFunctionTemplate(ReduceFunction):
-    def __init__(self, name, batch_op, nonbatch_op, msg_field=None, out_field=None):
+    def __init__(self, name, op, msg_field, out_field):
        self.name = name
-        self.batch_op = batch_op
-        self.nonbatch_op = nonbatch_op
+        self.op = op
        self.msg_field = msg_field
        self.out_field = out_field

    def is_spmv_supported(self):
-        # TODO: support max
+        # NOTE: only sum is supported right now.
        return self.name == "sum"

    def __call__(self, node, msgs):
-        if isinstance(msgs, list):
-            if self.msg_field is None:
-                ret = self.nonbatch_op(msgs)
-            else:
-                ret = self.nonbatch_op([msg[self.msg_field] for msg in msgs])
-        else:
-            if self.msg_field is None:
-                ret = self.batch_op(msgs, 1)
-            else:
-                ret = self.batch_op(msgs[self.msg_field], 1)
-        if self.out_field is None:
-            return ret
-        else:
-            return {self.out_field : ret}
+        return {self.out_field : self.op(msgs[self.msg_field], 1)}

    def name(self):
        return self.name

-_python_sum = sum
-def sum(msgs=None, out=None):
-    return ReducerFunctionTemplate("sum", F.sum, _python_sum, msgs, out)
+def sum(msg, out):
+    """Builtin reduce function that aggregates messages by sum.
+
+    Parameters
+    ----------
+    msg : str
+        The message name.
+    out : str
+        The output node feature name.
+    """
+    return ReducerFunctionTemplate("sum", F.sum, msg, out)
+
+def max(msg, out):
+    """Builtin reduce function that aggregates messages by max.

-_python_max = max
-def max(msgs=None, out=None):
-    return ReducerFunctionTemplate("max", F.max, _python_max, msgs, out)
+    Parameters
+    ----------
+    msg : str
+        The message name.
+    out : str
+        The output node feature name.
+    """
+    return ReducerFunctionTemplate("max", F.max, msg, out)
--- a/python/dgl/generator/__init__.py
+++ b/python/dgl/generator/__init__.py
-from .line import *
--- a/python/dgl/generator/line.py
+++ b/python/dgl/generator/line.py
-"""Line graph generator."""
-from __future__ import absolute_import
-
-import networkx as nx
-import numpy as np
-
-import dgl.backend as F
-from dgl.graph import DGLGraph
-from dgl.frame import FrameRef
-
-def line_graph(G, no_backtracking=False):
-    """Create the line graph that shares the underlying features.
-
-    The node features of the result line graph will share the edge features
-    of the given graph.
-
-    Parameters
-    ----------
-    G : DGLGraph
-        The input graph.
-    no_backtracking : bool
-        Whether the backtracking edges are included in the line graph.
-        If i~j and j~i are two edges in original graph G, then
-        (i,j)~(j,i) and (j,i)~(i,j) are the "backtracking" edges on
-        the line graph.
-    """
-    L = nx.DiGraph()
-    for eid, from_node in enumerate(G.edge_list):
-        L.add_node(from_node)
-        for to_node in G.edges(from_node[1]):
-            if no_backtracking and to_node[1] == from_node[0]:
-                continue
-            L.add_edge(from_node, to_node)
-    relabel_map = {}
-    for i, e in enumerate(G.edge_list):
-        relabel_map[e] = i
-    nx.relabel.relabel_nodes(L, relabel_map, copy=False)
-    return DGLGraph(L, node_frame=G._edge_frame)
--- a/python/dgl/graph.py
+++ b/python/dgl/graph.py
@@ -3,225 +3,707 @@
 from __future__ import absolute_import

 import networkx as nx
-from networkx.classes.digraph import DiGraph
+import numpy as np

 import dgl
-from dgl.base import ALL, is_all, __MSG__, __REPR__
-import dgl.backend as F
-from dgl.backend import Tensor
-from dgl.cached_graph import CachedGraph, create_cached_graph
-import dgl.context as context
-from dgl.frame import FrameRef, merge_frames
-from dgl.nx_adapt import nx_init
-import dgl.scheduler as scheduler
-import dgl.utils as utils
-from dgl.function.message import BundledMessageFunction
-from dgl.function.reducer import BundledReduceFunction
-
-class DGLGraph(DiGraph):
+from .base import ALL, is_all, DGLError, dgl_warning
+from . import backend as F
+from .backend import Tensor
+from .frame import FrameRef, merge_frames
+from .function.message import BundledMessageFunction
+from .function.reducer import BundledReduceFunction
+from .graph_index import GraphIndex, create_graph_index
+from . import scheduler
+from . import utils
+
+# Public names exported by `from dgl.graph import *`.
+__all__ = ['DGLGraph']
+
+class DGLGraph(object):
    """Base graph class specialized for neural networks on graphs.

    TODO(minjie): document of batching semantics
-    TODO(minjie): document of __REPR__ semantics

    Parameters
    ----------
    graph_data : graph data
        Data to initialize graph. Same as networkx's semantics.
-    node_frame : dgl.frame.Frame
+    node_frame : FrameRef
        Node feature storage.
-    edge_frame : dgl.frame.Frame
+    edge_frame : FrameRef
        Edge feature storage.
-    attr : keyword arguments, optional
-        Attributes to add to graph as key=value pairs.
+    multigraph : bool, optional
+        Whether the graph would be a multigraph (default: False)
    """
    def __init__(self,
                 graph_data=None,
                 node_frame=None,
                 edge_frame=None,
-                 **attr):
-        # TODO(minjie): maintaining node/edge list is costly when graph is large.
-        self._edge_list = []
-        nx_init(self,
-                self._add_node_callback,
-                self._add_edge_callback,
-                self._del_node_callback,
-                self._del_edge_callback,
-                graph_data,
-                **attr)
-        # cached graph and storage
-        self._cached_graph = None
+                 multigraph=False):
+        # graph
+        self._graph = create_graph_index(graph_data, multigraph)
+        # frame
        self._node_frame = node_frame if node_frame is not None else FrameRef()
        self._edge_frame = edge_frame if edge_frame is not None else FrameRef()
-        # other class members
-        self._msg_graph = None
+        # msg graph & frame
+        self._msg_graph = create_graph_index(multigraph=multigraph)
        self._msg_frame = FrameRef()
-        self._message_func = (None, None)
-        self._reduce_func = (None, None)
-        self._edge_func = (None, None)
-        self._apply_node_func = (None, None)
-        self._apply_edge_func = (None, None)
+        self._msg_edges = []
+        self.reset_messages()
+        # registered functions
+        self._message_func = None
+        self._reduce_func = None
+        self._edge_func = None
+        self._apply_node_func = None
+        self._apply_edge_func = None
+
+    def add_nodes(self, num, reprs=None):
+        """Add nodes.
+
+        Parameters
+        ----------
+        num : int
+            Number of nodes to be added.
+        reprs : dict
+            Optional node representations.
+        """
+        self._graph.add_nodes(num)
+        self._msg_graph.add_nodes(num)
+        #TODO(minjie): change frames
+        assert reprs is None
+
+    def add_edge(self, u, v, reprs=None):
+        """Add one edge.
+
+        Parameters
+        ----------
+        u : int
+            The src node.
+        v : int
+            The dst node.
+        reprs : dict
+            Optional edge representation.
+        """
+        self._graph.add_edge(u, v)
+        #TODO(minjie): change frames
+        assert reprs is None
+
+    def add_edges(self, u, v, reprs=None):
+        """Add many edges.
+
+        Parameters
+        ----------
+        u : list, tensor
+            The src nodes.
+        v : list, tensor
+            The dst nodes.
+        reprs : dict
+            Optional edge representations. Not supported yet: anything
+            other than None trips the assertion below.
+        """
+        # Normalize user-provided ids to the internal index type.
+        u = utils.toindex(u)
+        v = utils.toindex(v)
+        self._graph.add_edges(u, v)
+        #TODO(minjie): change frames
+        assert reprs is None
+
+    def clear(self):
+        """Clear the graph and its storage."""
+        self._graph.clear()
+        self._node_frame.clear()
+        self._edge_frame.clear()
+        self._msg_graph.clear()
+        self._msg_frame.clear()
+        self._msg_edges.clear()
+
+    def reset_messages(self):
+        """Clear all messages."""
+        self._msg_graph.clear()
+        self._msg_frame.clear()
+        self._msg_edges.clear()
+        self._msg_graph.add_nodes(self.number_of_nodes())
+
+    def number_of_nodes(self):
+        """Return the number of nodes.
+
+        Returns
+        -------
+        int
+            The number of nodes
+        """
+        return self._graph.number_of_nodes()
+
+    def __len__(self):
+        """Return the number of nodes."""
+        return self.number_of_nodes()
+
+    @property
+    def is_multigraph(self):
+        """Whether the graph is a multigraph.
+        """
+        return self._graph.is_multigraph()
+
+    def number_of_edges(self):
+        """Return the number of edges.
+
+        Returns
+        -------
+        int
+            The number of edges
+        """
+        return self._graph.number_of_edges()
+
+    def has_node(self, vid):
+        """Return true if the node exists.
+
+        Parameters
+        ----------
+        vid : int
+            The node.
+
+        Returns
+        -------
+        bool
+            True if the node exists
+        """
+        # Delegate to the graph index; calling self.has_node here would
+        # recurse without ever terminating.
+        return self._graph.has_node(vid)
+
+    def __contains__(self, vid):
+        """Same as has_node."""
+        return self.has_node(vid)
+
+    def has_nodes(self, vids):
+        """Return true if the nodes exist.
+
+        Parameters
+        ----------
+        vids : list, tensor
+            The nodes
+
+        Returns
+        -------
+        tensor
+            0-1 array indicating existence
+        """
+        # Normalize user-provided ids to the internal index type.
+        vids = utils.toindex(vids)
+        rst = self._graph.has_nodes(vids)
+        return rst.tousertensor()
+
+    def has_edge_between(self, u, v):
+        """Return true if the edge exists.
+
+        Parameters
+        ----------
+        u : int
+            The src node.
+        v : int
+            The dst node.
+
+        Returns
+        -------
+        bool
+            True if the edge exists
+        """
+        return self._graph.has_edge_between(u, v)
+
+    def has_edges_between(self, u, v):
+        """Return true if the edge exists.
+
+        Parameters
+        ----------
+        u : list, tensor
+            The src nodes.
+        v : list, tensor
+            The dst nodes.
+
+        Returns
+        -------
+        tensor
+            0-1 array indicating existence
+        """
+        u = utils.toindex(u)
+        v = utils.toindex(v)
+        rst = self._graph.has_edges_between(u, v)
+        return rst.tousertensor()
+
+    def predecessors(self, v, radius=1):
+        """Return the predecessors of the node.
+
+        Parameters
+        ----------
+        v : int
+            The node.
+        radius : int, optional
+            The radius of the neighborhood.
+            NOTE(review): this argument is not forwarded to the graph index
+            below, so it currently has no effect in this method.
+
+        Returns
+        -------
+        tensor
+            Array of predecessors
+        """
+        return self._graph.predecessors(v).tousertensor()
+
+    def successors(self, v, radius=1):
+        """Return the successors of the node.
+
+        Parameters
+        ----------
+        v : int
+            The node.
+        radius : int, optional
+            The radius of the neighborhood.
+            NOTE(review): this argument is not forwarded to the graph index
+            below, so it currently has no effect in this method.
+
+        Returns
+        -------
+        tensor
+            Array of successors
+        """
+        return self._graph.successors(v).tousertensor()
+
+    def edge_id(self, u, v, force_multi=False):
+        """Return the id of the edge.
+
+        Parameters
+        ----------
+        u : int
+            The src node.
+        v : int
+            The dst node.
+        force_multi : bool
+            If False, will return a single edge ID if the graph is a simple graph.
+            If True, will always return an array.
+
+        Returns
+        -------
+        int or tensor
+            A single edge id if force_multi is False and the graph is a simple graph.
+            The edge id array otherwise.
+        """
+        idx = self._graph.edge_id(u, v)
+        # Array form for multigraphs (or on request); otherwise the first entry only.
+        return idx.tousertensor() if force_multi or self.is_multigraph else idx[0]
+
+    def edge_ids(self, u, v, force_multi=False):
+        """Return the edge ids.
+
+        Parameters
+        ----------
+        u : list, tensor
+            The src nodes.
+        v : list, tensor
+            The dst nodes.
+        force_multi : bool
+            If False, will return a single edge ID array if the graph is a simple graph.
+            If True, will always return 3 arrays (src nodes, dst nodes, edge ids).
+
+        Returns
+        -------
+        tensor, or (tensor, tensor, tensor)
+            If force_multi is True or the graph is a multigraph, return
+            (src nodes, dst nodes, edge ids).
+            Otherwise, return a single tensor of edge ids.
+        """
+        # Normalize user-provided ids to the internal index type.
+        u = utils.toindex(u)
+        v = utils.toindex(v)
+        src, dst, eid = self._graph.edge_ids(u, v)
+        if force_multi or self.is_multigraph:
+            return src.tousertensor(), dst.tousertensor(), eid.tousertensor()
+        else:
+            return eid.tousertensor()
+
+    def in_edges(self, v):
+        """Return the in edges of the node(s).
+
+        Parameters
+        ----------
+        v : int, list, tensor
+            The node(s).
+
+        Returns
+        -------
+        tensor
+            The src nodes.
+        tensor
+            The dst nodes.
+        tensor
+            The edge ids.
+        """
+        v = utils.toindex(v)
+        src, dst, eid = self._graph.in_edges(v)
+        return src.tousertensor(), dst.tousertensor(), eid.tousertensor()
+
+    def out_edges(self, v):
+        """Return the out edges of the node(s).
+
+        Parameters
+        ----------
+        v : int, list, tensor
+            The node(s).
+
+        Returns
+        -------
+        tensor
+            The src nodes.
+        tensor
+            The dst nodes.
+        tensor
+            The edge ids.
+        """
+        v = utils.toindex(v)
+        src, dst, eid = self._graph.out_edges(v)
+        return src.tousertensor(), dst.tousertensor(), eid.tousertensor()
+
+    def edges(self, sorted=False):
+        """Return all the edges.
+
+        Parameters
+        ----------
+        sorted : bool
+            True if the returned edges are sorted by their src and dst ids.
+
+        Returns
+        -------
+        tensor
+            The src nodes.
+        tensor
+            The dst nodes.
+        tensor
+            The edge ids.
+        """
+        src, dst, eid = self._graph.edges(sorted)
+        return src.tousertensor(), dst.tousertensor(), eid.tousertensor()
+
+    def in_degree(self, v):
+        """Return the in degree of the node.
+
+        Parameters
+        ----------
+        v : int
+            The node.
+
+        Returns
+        -------
+        int
+            The in degree.
+        """
+        return self._graph.in_degree(v)
+
+    def in_degrees(self, v):
+        """Return the in degrees of the nodes.
+
+        Parameters
+        ----------
+        v : list, tensor
+            The nodes.
+
+        Returns
+        -------
+        tensor
+            The in degree array.
+        """
+        # Normalize user-provided ids to the internal index type, as the
+        # other batched queries (has_nodes, in_edges, out_edges) do.
+        v = utils.toindex(v)
+        return self._graph.in_degrees(v).tousertensor()
+
+    def out_degree(self, v):
+        """Return the out degree of the node.
+
+        Parameters
+        ----------
+        v : int
+            The node.
+
+        Returns
+        -------
+        int
+            The out degree.
+        """
+        return self._graph.out_degree(v)
+
+    def out_degrees(self, v):
+        """Return the out degrees of the nodes.
+
+        Parameters
+        ----------
+        v : list, tensor
+            The nodes.
+
+        Returns
+        -------
+        tensor
+            The out degree array.
+        """
+        # Normalize user-provided ids to the internal index type, as the
+        # other batched queries (has_nodes, in_edges, out_edges) do.
+        v = utils.toindex(v)
+        return self._graph.out_degrees(v).tousertensor()
+
+    def to_networkx(self, node_attrs=None, edge_attrs=None):
+        """Convert to networkx graph.
+
+        The edge id will be saved as the 'id' edge attribute.
+
+        Parameters
+        ----------
+        node_attrs : iterable of str, optional
+            The node attributes to be copied. Currently ignored (see the
+            warning emitted below).
+        edge_attrs : iterable of str, optional
+            The edge attributes to be copied. Currently ignored (see the
+            warning emitted below).
+
+        Returns
+        -------
+        networkx.DiGraph
+            The nx graph
+        """
+        nx_graph = self._graph.to_networkx()
+        #TODO(minjie): attributes
+        # The warning is issued on every call, whether or not attributes were requested.
+        dgl_warning('to_networkx currently does not support converting'
+                    ' node/edge features automatically.')
+        return nx_graph
+
+    def from_networkx(self, nx_graph, node_attrs=None, edge_attrs=None):
+        """Convert from networkx graph.
+
+        Any existing graph structure, features and messages are cleared first.
+
+        If the 'id' edge attribute exists, the edges will be added following
+        the edge id order. Otherwise, the order is undefined.
+
+        Parameters
+        ----------
+        nx_graph : networkx.DiGraph
+            The nx graph
+        node_attrs : iterable of str, optional
+            The node attributes that need to be copied.
+        edge_attrs : iterable of str, optional
+            The edge attributes that need to be copied.
+        """
+        self.clear()
+        self._graph.from_networkx(nx_graph)
+        # Keep the message graph's node count in sync with the new graph.
+        self._msg_graph.add_nodes(self._graph.number_of_nodes())
+        # copy attributes
+        def _batcher(lst):
+            # Batch per-item values into one tensor along a new first dimension.
+            # NOTE(review): lst[0] assumes a non-empty list, i.e. at least one
+            # node/edge when attributes are requested -- confirm with callers.
+            if isinstance(lst[0], Tensor):
+                return F.pack([F.unsqueeze(x, 0) for x in lst])
+            else:
+                return F.tensor(lst)
+        if node_attrs is not None:
+            attr_dict = {attr : [] for attr in node_attrs}
+            # Collect each attribute in node id order.
+            for nid in range(self.number_of_nodes()):
+                for attr in node_attrs:
+                    attr_dict[attr].append(nx_graph.nodes[nid][attr])
+            for attr in node_attrs:
+                self._node_frame[attr] = _batcher(attr_dict[attr])
+        if edge_attrs is not None:
+            attr_dict = {attr : [] for attr in edge_attrs}
+            # Collect each attribute in the order the graph index reports its edges.
+            src, dst, _ = self._graph.edges()
+            for u, v in zip(src.tolist(), dst.tolist()):
+                for attr in edge_attrs:
+                    attr_dict[attr].append(nx_graph.edges[u, v][attr])
+            for attr in edge_attrs:
+                self._edge_frame[attr] = _batcher(attr_dict[attr])
+
+    def from_scipy_sparse_matrix(self, a):
+        """ Convert from scipy sparse matrix.
+
+        Parameters
+        ----------
+        a : scipy sparse matrix
+            The graph's adjacency matrix
+        """
+        self.clear()
+        self._graph.from_scipy_sparse_matrix(a)
+        self._msg_graph.add_nodes(self._graph.number_of_nodes())

    def node_attr_schemes(self):
+        """Return the node feature schemes.
+
+        Returns
+        -------
+        dict of str to schemes
+            The schemes of node feature columns.
+        """
        return self._node_frame.schemes

    def edge_attr_schemes(self):
+        """Return the edge feature schemes.
+
+        Returns
+        -------
+        dict of str to schemes
+            The schemes of edge feature columns.
+        """
        return self._edge_frame.schemes

-    def set_n_repr(self, hu, u=ALL):
+    def set_n_initializer(self, initializer):
+        """Set the initializer for empty node features.
+
+        Initializer is a callable that returns a tensor given the shape and data type.
+
+        Parameters
+        ----------
+        initializer : callable
+            The initializer.
+        """
+        self._node_frame.set_initializer(initializer)
+
+    def set_e_initializer(self, initializer):
+        """Set the initializer for empty edge features.
+
+        Initializer is a callable that returns a tensor given the shape and data type.
+
+        Parameters
+        ----------
+        initializer : callable
+            The initializer.
+        """
+        self._edge_frame.set_initializer(initializer)
+
+    def set_n_repr(self, hu, u=ALL, inplace=False):
        """Set node(s) representation.

-        To set multiple node representations at once, pass `u` with a tensor or
-        a supported container of node ids. In this case, `hu` must be a tensor
-        of shape (B, D1, D2, ...), where B is the number of the nodes and
-        (D1, D2, ...) is the shape of the node representation tensor.
+        `hu` is a dictionary from the feature name to feature tensor. Each tensor
+        is of shape (B, D1, D2, ...), where B is the number of nodes to be updated,
+        and (D1, D2, ...) be the shape of the node representation tensor. The
+        length of the given node ids must match B (i.e, len(u) == B).

-        Dictionary type is also supported for `hu`. In this case, each item
-        will be treated as separate attribute of the nodes.
+        All update will be done out-placely to work with autograd unless the inplace
+        flag is true.

        Parameters
        ----------
-        hu : tensor or dict of tensor
-          Node representation.
+        hu : dict of tensor
+            Node representation.
        u : node, container or tensor
-          The node(s).
+            The node(s).
+        inplace : bool
+            True if the update is done inplacely
        """
        # sanity check
+        if not utils.is_dict_like(hu):
+            raise DGLError('Expect dictionary type for feature data.'
+                           ' Got "%s" instead.' % type(hu))
        if is_all(u):
            num_nodes = self.number_of_nodes()
        else:
            u = utils.toindex(u)
            num_nodes = len(u)
-        if utils.is_dict_like(hu):
-            for key, val in hu.items():
-                assert F.shape(val)[0] == num_nodes
-        else:
-            assert F.shape(hu)[0] == num_nodes
+        for key, val in hu.items():
+            nfeats = F.shape(val)[0]
+            if nfeats != num_nodes:
+                raise DGLError('Expect number of features to match number of nodes (len(u)).'
+                               ' Got %d and %d instead.' % (nfeats, num_nodes))
        # set
        if is_all(u):
-            if utils.is_dict_like(hu):
-                for key, val in hu.items():
-                    self._node_frame[key] = val
-            else:
-                self._node_frame[__REPR__] = hu
+            for key, val in hu.items():
+                self._node_frame[key] = val
        else:
-            if utils.is_dict_like(hu):
-                self._node_frame[u] = hu
-            else:
-                self._node_frame[u] = {__REPR__ : hu}
+            self._node_frame.update_rows(u, hu, inplace=inplace)

    def get_n_repr(self, u=ALL):
        """Get node(s) representation.

+        The returned feature tensor batches multiple node features on the first dimension.
+
        Parameters
        ----------
        u : node, container or tensor
-          The node(s).
+            The node(s).
+
+        Returns
+        -------
+        dict
+            Representation dict from feature name to feature tensor.
        """
+        if len(self.node_attr_schemes()) == 0:
+            return dict()
        if is_all(u):
-            if len(self._node_frame) == 1 and __REPR__ in self._node_frame:
-                return self._node_frame[__REPR__]
-            else:
-                return dict(self._node_frame)
+            return dict(self._node_frame)
        else:
            u = utils.toindex(u)
-            if len(self._node_frame) == 1 and __REPR__ in self._node_frame:
-                return self._node_frame.select_rows(u)[__REPR__]
-            else:
-                return self._node_frame.select_rows(u)
+            return self._node_frame.select_rows(u)

-    def pop_n_repr(self, key=__REPR__):
+    def pop_n_repr(self, key):
        """Get and remove the specified node repr.

        Parameters
        ----------
        key : str
-          The attribute name.
+            The attribute name.
+
+        Returns
+        -------
+        Tensor
+            The popped representation
        """
        return self._node_frame.pop(key)

-    def set_e_repr(self, h_uv, u=ALL, v=ALL):
+    def set_e_repr(self, he, u=ALL, v=ALL, inplace=False):
        """Set edge(s) representation.

-        To set multiple edge representations at once, pass `u` and `v` with tensors or
-        supported containers of node ids. In this case, `h_uv` must be a tensor
-        of shape (B, D1, D2, ...), where B is the number of the edges and
-        (D1, D2, ...) is the shape of the edge representation tensor.
+        `he` is a dictionary from the feature name to feature tensor. Each tensor
+        is of shape (B, D1, D2, ...), where B is the number of edges to be updated,
+        and (D1, D2, ...) is the shape of the edge representation tensor.

-        Dictionary type is also supported for `h_uv`. In this case, each item
-        will be treated as separate attribute of the edges.
+        All updates are done out-of-place to work with autograd unless the inplace
+        flag is true.

        Parameters
        ----------
-        h_uv : tensor or dict of tensor
+        he : dict of tensor
          Edge representation.
        u : node, container or tensor
          The source node(s).
        v : node, container or tensor
          The destination node(s).
+        inplace : bool
+            True if the update is done in place.
        """
        # sanity check
+        if not utils.is_dict_like(he):
+            raise DGLError('Expect dictionary type for feature data.'
+                           ' Got "%s" instead.' % type(he))
        u_is_all = is_all(u)
        v_is_all = is_all(v)
        assert u_is_all == v_is_all
        if u_is_all:
-            num_edges = self.cached_graph.num_edges()
+            self.set_e_repr_by_id(he, eid=ALL, inplace=inplace)
        else:
            u = utils.toindex(u)
            v = utils.toindex(v)
-            num_edges = max(len(u), len(v))
-        if utils.is_dict_like(h_uv):
-            for key, val in h_uv.items():
-                assert F.shape(val)[0] == num_edges
-        else:
-            assert F.shape(h_uv)[0] == num_edges
-        # set
-        if u_is_all:
-            if utils.is_dict_like(h_uv):
-                for key, val in h_uv.items():
-                    self._edge_frame[key] = val
-            else:
-                self._edge_frame[__REPR__] = h_uv
-        else:
-            eid = self.cached_graph.get_edge_id(u, v)
-            if utils.is_dict_like(h_uv):
-                self._edge_frame[eid] = h_uv
-            else:
-                self._edge_frame[eid] = {__REPR__ : h_uv}
+            _, _, eid = self._graph.edge_ids(u, v)
+            self.set_e_repr_by_id(he, eid=eid, inplace=inplace)

-    def set_e_repr_by_id(self, h_uv, eid=ALL):
+    def set_e_repr_by_id(self, he, eid=ALL, inplace=False):
        """Set edge(s) representation by edge id.

+        `he` is a dictionary from the feature name to feature tensor. Each tensor
+        is of shape (B, D1, D2, ...), where B is the number of edges to be updated,
+        and (D1, D2, ...) is the shape of the edge representation tensor.
+
+        All updates are done out-of-place to work with autograd unless the inplace
+        flag is true.
+
        Parameters
        ----------
-        h_uv : tensor or dict of tensor
+        he : dict of tensor
          Edge representation.
        eid : int, container or tensor
          The edge id(s).
+        inplace : bool
+            True if the update is done in place.
        """
        # sanity check
+        if not utils.is_dict_like(he):
+            raise DGLError('Expect dictionary type for feature data.'
+                           ' Got "%s" instead.' % type(he))
        if is_all(eid):
-            num_edges = self.cached_graph.num_edges()
+            num_edges = self.number_of_edges()
        else:
            eid = utils.toindex(eid)
            num_edges = len(eid)
-        if utils.is_dict_like(h_uv):
-            for key, val in h_uv.items():
-                assert F.shape(val)[0] == num_edges
-        else:
-            assert F.shape(h_uv)[0] == num_edges
+        for key, val in he.items():
+            nfeats = F.shape(val)[0]
+            if nfeats != num_edges:
+                raise DGLError('Expect number of features to match number of edges.'
+                               ' Got %d and %d instead.' % (nfeats, num_edges))
        # set
        if is_all(eid):
-            if utils.is_dict_like(h_uv):
-                for key, val in h_uv.items():
-                    self._edge_frame[key] = val
-            else:
-                self._edge_frame[__REPR__] = h_uv
+            # update column
+            for key, val in he.items():
+                self._edge_frame[key] = val
        else:
-            if utils.is_dict_like(h_uv):
-                self._edge_frame[eid] = h_uv
-            else:
-                self._edge_frame[eid] = {__REPR__ : h_uv}
+            # update row
+            self._edge_frame.update_rows(eid, he, inplace=inplace)

    def get_e_repr(self, u=ALL, v=ALL):
        """Get edge(s) representation.
@@ -232,31 +714,37 @@ class DGLGraph(DiGraph):
          The source node(s).
        v : node, container or tensor
          The destination node(s).
+
+        Returns
+        -------
+        dict
+            Representation dict
        """
        u_is_all = is_all(u)
        v_is_all = is_all(v)
        assert u_is_all == v_is_all
+        if len(self.edge_attr_schemes()) == 0:
+            return dict()
        if u_is_all:
-            if len(self._edge_frame) == 1 and __REPR__ in self._edge_frame:
-                return self._edge_frame[__REPR__]
-            else:
-                return dict(self._edge_frame)
+            return self.get_e_repr_by_id(eid=ALL)
        else:
            u = utils.toindex(u)
            v = utils.toindex(v)
-            eid = self.cached_graph.get_edge_id(u, v)
-            if len(self._edge_frame) == 1 and __REPR__ in self._edge_frame:
-                return self._edge_frame.select_rows(eid)[__REPR__]
-            else:
-                return self._edge_frame.select_rows(eid)
+            _, _, eid = self._graph.edge_ids(u, v)
+            return self.get_e_repr_by_id(eid=eid)

-    def pop_e_repr(self, key=__REPR__):
+    def pop_e_repr(self, key):
        """Get and remove the specified edge repr.

        Parameters
        ----------
        key : str
          The attribute name.
+
+        Returns
+        -------
+        Tensor
+            The popped representation
        """
        return self._edge_frame.pop(key)

@@ -267,150 +755,142 @@ class DGLGraph(DiGraph):
        ----------
        eid : int, container or tensor
          The edge id(s).
+
+        Returns
+        -------
+        dict
+            Representation dict from feature name to feature tensor.
        """
+        if len(self.edge_attr_schemes()) == 0:
+            return dict()
        if is_all(eid):
-            if len(self._edge_frame) == 1 and __REPR__ in self._edge_frame:
-                return self._edge_frame[__REPR__]
-            else:
-                return dict(self._edge_frame)
+            return dict(self._edge_frame)
        else:
            eid = utils.toindex(eid)
-            if len(self._edge_frame) == 1 and __REPR__ in self._edge_frame:
-                return self._edge_frame.select_rows(eid)[__REPR__]
-            else:
-                return self._edge_frame.select_rows(eid)
+            return self._edge_frame.select_rows(eid)

-    def register_edge_func(self,
-                           edge_func,
-                           batchable=False):
+    def register_edge_func(self, edge_func):
        """Register global edge update function.

        Parameters
        ----------
        edge_func : callable
          Message function on the edge.
-        batchable : bool
-          Whether the provided message function allows batch computing.
        """
-        self._edge_func = (edge_func, batchable)
+        self._edge_func = edge_func

-    def register_message_func(self,
-                              message_func,
-                              batchable=False):
+    def register_message_func(self, message_func):
        """Register global message function.

        Parameters
        ----------
        message_func : callable
          Message function on the edge.
-        batchable : bool
-          Whether the provided message function allows batch computing.
        """
-        self._message_func = (message_func, batchable)
+        self._message_func = message_func

-    def register_reduce_func(self,
-                             reduce_func,
-                             batchable=False):
+    def register_reduce_func(self, reduce_func):
        """Register global message reduce function.

        Parameters
        ----------
        reduce_func : str or callable
          Reduce function on incoming edges.
-        batchable : bool
-          Whether the provided reduce function allows batch computing.
        """
-        self._reduce_func = (reduce_func, batchable)
+        self._reduce_func = reduce_func

-    def register_apply_node_func(self,
-                                 apply_node_func,
-                                 batchable=False):
+    def register_apply_node_func(self, apply_node_func):
        """Register global node apply function.

        Parameters
        ----------
        apply_node_func : callable
          Apply function on the node.
-        batchable : bool
-          Whether the provided function allows batch computing.
        """
-        self._apply_node_func = (apply_node_func, batchable)
+        self._apply_node_func = apply_node_func

-    def register_apply_edge_func(self,
-                                 apply_edge_func,
-                                 batchable=False):
+    def register_apply_edge_func(self, apply_edge_func):
        """Register global edge apply function.

        Parameters
        ----------
        apply_edge_func : callable
          Apply function on the edge.
-        batchable : bool
-          Whether the provided function allows batch computing.
        """
-        self._apply_edge_func = (apply_edge_func, batchable)
+        self._apply_edge_func = apply_edge_func

-    def apply_nodes(self, v, apply_node_func="default", batchable=False):
+    def apply_nodes(self, v=ALL, apply_node_func="default"):
        """Apply the function on node representations.

+        Applying a None function will be ignored.
+
        Parameters
        ----------
-        v : int, iterable of int, tensor
+        v : int, iterable of int, tensor, optional
          The node id(s).
        apply_node_func : callable
          The apply node function.
-        batchable : bool
-          Whether the provided function allows batch computing.
+        """
+        self._apply_nodes(v, apply_node_func)
+
+    def _apply_nodes(self, v, apply_node_func="default", reduce_accum=None):
+        """Internal apply nodes
+
+        Parameters
+        ----------
+        reduce_accum: dict-like
+          The output of reduce func
        """
        if apply_node_func == "default":
-            apply_node_func, batchable = self._apply_node_func
+            apply_node_func = self._apply_node_func
        if not apply_node_func:
            # Skip none function call.
+            if reduce_accum is not None:
+                # write reduce result back
+                self.set_n_repr(reduce_accum, v)
            return
-        if batchable:
-            new_repr = apply_node_func(self.get_n_repr(v))
-            self.set_n_repr(new_repr, v)
-        else:
-            if is_all(v):
-                v = self.nodes()
-            v = utils.toindex(v)
-            for vv in utils.node_iter(v):
-                ret = apply_node_func(_get_repr(self.nodes[vv]))
-                _set_repr(self.nodes[vv], ret)
-
-    def apply_edges(self, u, v, apply_edge_func="default", batchable=False):
+        # take out current node repr
+        curr_repr = self.get_n_repr(v)
+        if reduce_accum is not None:
+            # merge current node_repr with reduce output
+            curr_repr = utils.HybridDict(reduce_accum, curr_repr)
+        new_repr = apply_node_func(curr_repr)
+        if reduce_accum is not None:
+            # merge new node_repr with reduce output
+            reduce_accum.update(new_repr)
+            new_repr = reduce_accum
+        self.set_n_repr(new_repr, v)
+
+    def apply_edges(self, u=None, v=None, apply_edge_func="default", eid=None):
        """Apply the function on edge representations.

+        Applying a None function will be ignored.
+
        Parameters
        ----------
-        u : int, iterable of int, tensor
+        u : optional, int, iterable of int, tensor
          The src node id(s).
-        v : int, iterable of int, tensor
+        v : optional, int, iterable of int, tensor
          The dst node id(s).
        apply_edge_func : callable
          The apply edge function.
-        batchable : bool
-          Whether the provided function allows batch computing.
+        eid : None, edge, container or tensor
+          The edge to update on.  If eid is not None then u and v are ignored.
        """
        if apply_edge_func == "default":
-            apply_edge_func, batchable = self._apply_edge_func
+            apply_edge_func = self._apply_edge_func
        if not apply_edge_func:
            # Skip none function call.
            return
-        if batchable:
+        if eid is None:
            new_repr = apply_edge_func(self.get_e_repr(u, v))
            self.set_e_repr(new_repr, u, v)
        else:
-            if is_all(u) == is_all(v):
-                u, v = zip(*self.edges)
-            u = utils.toindex(u)
-            v = utils.toindex(v)
-            for uu, vv in utils.edge_iter(u, v):
-                ret = apply_edge_func(_get_repr(self.edges[uu, vv]))
-                _set_repr(self.edges[uu, vv], ret)
+            new_repr = apply_edge_func(self.get_e_repr_by_id(eid))
+            self.set_e_repr_by_id(new_repr, eid)

-    def send(self, u, v, message_func="default", batchable=False):
-        """Trigger the message function on edge u->v
+    def send(self, u=None, v=None, message_func="default", eid=None):
+        """Trigger the message function on edge u->v or eid

        The message function should be compatible with following signature:

@@ -422,62 +902,108 @@ class DGLGraph(DiGraph):
        The message function can be any of the pre-defined functions
        ('from_src').

+        Currently, we require the message functions of consecutive send's to
+        return the same keys.  Otherwise the behavior will be undefined.
+
        Parameters
        ----------
-        u : node, container or tensor
+        u : optional, node, container or tensor
          The source node(s).
-        v : node, container or tensor
+        v : optional, node, container or tensor
          The destination node(s).
        message_func : callable
          The message function.
-        batchable : bool
-          Whether the function allows batched computation.
+        eid : optional, edge, container or tensor
+          The edge to update on.  If eid is not None then u and v are ignored.
+
+        Notes
+        -----
+        On multigraphs, if u and v are specified, then the messages will be sent
+        along all edges between u and v.
        """
        if message_func == "default":
-            message_func, batchable = self._message_func
+            message_func = self._message_func
        assert message_func is not None
        if isinstance(message_func, (tuple, list)):
            message_func = BundledMessageFunction(message_func)
-        if batchable:
-            self._batch_send(u, v, message_func)
-        else:
-            self._nonbatch_send(u, v, message_func)
+        self._batch_send(u, v, eid, message_func)

-    def _nonbatch_send(self, u, v, message_func):
-        if is_all(u) and is_all(v):
-            u, v = self.cached_graph.edges()
-        else:
-            u = utils.toindex(u)
-            v = utils.toindex(v)
-        for uu, vv in utils.edge_iter(u, v):
-            ret = message_func(_get_repr(self.nodes[uu]),
-                               _get_repr(self.edges[uu, vv]))
-            self.edges[uu, vv][__MSG__] = ret
-
-    def _batch_send(self, u, v, message_func):
-        if is_all(u) and is_all(v):
-            u, v = self.cached_graph.edges()
-            self.msg_graph.add_edges(u, v)
+    def _batch_send(self, u, v, eid, message_func):
+        if is_all(u) and is_all(v) and eid is None:
+            u, v, eid = self._graph.edges()
            # call UDF
            src_reprs = self.get_n_repr(u)
            edge_reprs = self.get_e_repr()
            msgs = message_func(src_reprs, edge_reprs)
+        elif eid is not None:
+            eid = utils.toindex(eid)
+            u, v, _ = self._graph.find_edges(eid)
+            # call UDF
+            src_reprs = self.get_n_repr(u)
+            edge_reprs = self.get_e_repr_by_id(eid)
+            msgs = message_func(src_reprs, edge_reprs)
        else:
            u = utils.toindex(u)
            v = utils.toindex(v)
-            u, v = utils.edge_broadcasting(u, v)
-            eid = self.cached_graph.get_edge_id(u, v)
-            self.msg_graph.add_edges(u, v)
+            u, v, eid = self._graph.edge_ids(u, v)
            # call UDF
            src_reprs = self.get_n_repr(u)
            edge_reprs = self.get_e_repr_by_id(eid)
            msgs = message_func(src_reprs, edge_reprs)
+        self._msg_graph.add_edges(u, v)
+        self._msg_frame.append(msgs)
+
+        # TODO(minjie): Fix these codes in next PR.
+        """
+        new_uv = []
+        msg_target_rows = []
+        msg_update_rows = []
+        msg_append_rows = []
+        for i, (_u, _v, _eid) in enumerate(zip(u, v, eid)):
+            if _eid in self._msg_edges:
+                msg_target_rows.append(self._msg_edges.index(_eid))
+                msg_update_rows.append(i)
+            else:
+                new_uv.append((_u, _v))
+                self._msg_edges.append(_eid)
+                msg_append_rows.append(i)
+
+        msg_target_rows = utils.toindex(msg_target_rows)
+        msg_update_rows = utils.toindex(msg_update_rows)
+        msg_append_rows = utils.toindex(msg_append_rows)
+
        if utils.is_dict_like(msgs):
-            self._msg_frame.append(msgs)
+            if len(msg_target_rows) > 0:
+                self._msg_frame.update_rows(
+                        msg_target_rows,
+                        {k: F.gather_row(msgs[k], msg_update_rows.tousertensor())
+                            for k in msgs},
+                        inplace=False)
+            if len(msg_append_rows) > 0:
+                new_u, new_v = zip(*new_uv)
+                new_u = utils.toindex(new_u)
+                new_v = utils.toindex(new_v)
+                self._msg_graph.add_edges(new_u, new_v)
+                self._msg_frame.append(
+                        {k: F.gather_row(msgs[k], msg_append_rows.tousertensor())
+                            for k in msgs})
        else:
-            self._msg_frame.append({__MSG__ : msgs})
+            if len(msg_target_rows) > 0:
+                self._msg_frame.update_rows(
+                        msg_target_rows,
+                        {__MSG__: F.gather_row(msgs, msg_update_rows.tousertensor())},
+                        inplace=False)
+            if len(msg_append_rows) > 0:
+                new_u, new_v = zip(*new_uv)
+                new_u = utils.toindex(new_u)
+                new_v = utils.toindex(new_v)
+                self._msg_graph.add_edges(new_u, new_v)
+                self._msg_frame.append(
+                        {__MSG__: F.gather_row(msgs, msg_append_rows.tousertensor())}
+                        )
+        """

-    def update_edge(self, u=ALL, v=ALL, edge_func="default", batchable=False):
+    def update_edge(self, u=ALL, v=ALL, edge_func="default", eid=None):
        """Update representation on edge u->v

        The edge function should be compatible with following signature:
@@ -496,32 +1022,17 @@ class DGLGraph(DiGraph):
          The destination node(s).
        edge_func : callable
          The update function.
-        batchable : bool
-          Whether the function allows batched computation.
+        eid : optional, edge, container or tensor
+          The edge to update on.  If eid is not None then u and v are ignored.
        """
        if edge_func == "default":
-            edge_func, batchable = self._edge_func
+            edge_func = self._edge_func
        assert edge_func is not None
-        if batchable:
-            self._batch_update_edge(u, v, edge_func)
-        else:
-            self._nonbatch_update_edge(u, v, edge_func)
+        self._batch_update_edge(u, v, eid, edge_func)

-    def _nonbatch_update_edge(self, u, v, edge_func):
-        if is_all(u) and is_all(v):
-            u, v = self.cached_graph.edges()
-        else:
-            u = utils.toindex(u)
-            v = utils.toindex(v)
-        for uu, vv in utils.edge_iter(u, v):
-            ret = edge_func(_get_repr(self.nodes[uu]),
-                            _get_repr(self.nodes[vv]),
-                            _get_repr(self.edges[uu, vv]))
-            _set_repr(self.edges[uu, vv], ret)
-
-    def _batch_update_edge(self, u, v, edge_func):
-        if is_all(u) and is_all(v):
-            u, v = self.cached_graph.edges()
+    def _batch_update_edge(self, u, v, eid, edge_func):
+        if is_all(u) and is_all(v) and eid is None:
+            u, v, eid = self._graph.edges()
            # call the UDF
            src_reprs = self.get_n_repr(u)
            dst_reprs = self.get_n_repr(v)
@@ -529,10 +1040,11 @@ class DGLGraph(DiGraph):
            new_edge_reprs = edge_func(src_reprs, dst_reprs, edge_reprs)
            self.set_e_repr(new_edge_reprs)
        else:
-            u = utils.toindex(u)
-            v = utils.toindex(v)
-            u, v = utils.edge_broadcasting(u, v)
-            eid = self.cached_graph.get_edge_id(u, v)
+            if eid is None:
+                u = utils.toindex(u)
+                v = utils.toindex(v)
+                u, v = utils.edge_broadcasting(u, v)
+                _, _, eid = self._graph.edge_ids(u, v)
            # call the UDF
            src_reprs = self.get_n_repr(u)
            dst_reprs = self.get_n_repr(v)
@@ -543,8 +1055,7 @@ class DGLGraph(DiGraph):
    def recv(self,
             u,
             reduce_func="default",
-             apply_node_func="default",
-             batchable=False):
+             apply_node_func="default"):
        """Receive and reduce in-coming messages and update representation on node u.

        It computes the new node state using the messages sent from the predecessors
@@ -574,33 +1085,15 @@ class DGLGraph(DiGraph):
          The reduce function.
        apply_node_func : callable, optional
          The update function.
-        batchable : bool, optional
-          Whether the reduce and update function allows batched computation.
        """
        if reduce_func == "default":
-            reduce_func, batchable = self._reduce_func
+            reduce_func = self._reduce_func
        assert reduce_func is not None
        if isinstance(reduce_func, (list, tuple)):
            reduce_func = BundledReduceFunction(reduce_func)
-        if batchable:
-            self._batch_recv(u, reduce_func)
-        else:
-            self._nonbatch_recv(u, reduce_func)
+        self._batch_recv(u, reduce_func)
        # optional apply nodes
-        self.apply_nodes(u, apply_node_func, batchable)
-
-    def _nonbatch_recv(self, u, reduce_func):
-        if is_all(u):
-            u = list(range(0, self.number_of_nodes()))
-        else:
-            u = utils.toindex(u)
-        for i, uu in enumerate(utils.node_iter(u)):
-            # reduce phase
-            msgs_batch = [self.edges[vv, uu].pop(__MSG__)
-                          for vv in self.pred[uu] if __MSG__ in self.edges[vv, uu]]
-            if len(msgs_batch) != 0:
-                new_repr = reduce_func(_get_repr(self.nodes[uu]), msgs_batch)
-                _set_repr(self.nodes[uu], new_repr)
+        self.apply_nodes(u, apply_node_func)

    def _batch_recv(self, v, reduce_func):
        if self._msg_frame.num_rows == 0:
@@ -616,7 +1109,7 @@ class DGLGraph(DiGraph):
        v = utils.toindex(v)

        # degree bucketing
-        degrees, v_buckets = scheduler.degree_bucketing(self.msg_graph, v)
+        degrees, v_buckets = scheduler.degree_bucketing(self._msg_graph, v)
        if degrees == [0]:
            # no message has been sent to the specified node
            return
@@ -631,33 +1124,26 @@ class DGLGraph(DiGraph):
                continue
            bkt_len = len(v_bkt)
            dst_reprs = self.get_n_repr(v_bkt)
-            uu, vv, _ = self.msg_graph.in_edges(v_bkt)
-            in_msg_ids = self.msg_graph.get_edge_id(uu, vv)
+            uu, vv, in_msg_ids = self._msg_graph.in_edges(v_bkt)
            in_msgs = self._msg_frame.select_rows(in_msg_ids)
            # Reshape the column tensor to (B, Deg, ...).
            def _reshape_fn(msg):
                msg_shape = F.shape(msg)
                new_shape = (bkt_len, deg) + msg_shape[1:]
                return F.reshape(msg, new_shape)
-            if len(in_msgs) == 1 and __MSG__ in in_msgs:
-                reshaped_in_msgs = _reshape_fn(in_msgs[__MSG__])
-            else:
-                reshaped_in_msgs = utils.LazyDict(
-                        lambda key: _reshape_fn(in_msgs[key]), self._msg_frame.schemes)
-            reordered_v.append(v_bkt.totensor())
+            reshaped_in_msgs = utils.LazyDict(
+                    lambda key: _reshape_fn(in_msgs[key]), self._msg_frame.schemes)
+            reordered_v.append(v_bkt.tousertensor())
            new_reprs.append(reduce_func(dst_reprs, reshaped_in_msgs))

-        # TODO: clear partial messages
-        self.clear_messages()
+        # TODO(minjie): clear partial messages
+        self.reset_messages()

        # Pack all reducer results together
        reordered_v = F.pack(reordered_v)
-        if utils.is_dict_like(new_reprs[0]):
-            keys = new_reprs[0].keys()
-            new_reprs = {key : F.pack([repr[key] for repr in new_reprs])
-                         for key in keys}
-        else:
-            new_reprs = {__REPR__ : F.pack(new_reprs)}
+        keys = new_reprs[0].keys()
+        new_reprs = {key : F.pack([repr[key] for repr in new_reprs])
+                     for key in keys}

        if v_is_all and not has_zero_degree:
            # First do reorder and then replace the whole column.
@@ -670,18 +1156,19 @@ class DGLGraph(DiGraph):
            self.set_n_repr(new_reprs, reordered_v)

    def send_and_recv(self,
-                      u, v,
+                      u=None, v=None,
                      message_func="default",
                      reduce_func="default",
                      apply_node_func="default",
-                      batchable=False):
-        """Trigger the message function on u->v and update v.
+                      eid=None):
+        """Trigger the message function on u->v and update v, or on edge eid
+        and update the destination nodes.

        Parameters
        ----------
-        u : node, container or tensor
+        u : optional, node, container or tensor
          The source node(s).
-        v : node, container or tensor
+        v : optional, node, container or tensor
          The destination node(s).
        message_func : callable
          The message function.
@@ -689,45 +1176,83 @@ class DGLGraph(DiGraph):
          The reduce function.
        apply_node_func : callable, optional
          The update function.
-        batchable : bool
-          Whether the reduce and update function allows batched computation.
-        """
-        u = utils.toindex(u)
-        v = utils.toindex(v)
-        if len(u) == 0:
-            # no edges to be triggered
-            assert len(v) == 0
-            return
-        unique_v = utils.toindex(F.unique(v.totensor()))

-        # TODO(minjie): better way to figure out `batchable` flag
+        Notes
+        -----
+        On multigraphs, if u and v are specified, then the messages will be sent
+        and received along all edges between u and v.
+        """
        if message_func == "default":
-            message_func, batchable = self._message_func
+            message_func = self._message_func
        if reduce_func == "default":
-            reduce_func, _ = self._reduce_func
+            reduce_func = self._reduce_func
        assert message_func is not None
        assert reduce_func is not None

-        if batchable:
+        if eid is None:
+            if u is None or v is None:
+                raise ValueError('u and v must be given if eid is None')
+
+            u = utils.toindex(u)
+            v = utils.toindex(v)
+            if len(u) == 0:
+                # no edges to be triggered
+                assert len(v) == 0
+                return
+            unique_v = utils.toindex(F.unique(v.tousertensor()))
+
            executor = scheduler.get_executor(
                    'send_and_recv', self, src=u, dst=v,
                    message_func=message_func, reduce_func=reduce_func)
        else:
+            eid = utils.toindex(eid)
+            if len(eid) == 0:
+                # no edges to be triggered
+                return
+
            executor = None

        if executor:
-            executor.run()
+            new_reprs = executor.run()
+            unique_v = executor.recv_nodes
+            self._apply_nodes(unique_v, apply_node_func, reduce_accum=new_reprs)
+        elif eid is not None:
+            _, v, _ = self._graph.find_edges(eid)
+            unique_v = utils.toindex(F.unique(v.tousertensor()))
+
+            # TODO(quan): replace with the new DegreeBucketingScheduler
+            self.send(eid=eid, message_func=message_func)
+            self.recv(unique_v, reduce_func, apply_node_func)
        else:
-            self.send(u, v, message_func, batchable=batchable)
-            self.recv(unique_v, reduce_func, None, batchable=batchable)
-        self.apply_nodes(unique_v, apply_node_func, batchable=batchable)
+            # handle multiple message and reduce func
+            if isinstance(message_func, (tuple, list)):
+                message_func = BundledMessageFunction(message_func)
+            if isinstance(reduce_func, (list, tuple)):
+                reduce_func = BundledReduceFunction(reduce_func)
+
+            # message func
+            u, v = utils.edge_broadcasting(u, v)
+            src_reprs = self.get_n_repr(u)
+            edge_reprs = self.get_e_repr(u, v)
+            msgs = message_func(src_reprs, edge_reprs)
+            msg_frame = FrameRef()
+            msg_frame.append(msgs)
+
+            # recv with degree bucketing
+            executor = scheduler.get_recv_executor(graph=self,
+                                                   reduce_func=reduce_func,
+                                                   message_frame=msg_frame,
+                                                   edges=(u, v))
+            new_reprs = executor.run()
+            unique_v = executor.recv_nodes
+
+            self._apply_nodes(unique_v, apply_node_func, reduce_accum=new_reprs)

    def pull(self,
             v,
             message_func="default",
             reduce_func="default",
-             apply_node_func="default",
-             batchable=False):
+             apply_node_func="default"):
        """Pull messages from the node's predecessors and then update it.

        Parameters
@@ -740,24 +1265,20 @@ class DGLGraph(DiGraph):
          The reduce function.
        apply_node_func : callable, optional
          The update function.
-        batchable : bool
-          Whether the reduce and update function allows batched computation.
        """
        v = utils.toindex(v)
        if len(v) == 0:
            return
-        uu, vv, _ = self.cached_graph.in_edges(v)
-        self.send_and_recv(uu, vv, message_func, reduce_func,
-                apply_node_func=None, batchable=batchable)
-        unique_v = F.unique(v.totensor())
-        self.apply_nodes(unique_v, apply_node_func, batchable=batchable)
+        uu, vv, _ = self._graph.in_edges(v)
+        self.send_and_recv(uu, vv, message_func, reduce_func, apply_node_func=None)
+        unique_v = F.unique(v.tousertensor())
+        self.apply_nodes(unique_v, apply_node_func)

    def push(self,
             u,
             message_func="default",
             reduce_func="default",
-             apply_node_func="default",
-             batchable=False):
+             apply_node_func="default"):
        """Send message from the node to its successors and update them.

        Parameters
@@ -770,21 +1291,18 @@ class DGLGraph(DiGraph):
          The reduce function.
        apply_node_func : callable
          The update function.
-        batchable : bool
-          Whether the reduce and update function allows batched computation.
        """
        u = utils.toindex(u)
        if len(u) == 0:
            return
-        uu, vv, _ = self.cached_graph.out_edges(u)
+        uu, vv, _ = self._graph.out_edges(u)
        self.send_and_recv(uu, vv, message_func,
-                reduce_func, apply_node_func, batchable=batchable)
+                reduce_func, apply_node_func)

    def update_all(self,
                   message_func="default",
                   reduce_func="default",
-                   apply_node_func="default",
-                   batchable=False):
+                   apply_node_func="default"):
        """Send messages through all the edges and update all nodes.

        Parameters
@@ -795,76 +1313,61 @@ class DGLGraph(DiGraph):
          The reduce function.
        apply_node_func : callable, optional
          The update function.
-        batchable : bool
-          Whether the reduce and update function allows batched computation.
        """
        if message_func == "default":
-            message_func, batchable = self._message_func
+            message_func = self._message_func
        if reduce_func == "default":
-            reduce_func, _ = self._reduce_func
+            reduce_func = self._reduce_func
        assert message_func is not None
        assert reduce_func is not None

-        if batchable:
-            executor = scheduler.get_executor(
-                    "update_all", self, message_func=message_func, reduce_func=reduce_func)
-        else:
-            executor = None
-
+        executor = scheduler.get_executor(
+                "update_all", self, message_func=message_func, reduce_func=reduce_func)
        if executor:
-            executor.run()
+            new_reprs = executor.run()
+            self._apply_nodes(ALL, apply_node_func, reduce_accum=new_reprs)
        else:
-            self.send(ALL, ALL, message_func, batchable=batchable)
-            self.recv(ALL, reduce_func, None, batchable=batchable)
-        self.apply_nodes(ALL, apply_node_func, batchable=batchable)
+            self.send(ALL, ALL, message_func)
+            self.recv(ALL, reduce_func, apply_node_func)

    def propagate(self,
-                  iterator='bfs',
+                  traverser='topo',
                  message_func="default",
                  reduce_func="default",
                  apply_node_func="default",
-                  batchable=False,
                  **kwargs):
-        """Propagate messages and update nodes using iterator.
+        """Propagate messages and update nodes using graph traversal.

        A convenient function for passing messages and updating
-        nodes according to the iterator. The iterator can be
-        any of the pre-defined iterators ('bfs', 'dfs', 'pre-order',
-        'mid-order', 'post-order'). The computation will be unrolled
-        in the backend efficiently. User can also provide custom
-        iterator that generates the edges and nodes.
+        nodes according to the traverser. The traverser can be
+        any of the pre-defined traverser (e.g. 'topo'). User can also provide custom
+        traverser that generates the edges and nodes.

        Parameters
        ----------
+        traverser : str or generator of edges.
+          The traverser of the graph.
        message_func : str or callable
          The message function.
        reduce_func : str or callable
          The reduce function.
        apply_node_func : str or callable
          The update function.
-        batchable : bool
-          Whether the reduce and update function allows batched computation.
-        iterator : str or generator of steps.
-          The iterator of the graph.
        kwargs : keyword arguments, optional
            Arguments for pre-defined iterators.
        """
-        if isinstance(iterator, str):
-            # TODO Call pre-defined routine to unroll the computation.
+        if isinstance(traverser, str):
+            # TODO(minjie): Call pre-defined routine to unroll the computation.
            raise RuntimeError('Not implemented.')
        else:
            # NOTE: the iteration can return multiple edges at each step.
-            for u, v in iterator:
+            for u, v in traverser:
                self.send_and_recv(u, v,
-                        message_func, reduce_func, apply_node_func, batchable)
+                        message_func, reduce_func, apply_node_func)

    def subgraph(self, nodes):
        """Generate the subgraph among the given nodes.

-        The generated graph contains only the graph structure. The node/edge
-        features are not shared implicitly. Use `copy_from` to get node/edge
-        features from parent graph.
-
        Parameters
        ----------
        nodes : list, or iterable
@@ -875,7 +1378,26 @@ class DGLGraph(DiGraph):
        G : DGLSubGraph
            The subgraph.
        """
-        return dgl.DGLSubGraph(self, nodes)
+        induced_nodes = utils.toindex(nodes)
+        sgi = self._graph.node_subgraph(induced_nodes)
+        return dgl.DGLSubGraph(self, sgi.induced_nodes, sgi.induced_edges, sgi)
+
+    def edge_subgraph(self, edges):
+        """Build the subgraph induced by a set of edges.
+
+        Parameters
+        ----------
+        edges : list, or iterable
+            The edges that make up the subgraph.
+
+        Returns
+        -------
+        G : DGLSubGraph
+            The subgraph.
+        """
+        eids = utils.toindex(edges)
+        sub_index = self._graph.edge_subgraph(eids)
+        # The subgraph index carries the mapping back to the parent graph.
+        parent_nodes = sub_index.induced_nodes
+        parent_edges = sub_index.induced_edges
+        return dgl.DGLSubGraph(self, parent_nodes, parent_edges, sub_index)

    def merge(self, subgraphs, reduce_func='sum'):
        """Merge subgraph features back to this parent graph.
@@ -919,91 +1441,109 @@ class DGLGraph(DiGraph):
                self._edge_frame.num_rows,
                reduce_func)

-    def draw(self):
-        """Plot the graph using dot."""
-        from networkx.drawing.nx_agraph import graphviz_layout
+    def adjacency_matrix(self, ctx=None):
+        """Return the adjacency matrix representation of this graph.

-        pos = graphviz_layout(self, prog='dot')
-        nx.draw(self, pos, with_labels=True)
+        Parameters
+        ----------
+        ctx : optional
+            The context of returned adjacency matrix.

-    @property
-    def cached_graph(self):
-        # TODO: dirty flag when mutated
-        if self._cached_graph is None:
-            self._cached_graph = create_cached_graph(self)
-        return self._cached_graph
+        Returns
+        -------
+        sparse_tensor
+            The adjacency matrix.
+        """
+        return self._graph.adjacency_matrix().get(ctx)

-    @property
-    def msg_graph(self):
-        # TODO: dirty flag when mutated
-        if self._msg_graph is None:
-            self._msg_graph = CachedGraph()
-            self._msg_graph.add_nodes(self.number_of_nodes())
-        return self._msg_graph
-
-    def clear_messages(self):
-        if self._msg_graph is not None:
-            self._msg_graph = CachedGraph()
-            self._msg_graph.add_nodes(self.number_of_nodes())
-            self._msg_frame.clear()
+    def incidence_matrix(self, oriented=False, ctx=None):
+        """Return the incidence matrix representation of this graph.

-    @property
-    def edge_list(self):
-        """Return edges in the addition order."""
-        return self._edge_list
+        Parameters
+        ----------
+        oriented : bool, optional
+            Whether the returned incidence matrix is oriented.
+
+        ctx : optional
+            The context of returned incidence matrix.
+
+        Returns
+        -------
+        sparse_tensor
+            The incidence matrix.
+        """
+        return self._graph.incidence_matrix(oriented).get(ctx)

-    def get_edge_id(self, u, v):
-        """Return the continuous edge id(s) assigned.
+    def line_graph(self, backtracking=True, shared=False):
+        """Return the line graph of this graph.

        Parameters
        ----------
-        u : node, container or tensor
-          The source node(s).
-        v : node, container or tensor
-          The destination node(s).
+        backtracking : bool, optional
+            Whether the returned line graph is backtracking.
+
+        shared : bool, optional
+            Whether the returned line graph shares representations with `self`.

        Returns
        -------
-        eid : tensor
-          The tensor contains edge id(s).
+        DGLGraph
+            The line graph of this graph.
        """
-        u = utils.toindex(u)
-        v = utils.toindex(v)
-        return self.cached_graph.get_edge_id(u, v)
-
-    def _add_node_callback(self, node):
-        #print('New node:', node)
-        self._cached_graph = None
-
-    def _del_node_callback(self, node):
-        #print('Del node:', node)
-        raise RuntimeError('Node removal is not supported currently.')
-        node = utils.convert_to_id_tensor(node)
-        self._node_frame.delete_rows(node)
-        self._cached_graph = None
-
-    def _add_edge_callback(self, u, v):
-        #print('New edge:', u, v)
-        self._edge_list.append((u, v))
-        self._cached_graph = None
-
-    def _del_edge_callback(self, u, v):
-        #print('Del edge:', u, v)
-        raise RuntimeError('Edge removal is not supported currently.')
-        u = utils.convert_to_id_tensor(u)
-        v = utils.convert_to_id_tensor(v)
-        eid = self.get_edge_id(u, v)
-        self._edge_frame.delete_rows(eid)
-        self._cached_graph = None
-
-def _get_repr(attr_dict):
-    if len(attr_dict) == 1 and __REPR__ in attr_dict:
-        return attr_dict[__REPR__]
-    else:
-        return attr_dict
-
-def _set_repr(attr_dict, attr):
-    if utils.is_dict_like(attr):
-        attr_dict.update(attr)
-    else:
-        attr_dict[__REPR__] = attr
+        graph_data = self._graph.line_graph(backtracking)
+        node_frame = self._edge_frame if shared else None
+        return DGLGraph(graph_data, node_frame)
+
+    def filter_nodes(self, predicate, nodes=ALL):
+        """Select the node IDs for which ``predicate`` holds.
+
+        Parameters
+        ----------
+        predicate : callable
+            Called with the node representations of ``nodes`` (the value
+            returned by ``get_n_repr(nodes)``). It must return a boolean
+            tensor with one entry per queried node, marking the nodes that
+            satisfy the predicate.
+        nodes : container or tensor
+            The nodes to filter on. Defaults to all nodes.
+
+        Returns
+        -------
+        tensor
+            The filtered nodes
+        """
+        mask = predicate(self.get_n_repr(nodes))
+
+        # With ALL, positions in the mask are the node IDs themselves.
+        if is_all(nodes):
+            return F.nonzero_1d(mask)
+
+        # Otherwise pick the matching entries out of the given IDs.
+        return F.Tensor(nodes)[mask]
+
+    def filter_edges(self, predicate, edges=ALL):
+        """Select the edge IDs for which ``predicate`` holds.
+
+        Parameters
+        ----------
+        predicate : callable
+            Called with the edge representations of ``edges`` (the value
+            returned by ``get_e_repr_by_id(edges)``). It must return a
+            boolean tensor with one entry per queried edge, marking the
+            edges that satisfy the predicate.
+        edges : container or tensor
+            The edges to filter on. Defaults to all edges.
+
+        Returns
+        -------
+        tensor
+            The filtered edges
+        """
+        mask = predicate(self.get_e_repr_by_id(edges))
+
+        # With ALL, positions in the mask are the edge IDs themselves.
+        if is_all(edges):
+            return F.nonzero_1d(mask)
+
+        # Otherwise pick the matching entries out of the given IDs.
+        return F.Tensor(edges)[mask]
--- a/python/dgl/graph_index.py
+++ b/python/dgl/graph_index.py
+from __future__ import absolute_import
+
+import ctypes
+import numpy as np
+import networkx as nx
+import scipy
+
+from ._ffi.base import c_array
+from ._ffi.function import _init_api
+from . import backend as F
+from . import utils
+
+# Opaque pointer type for graph index handles; used as the element type when
+# packing several handles into a C array (see disjoint_union).
+GraphIndexHandle = ctypes.c_void_p
+
+class GraphIndex(object):
+    """Graph index object.
+
+    Parameters
+    ----------
+    handle : GraphIndexHandle
+        Handler
+    """
+    def __init__(self, handle):
+        """Wrap a C API graph handle.
+
+        Parameters
+        ----------
+        handle : GraphIndexHandle
+            The handle to wrap. It is passed to ``_CAPI_DGLGraphFree`` when
+            this object is finalized.
+        """
+        self._handle = handle
+        # Holds derived objects (adjacency / incidence matrices). The mutating
+        # methods of this class clear it.
+        self._cache = {}
+
+    def __del__(self):
+        """Free this graph index object.
+
+        Hands the wrapped handle to ``_CAPI_DGLGraphFree``.
+        """
+        # NOTE(review): presumably each handle must be wrapped by exactly one
+        # live GraphIndex, otherwise it would be freed more than once -- confirm.
+        _CAPI_DGLGraphFree(self._handle)
+
+    def add_nodes(self, num):
+        """Append new nodes to the graph.
+
+        Parameters
+        ----------
+        num : int
+            How many nodes to add.
+        """
+        _CAPI_DGLGraphAddVertices(self._handle, num)
+        # The structure changed, so anything cached from it is stale.
+        self._cache.clear()
+
+    def add_edge(self, u, v):
+        """Insert a single edge from ``u`` to ``v``.
+
+        Parameters
+        ----------
+        u : int
+            The src node.
+        v : int
+            The dst node.
+        """
+        _CAPI_DGLGraphAddEdge(self._handle, u, v)
+        # The structure changed, so anything cached from it is stale.
+        self._cache.clear()
+
+    def add_edges(self, u, v):
+        """Insert a batch of edges.
+
+        Parameters
+        ----------
+        u : utils.Index
+            The src nodes.
+        v : utils.Index
+            The dst nodes.
+        """
+        _CAPI_DGLGraphAddEdges(self._handle, u.todgltensor(), v.todgltensor())
+        # The structure changed, so anything cached from it is stale.
+        self._cache.clear()
+
+    def clear(self):
+        """Clear the graph.
+
+        Also drops every cached object derived from the old structure.
+        """
+        _CAPI_DGLGraphClear(self._handle)
+        self._cache.clear()
+
+    def is_multigraph(self):
+        """Return whether the graph is a multigraph.
+
+        Returns
+        -------
+        bool
+            True if it is a multigraph, False otherwise.
+        """
+        # The C API result is coerced to a Python bool.
+        return bool(_CAPI_DGLGraphIsMultigraph(self._handle))
+
+    def number_of_nodes(self):
+        """Return the number of nodes.
+
+        Returns
+        -------
+        int
+            The number of nodes, as reported by the C API.
+        """
+        return _CAPI_DGLGraphNumVertices(self._handle)
+
+    def number_of_edges(self):
+        """Return the number of edges.
+
+        Returns
+        -------
+        int
+            The number of edges, as reported by the C API.
+        """
+        return _CAPI_DGLGraphNumEdges(self._handle)
+
+    def has_node(self, vid):
+        """Return true if the node exists.
+
+        Parameters
+        ----------
+        vid : int
+            The node id to look up.
+
+        Returns
+        -------
+        bool
+            True if the node exists, False otherwise.
+        """
+        # The C API result is coerced to a Python bool.
+        return bool(_CAPI_DGLGraphHasVertex(self._handle, vid))
+
+    def has_nodes(self, vids):
+        """Return, for each given node, whether it exists.
+
+        Parameters
+        ----------
+        vids : utils.Index
+            The nodes
+
+        Returns
+        -------
+        utils.Index
+            0-1 array indicating existence
+        """
+        vid_array = vids.todgltensor()
+        return utils.toindex(_CAPI_DGLGraphHasVertices(self._handle, vid_array))
+
+    def has_edge_between(self, u, v):
+        """Return true if an edge from ``u`` to ``v`` exists.
+
+        Parameters
+        ----------
+        u : int
+            The src node.
+        v : int
+            The dst node.
+
+        Returns
+        -------
+        bool
+            True if the edge exists, False otherwise
+        """
+        # The C API result is coerced to a Python bool.
+        return bool(_CAPI_DGLGraphHasEdgeBetween(self._handle, u, v))
+
+    def has_edges_between(self, u, v):
+        """Check edge existence for a batch of (src, dst) pairs.
+
+        Parameters
+        ----------
+        u : utils.Index
+            The src nodes.
+        v : utils.Index
+            The dst nodes.
+
+        Returns
+        -------
+        utils.Index
+            0-1 array indicating existence
+        """
+        exists = _CAPI_DGLGraphHasEdgesBetween(
+            self._handle, u.todgltensor(), v.todgltensor())
+        return utils.toindex(exists)
+
+    def predecessors(self, v, radius=1):
+        """Return the predecessors of the node.
+
+        Parameters
+        ----------
+        v : int
+            The node.
+        radius : int, optional
+            The radius of the neighborhood. Forwarded unchanged to the C API.
+
+        Returns
+        -------
+        utils.Index
+            Array of predecessors
+        """
+        return utils.toindex(_CAPI_DGLGraphPredecessors(self._handle, v, radius))
+
+    def successors(self, v, radius=1):
+        """Return the successors of the node.
+
+        Parameters
+        ----------
+        v : int
+            The node.
+        radius : int, optional
+            The radius of the neighborhood. Forwarded unchanged to the C API.
+
+        Returns
+        -------
+        utils.Index
+            Array of successors
+        """
+        return utils.toindex(_CAPI_DGLGraphSuccessors(self._handle, v, radius))
+
+    def edge_id(self, u, v):
+        """Return the id array of all edges between u and v.
+
+        The result is an array rather than a scalar; presumably a multigraph
+        can hold several edges between the same pair -- confirm.
+
+        Parameters
+        ----------
+        u : int
+            The src node.
+        v : int
+            The dst node.
+
+        Returns
+        -------
+        utils.Index
+            The edge id array.
+        """
+        return utils.toindex(_CAPI_DGLGraphEdgeId(self._handle, u, v))
+
+    def edge_ids(self, u, v):
+        """Look up the edges connecting the given (src, dst) pairs.
+
+        Parameters
+        ----------
+        u : utils.Index
+            The src nodes.
+        v : utils.Index
+            The dst nodes.
+
+        Returns
+        -------
+        utils.Index
+            The src nodes.
+        utils.Index
+            The dst nodes.
+        utils.Index
+            The edge ids.
+        """
+        packed = _CAPI_DGLGraphEdgeIds(
+            self._handle, u.todgltensor(), v.todgltensor())
+        # The C API result is indexed by position: 0 -> src, 1 -> dst, 2 -> eid.
+        return tuple(utils.toindex(packed(i)) for i in range(3))
+
+    def find_edges(self, eid):
+        """Look up the endpoints of the given edge ids.
+
+        Parameters
+        ----------
+        eid : utils.Index
+            The edge ids.
+
+        Returns
+        -------
+        utils.Index
+            The src nodes.
+        utils.Index
+            The dst nodes.
+        utils.Index
+            The edge ids.
+        """
+        packed = _CAPI_DGLGraphFindEdges(self._handle, eid.todgltensor())
+        # The C API result is indexed by position: 0 -> src, 1 -> dst, 2 -> eid.
+        return tuple(utils.toindex(packed(i)) for i in range(3))
+
+    def in_edges(self, v):
+        """Collect the incoming edges of the given node(s).
+
+        Parameters
+        ----------
+        v : utils.Index
+            The node(s).
+
+        Returns
+        -------
+        utils.Index
+            The src nodes.
+        utils.Index
+            The dst nodes.
+        utils.Index
+            The edge ids.
+        """
+        # Exactly one node goes through the scalar entry point; any other
+        # count goes through the batched one.
+        if len(v) == 1:
+            packed = _CAPI_DGLGraphInEdges_1(self._handle, v[0])
+        else:
+            packed = _CAPI_DGLGraphInEdges_2(self._handle, v.todgltensor())
+        # Result positions: 0 -> src, 1 -> dst, 2 -> eid.
+        return tuple(utils.toindex(packed(i)) for i in range(3))
+
+    def out_edges(self, v):
+        """Collect the outgoing edges of the given node(s).
+
+        Parameters
+        ----------
+        v : utils.Index
+            The node(s).
+
+        Returns
+        -------
+        utils.Index
+            The src nodes.
+        utils.Index
+            The dst nodes.
+        utils.Index
+            The edge ids.
+        """
+        # Exactly one node goes through the scalar entry point; any other
+        # count goes through the batched one.
+        if len(v) == 1:
+            packed = _CAPI_DGLGraphOutEdges_1(self._handle, v[0])
+        else:
+            packed = _CAPI_DGLGraphOutEdges_2(self._handle, v.todgltensor())
+        # Result positions: 0 -> src, 1 -> dst, 2 -> eid.
+        return tuple(utils.toindex(packed(i)) for i in range(3))
+
+    def edges(self, sorted=False):
+        """Return every edge of the graph.
+
+        Parameters
+        ----------
+        sorted : bool
+            True if the returned edges are sorted by their src and dst ids.
+
+        Returns
+        -------
+        utils.Index
+            The src nodes.
+        utils.Index
+            The dst nodes.
+        utils.Index
+            The edge ids.
+        """
+        packed = _CAPI_DGLGraphEdges(self._handle, sorted)
+        # Result positions: 0 -> src, 1 -> dst, 2 -> eid.
+        return tuple(utils.toindex(packed(i)) for i in range(3))
+
+    def in_degree(self, v):
+        """Return the in degree of the node.
+
+        Parameters
+        ----------
+        v : int
+            The node.
+
+        Returns
+        -------
+        int
+            The in degree, as reported by the C API.
+        """
+        return _CAPI_DGLGraphInDegree(self._handle, v)
+
+    def in_degrees(self, v):
+        """Return the in degrees of the nodes.
+
+        Parameters
+        ----------
+        v : utils.Index
+            The nodes.
+
+        Returns
+        -------
+        utils.Index
+            The in degree array.
+        """
+        v_array = v.todgltensor()
+        return utils.toindex(_CAPI_DGLGraphInDegrees(self._handle, v_array))
+
+    def out_degree(self, v):
+        """Return the out degree of the node.
+
+        Parameters
+        ----------
+        v : int
+            The node.
+
+        Returns
+        -------
+        int
+            The out degree, as reported by the C API.
+        """
+        return _CAPI_DGLGraphOutDegree(self._handle, v)
+
+    def out_degrees(self, v):
+        """Return the out degrees of the nodes.
+
+        Parameters
+        ----------
+        v : utils.Index
+            The nodes.
+
+        Returns
+        -------
+        utils.Index
+            The out degree array.
+        """
+        v_array = v.todgltensor()
+        return utils.toindex(_CAPI_DGLGraphOutDegrees(self._handle, v_array))
+
+    def node_subgraph(self, v):
+        """Build the subgraph induced by the given nodes.
+
+        Parameters
+        ----------
+        v : utils.Index
+            The nodes.
+
+        Returns
+        -------
+        SubgraphIndex
+            The subgraph index.
+        """
+        packed = _CAPI_DGLGraphVertexSubgraph(self._handle, v.todgltensor())
+        # Slot 2 holds the induced edge ids, slot 0 the subgraph handle.
+        # The given nodes are recorded as the induced nodes.
+        edge_ids = utils.toindex(packed(2))
+        return SubgraphIndex(packed(0), self, v, edge_ids)
+
+    def edge_subgraph(self, e):
+        """Return the induced edge subgraph.
+
+        Parameters
+        ----------
+        e : utils.Index
+            The edges.
+
+        Returns
+        -------
+        SubgraphIndex
+            The subgraph index. Its induced edges are the given ``e``; its
+            induced nodes come from the C API result.
+        """
+        rst = _CAPI_DGLGraphEdgeSubgraph(self._handle, e.todgltensor())
+        # Wrap the subgraph handle (slot 0) exactly once. A second, temporary
+        # GraphIndex around it would hand the handle to _CAPI_DGLGraphFree
+        # from its __del__ while the returned SubgraphIndex still uses it.
+        induced_nodes = utils.toindex(rst(1))
+        return SubgraphIndex(rst(0), self, induced_nodes, e)
+
+    def adjacency_matrix(self):
+        """Build (or fetch from cache) the adjacency matrix of this graph.
+
+        The sparse index is packed as ``[dst, src]`` with a value of one per
+        edge, and the matrix shape is ``[n, n]`` for ``n`` nodes.
+
+        Returns
+        -------
+        utils.CtxCachedObject
+            An object that returns tensor given context.
+        """
+        if 'adj' in self._cache:
+            return self._cache['adj']
+
+        src, dst, _ = self.edges(sorted=False)
+        src = F.unsqueeze(src.tousertensor(), 0)
+        dst = F.unsqueeze(dst.tousertensor(), 0)
+        index = F.pack([dst, src])
+        num_nodes = self.number_of_nodes()
+        values = F.ones((self.number_of_edges(),))
+        mat = F.sparse_tensor(index, values, [num_nodes, num_nodes])
+        # Moving the matrix to a context is deferred until one is requested.
+        cached = utils.CtxCachedObject(lambda ctx: F.to_context(mat, ctx))
+        self._cache['adj'] = cached
+        return cached
+
+    def incidence_matrix(self, oriented=False):
+        """Build (or fetch from cache) the incidence matrix of this graph.
+
+        The matrix has shape ``[n, m]`` for ``n`` nodes and ``m`` edges. Each
+        edge contributes one entry for its src node and one for its dst node.
+        Entries of self-loop edges (src == dst) are set to zero.
+
+        Parameters
+        ----------
+        oriented : bool, optional (default=False)
+          Whether the returned incidence matrix is oriented. If so, the src
+          entry is -1 and the dst entry is +1; otherwise both are +1.
+
+        Returns
+        -------
+        utils.CtxCachedObject
+            An object that returns tensor given context.
+        """
+        key = ('oriented ' if oriented else '') + 'incidence matrix'
+        if key in self._cache:
+            return self._cache[key]
+
+        src, dst, _ = self.edges(sorted=False)
+        src = src.tousertensor()
+        dst = dst.tousertensor()
+        num_edges = self.number_of_edges()
+        eid = F.arange(num_edges, dtype=F.int64)
+        # Rows are node ids (all src, then all dst); columns are edge ids.
+        index = F.stack([F.pack([src, dst]), F.pack([eid, eid])])
+
+        self_loop = (src == dst)
+        if oriented:
+            tail = -F.ones((num_edges,))
+            head = F.ones((num_edges,))
+            tail[self_loop] = 0
+            head[self_loop] = 0
+            values = F.pack([tail, head])
+        else:
+            ends = F.ones((num_edges,))
+            ends[self_loop] = 0
+            values = F.pack([ends, ends])
+
+        mat = F.sparse_tensor(index, values, [self.number_of_nodes(), num_edges])
+        # Moving the matrix to a context is deferred until one is requested.
+        cached = utils.CtxCachedObject(lambda ctx: F.to_context(mat, ctx))
+        self._cache[key] = cached
+        return cached
+
+    def to_networkx(self):
+        """Convert to networkx graph.
+
+        The edge id will be saved as the 'id' edge attribute. Every node of
+        this graph is present in the result, including nodes with no edges.
+
+        Returns
+        -------
+        networkx.DiGraph
+            The nx graph (a ``networkx.MultiDiGraph`` if this graph is a
+            multigraph).
+        """
+        src, dst, eid = self.edges()
+        ret = nx.MultiDiGraph() if self.is_multigraph() else nx.DiGraph()
+        # Add the nodes explicitly: add_edge only creates the endpoints of
+        # edges, so isolated nodes would otherwise be lost.
+        ret.add_nodes_from(range(self.number_of_nodes()))
+        for u, v, e in zip(src, dst, eid):
+            ret.add_edge(u, v, id=e)
+        return ret
+
+    def from_networkx(self, nx_graph):
+        """Convert from networkx graph.
+
+        The current content of this index is cleared first. If the edges
+        carry an 'id' attribute, they are added in edge id order; otherwise
+        they are added in networkx iteration order.
+
+        Parameters
+        ----------
+        nx_graph : networkx.DiGraph
+            The nx graph. Anything that is not a ``networkx.Graph`` is first
+            passed to the ``DiGraph`` / ``MultiDiGraph`` constructor.
+        """
+        self.clear()
+
+        if not isinstance(nx_graph, nx.Graph):
+            nx_graph = (nx.MultiDiGraph(nx_graph) if self.is_multigraph()
+                    else nx.DiGraph(nx_graph))
+        else:
+            nx_graph = nx_graph.to_directed()
+
+        self.add_nodes(nx_graph.number_of_nodes())
+
+        num_edges = nx_graph.number_of_edges()
+        if num_edges == 0:
+            # No edge to inspect for an 'id' attribute and nothing to add.
+            return
+
+        # edges(data=True) yields (u, v, attr_dict) for both DiGraph and
+        # MultiDiGraph; the attribute dict is always the last element.
+        has_edge_id = 'id' in next(iter(nx_graph.edges(data=True)))[-1]
+        if has_edge_id:
+            src = np.zeros((num_edges,), dtype=np.int64)
+            dst = np.zeros((num_edges,), dtype=np.int64)
+            for u, v, attr in nx_graph.edges(data=True):
+                # Place each edge at the position given by its id.
+                eid = attr['id']
+                src[eid] = u
+                dst[eid] = v
+        else:
+            src = []
+            dst = []
+            # MultiDiGraph yields (u, v, key) while DiGraph yields (u, v);
+            # the endpoints are the first two elements either way.
+            for e in nx_graph.edges:
+                src.append(e[0])
+                dst.append(e[1])
+        src = utils.toindex(src)
+        dst = utils.toindex(dst)
+        self.add_edges(src, dst)
+
+    def from_scipy_sparse_matrix(self, adj):
+        """Rebuild this graph from a scipy sparse adjacency matrix.
+
+        The current content is cleared, ``adj.shape[0]`` nodes are added, and
+        one edge ``row -> col`` is added per stored entry of the COO form.
+
+        Parameters
+        ----------
+        adj : scipy sparse matrix
+        """
+        self.clear()
+        self.add_nodes(adj.shape[0])
+        coo = adj.tocoo()
+        self.add_edges(utils.toindex(coo.row), utils.toindex(coo.col))
+
+    def line_graph(self, backtracking=True):
+        """Return the line graph of this graph.
+
+        Parameters
+        ----------
+        backtracking : bool, optional (default=True)
+          Whether (i, j) ~ (j, i) in L(G).
+          (i, j) ~ (j, i) is the behavior of networkx.line_graph.
+
+        Returns
+        -------
+        GraphIndex
+            The line graph of this graph.
+        """
+        handle = _CAPI_DGLGraphLineGraph(self._handle, backtracking)
+        return GraphIndex(handle)
+
+class SubgraphIndex(GraphIndex):
+    """Graph index for subgraph.
+
+    Keeps a reference to the parent graph index together with the parent
+    node/edge ids that make up this subgraph. ``add_nodes``, ``add_edge`` and
+    ``add_edges`` are overridden to raise.
+
+    Parameters
+    ----------
+    handle : GraphIndexHandle
+        The capi handle.
+    parent : GraphIndex
+        The parent graph index.
+    induced_nodes : utils.Index
+        The parent node ids in this subgraph.
+    induced_edges : utils.Index
+        The parent edge ids in this subgraph.
+    """
+    def __init__(self, handle, parent, induced_nodes, induced_edges):
+        super(SubgraphIndex, self).__init__(handle)
+        self._parent = parent
+        self._induced_nodes = induced_nodes
+        self._induced_edges = induced_edges
+
+    def add_nodes(self, num):
+        """Add nodes. Disabled because SubgraphIndex is read-only.
+
+        Raises
+        ------
+        RuntimeError
+            Always.
+        """
+        raise RuntimeError('Readonly graph. Mutation is not allowed.')
+
+    def add_edge(self, u, v):
+        """Add one edge. Disabled because SubgraphIndex is read-only.
+
+        Raises
+        ------
+        RuntimeError
+            Always.
+        """
+        raise RuntimeError('Readonly graph. Mutation is not allowed.')
+
+    def add_edges(self, u, v):
+        """Add edges. Disabled because SubgraphIndex is read-only.
+
+        Raises
+        ------
+        RuntimeError
+            Always.
+        """
+        raise RuntimeError('Readonly graph. Mutation is not allowed.')
+
+    @property
+    def induced_nodes(self):
+        """Return parent node ids.
+
+        Returns
+        -------
+        utils.Index
+            The parent node ids.
+        """
+        return self._induced_nodes
+
+    @property
+    def induced_edges(self):
+        """Return parent edge ids.
+
+        Returns
+        -------
+        utils.Index
+            The parent edge ids.
+        """
+        return self._induced_edges
+
+def disjoint_union(graphs):
+    """Return a disjoint union of the input graphs.
+
+    The new graph will include all the nodes/edges in the given graphs.
+    Nodes/Edges will be relabeled by adding the cumsum of the previous graph sizes
+    in the given sequence order. For example, giving input [g1, g2, g3], where
+    they have 5, 6, 7 nodes respectively. Then node#2 of g2 will become node#7
+    in the result graph. Edge ids are re-assigned similarly.
+
+    Parameters
+    ----------
+    graphs : iterable of GraphIndex
+        The input graphs. Any iterable is accepted, including generators.
+
+    Returns
+    -------
+    GraphIndex
+        The disjoint union
+    """
+    # Materialize the handles once so the count is available even when
+    # `graphs` is a one-shot iterable without len().
+    handles = [gr._handle for gr in graphs]
+    inputs = c_array(GraphIndexHandle, handles)
+    # The C API takes the handle array as an untyped pointer.
+    inputs = ctypes.cast(inputs, ctypes.c_void_p)
+    handle = _CAPI_DGLDisjointUnion(inputs, len(handles))
+    return GraphIndex(handle)
+
+def disjoint_partition(graph, num_or_size_splits):
+    """Partition the graph disjointly.
+
+    This is the reverse operation of ``disjoint_union``. When an integer is
+    given, the graph is partitioned into that many graphs, and the number of
+    partitions is required to evenly divide the number of nodes in the graph.
+    When a size list is given, the sizes are expected to sum to the number of
+    nodes in the graph.
+
+    Parameters
+    ----------
+    graph : GraphIndex
+        The graph to be partitioned
+    num_or_size_splits : int or utils.Index
+        The number of partitions, or the sizes of the partitions
+
+    Returns
+    -------
+    list of GraphIndex
+        The partitioned graphs
+    """
+    # A utils.Index selects the by-sizes C API; anything else is coerced to int
+    # and treated as the number of partitions.
+    if isinstance(num_or_size_splits, utils.Index):
+        rst = _CAPI_DGLDisjointPartitionBySizes(
+                graph._handle,
+                num_or_size_splits.todgltensor())
+    else:
+        rst = _CAPI_DGLDisjointPartitionByNum(
+                graph._handle,
+                int(num_or_size_splits))
+    graphs = []
+    # Each element of the returned array is converted to int and cast to a
+    # void pointer, which is then wrapped as a graph handle.
+    for val in rst.asnumpy():
+        handle = ctypes.cast(int(val), ctypes.c_void_p)
+        graphs.append(GraphIndex(handle))
+    return graphs
+
+def create_graph_index(graph_data=None, multigraph=False):
+    """Create a graph index object.
+
+    Parameters
+    ----------
+    graph_data : graph data, optional
+        Data to initialize graph. Same as networkx's semantics.
+        A GraphIndex is returned as-is; a scipy sparse matrix is loaded with
+        ``from_scipy_sparse_matrix``; anything else is handed to
+        ``from_networkx``.
+    multigraph : bool, optional
+        Whether the graph is multigraph (default is False)
+
+    Returns
+    -------
+    GraphIndex
+        The given index if ``graph_data`` already is one, otherwise a newly
+        created index (empty when ``graph_data`` is None).
+
+    Raises
+    ------
+    Exception
+        If the graph data cannot be converted.
+    """
+    if isinstance(graph_data, GraphIndex):
+        return graph_data
+
+    handle = _CAPI_DGLGraphCreate(multigraph)
+    gi = GraphIndex(handle)
+
+    if graph_data is None:
+        return gi
+
+    # scipy format
+    if isinstance(graph_data, scipy.sparse.spmatrix):
+        try:
+            gi.from_scipy_sparse_matrix(graph_data)
+        # Catch Exception rather than everything, so KeyboardInterrupt and
+        # SystemExit are not converted into a misleading conversion error.
+        except Exception:
+            raise Exception('Graph data is not a valid scipy sparse matrix.')
+        return gi
+
+    # networkx - any format
+    try:
+        gi.from_networkx(graph_data)
+    except Exception:
+        raise Exception('Error while creating graph from input of type "%s".'
+                         % type(graph_data))
+
+    return gi
+
+_init_api("dgl.graph_index")
--- a/python/dgl/ndarray.py
+++ b/python/dgl/ndarray.py
+"""DGL Runtime NDArray API.
+
+dgl.ndarray provides a minimum runtime array structure to be
+used with C++ library.
+"""
+# pylint: disable=invalid-name,unused-import
+from __future__ import absolute_import as _abs
+
+import ctypes
+import functools
+import operator
+import numpy as _np
+
+from ._ffi.ndarray import TVMContext, TVMType, NDArrayBase
+from ._ffi.ndarray import context, empty, from_dlpack, numpyasarray
+from ._ffi.ndarray import _set_class_ndarray
+from . import backend as F
+
+class NDArray(NDArrayBase):
+    """Lightweight NDArray class for DGL framework."""
+    def __len__(self):
+        """Return the product of all entries of ``self.shape`` (1 if empty)."""
+        extents = self.shape
+        return functools.reduce(operator.mul, extents, 1)
+
+def cpu(dev_id=0):
+    """Construct a CPU device
+
+    Parameters
+    ----------
+    dev_id : int, optional
+        The integer device id
+
+    Returns
+    -------
+    ctx : TVMContext
+        The created context
+    """
+    # NOTE(review): 1 is presumably the CPU device-type code expected by
+    # TVMContext -- confirm against the FFI definitions.
+    return TVMContext(1, dev_id)
+
+
+def gpu(dev_id=0):
+    """Construct a GPU device
+
+    Parameters
+    ----------
+    dev_id : int, optional
+        The integer device id
+
+    Returns
+    -------
+    ctx : TVMContext
+        The created context
+    """
+    # NOTE(review): 2 is presumably the GPU device-type code expected by
+    # TVMContext -- confirm against the FFI definitions.
+    return TVMContext(2, dev_id)
+
+def array(arr, ctx=cpu(0)):
+    """Copy ``arr`` into a newly allocated array on ``ctx``.
+
+    Parameters
+    ----------
+    arr : numpy.ndarray, NDArray, or array-like
+        The data to copy. Anything that is neither a numpy array nor an
+        NDArray is first converted with ``numpy.array``.
+
+    ctx : TVMContext, optional
+        The device context to create the array
+
+    Returns
+    -------
+    ret : NDArray
+        The created array
+    """
+    already_array = isinstance(arr, (_np.ndarray, NDArray))
+    source = arr if already_array else _np.array(arr)
+    # Allocate storage with matching shape/dtype, then copy the data into it.
+    target = empty(source.shape, source.dtype, ctx)
+    return target.copyfrom(source)
+
+def zerocopy_from_numpy(np_data):
+    """Create an array that shares the given numpy data.
+
+    Parameters
+    ----------
+    np_data : numpy.ndarray
+        The numpy data
+
+    Returns
+    -------
+    NDArray
+        The array, constructed as a view (``is_view=True``).
+    """
+    # Only the first element returned by numpyasarray is used here.
+    # NOTE(review): since the result is a view, np_data presumably has to stay
+    # alive for as long as the returned NDArray is used -- confirm.
+    arr, _ = numpyasarray(np_data)
+    handle = ctypes.pointer(arr)
+    return NDArray(handle, is_view=True)
+
+_set_class_ndarray(NDArray)
--- a/python/dgl/nn/__init__.py
+++ b/python/dgl/nn/__init__.py
+"""Package nn modules"""
+from __future__ import absolute_import
+
 import os
+
 __backend__ = os.environ.get('DGLBACKEND', 'pytorch').lower()

 if __backend__ == 'numpy':
    pass
 elif __backend__ == 'pytorch':
    from .pytorch import *
-else:
+elif __backend__ != 'mxnet':
    raise Exception("Unsupported backend %s" % __backend__)
--- a/python/dgl/nn/pytorch/gcn.py
+++ b/python/dgl/nn/pytorch/gcn.py
@@ -7,9 +7,8 @@ GCN with SPMV specialization.
 """
 import torch.nn as nn

-import dgl
-import dgl.function as fn
-from dgl.base import ALL, is_all
+from ... import function as fn
+from ...base import ALL, is_all

 class NodeUpdateModule(nn.Module):
    def __init__(self, in_feats, out_feats, activation=None):

--- a/python/dgl/nx_adapt.py
+++ b/python/dgl/nx_adapt.py
-"""Utility functions for networkx adapter."""
-from __future__ import absolute_import
-
-from collections import MutableMapping
-
-import networkx as nx
-import networkx.convert as convert
-
-class NodeDict(MutableMapping):
-    def __init__(self, add_cb, del_cb):
-        self._dict = {}
-        self._add_cb = add_cb
-        self._del_cb = del_cb
-    def __setitem__(self, key, val):
-        self._add_cb(key)
-        self._dict[key] = val
-    def __getitem__(self, key):
-        return self._dict[key]
-    def __delitem__(self, key):
-        self._del_cb(key)
-        del self._dict[key]
-    def __len__(self):
-        return len(self._dict)
-    def __iter__(self):
-        return iter(self._dict)
-
-class AdjOuterDict(MutableMapping):
-    def __init__(self, add_cb, del_cb):
-        self._dict = {}
-        self._add_cb = add_cb
-        self._del_cb = del_cb
-    def __setitem__(self, key, val):
-        val.src = key
-        self._dict[key] = val
-    def __getitem__(self, key):
-        return self._dict[key]
-    def __delitem__(self, key):
-        for val in self._dict[key]:
-            self._del_cb(key, val)
-        del self._dict[key]
-    def __len__(self):
-        return len(self._dict)
-    def __iter__(self):
-        return iter(self._dict)
-
-class AdjInnerDict(MutableMapping):
-    def __init__(self, add_cb, del_cb):
-        self._dict = {}
-        self.src = None
-        self._add_cb = add_cb
-        self._del_cb = del_cb
-    def __setitem__(self, key, val):
-        if self.src is not None and key not in self._dict:
-            self._add_cb(self.src, key)
-        self._dict[key] = val
-    def __getitem__(self, key):
-        return self._dict[key]
-    def __delitem__(self, key):
-        if self.src is not None:
-            self._del_cb(self.src, key)
-        del self._dict[key]
-    def __len__(self):
-        return len(self._dict)
-    def __iter__(self):
-        return iter(self._dict)
-
-class AdjInnerDictFactory(object):
-    def __init__(self, cb1, cb2):
-        self._cb1 = cb1
-        self._cb2 = cb2
-    def __call__(self):
-        return AdjInnerDict(self._cb1, self._cb2)
-
-def nx_init(obj,
-            add_node_cb,
-            add_edge_cb,
-            del_node_cb,
-            del_edge_cb,
-            graph_data,
-            **attr):
-    """Init the object to be compatible with networkx's DiGraph.
-
-    Parameters
-    ----------
-    obj : any
-        The object to be init.
-    add_node_cb : callable
-        The callback function when node is added.
-    add_edge_cb : callable
-        The callback function when edge is added.
-    graph_data : graph data
-        Data to initialize graph. Same as networkx's semantics.
-    attr : keyword arguments, optional
-        Attributes to add to graph as key=value pairs.
-    """
-    # The following codes work for networkx 2.1.
-    obj.adjlist_outer_dict_factory = None
-    obj.adjlist_inner_dict_factory = AdjInnerDictFactory(add_edge_cb, del_edge_cb)
-    obj.edge_attr_dict_factory = dict
-
-    obj.root_graph = obj
-    obj.graph = {}
-    obj._node = NodeDict(add_node_cb, del_node_cb)
-
-    obj._adj = AdjOuterDict(add_edge_cb, del_edge_cb)
-    obj._pred = dict()
-    obj._succ = obj._adj
-
-    if graph_data is not None:
-        convert.to_networkx_graph(graph_data, create_using=obj)
-    obj.graph.update(attr)
--- a/python/dgl/scheduler.py
+++ b/python/dgl/scheduler.py
@@ -3,20 +3,23 @@ from __future__ import absolute_import

 import numpy as np

-import dgl.backend as F
-import dgl.function.message as fmsg
-import dgl.function.reducer as fred
-import dgl.utils as utils
-from dgl.base import ALL
+from .base import ALL, DGLError
+from . import backend as F
+from .function import message as fmsg
+from .function import reducer as fred
+from . import utils
+from collections import defaultdict as ddict

-__all__ = ["degree_bucketing", "get_executor"]
+from ._ffi.function import _init_api

-def degree_bucketing(cached_graph, v):
+__all__ = ["degree_bucketing", "get_recv_executor", "get_executor"]
+
+def degree_bucketing(graph, v):
    """Create degree bucketing scheduling policy.

    Parameters
    ----------
-    cached_graph : dgl.cached_graph.CachedGraph
+    graph : dgl.graph_index.GraphIndex
        the graph
    v : dgl.utils.Index
        the nodes to gather messages
@@ -29,7 +32,7 @@ def degree_bucketing(cached_graph, v):
        list of node id buckets; nodes belong to the same bucket have
        the same degree
    """
-    degrees = F.asnumpy(cached_graph.in_degrees(v).totensor())
+    degrees = np.array(graph.in_degrees(v).tolist())
    unique_degrees = list(np.unique(degrees))
    v_np = np.array(v.tolist())
    v_bkt = []
@@ -39,9 +42,84 @@ def degree_bucketing(cached_graph, v):
    #print('degree-bucketing:', unique_degrees, [len(b) for b in v_bkt])
    return unique_degrees, v_bkt

+def _process_buckets(buckets):
+    """Unpack the auxiliary data produced by a degree-bucketing call.
+
+    ``buckets`` is called with the indices 0..4 to fetch, in order: the
+    degrees, the node ids, the node section sizes, the message ids and the
+    message section sizes.
+
+    Returns the tuple ``(unique_v, degs, dsts, msg_ids)``, where ``dsts`` and
+    ``msg_ids`` are lists of utils.Index, one entry per section.
+    """
+    # fetch the five result slots
+    degs = utils.toindex(buckets(0))
+    nodes = utils.toindex(buckets(1))
+    # TODO: convert directly from ndarray to python list?
+    node_sections = buckets(2).asnumpy().tolist()
+    flat_msg_ids = utils.toindex(buckets(3))
+    msg_sections = buckets(4).asnumpy().tolist()
+
+    # cut the flat node/message id tensors into per-bucket pieces
+    node_tensor = nodes.tousertensor()
+    msg_tensor = flat_msg_ids.tousertensor()
+    node_chunks = F.unpack(node_tensor, node_sections)
+    msg_chunks = F.unpack(msg_tensor, msg_sections)
+
+    # wrap everything back into utils.Index
+    unique_v = utils.toindex(node_tensor)
+    dsts = [utils.toindex(chunk) for chunk in node_chunks]
+    msg_ids = [utils.toindex(chunk) for chunk in msg_chunks]
+
+    return unique_v, degs, dsts, msg_ids
+
+def light_degree_bucketing(v):
+    """Return the bucketing by degree scheduling for destination nodes of messages
+
+    Parameters
+    ----------
+    v: utils.Index
+        destination node for each message
+
+    Returns
+    -------
+    unique_v: utils.Index
+        unique destination nodes
+    degrees: utils.Index
+        A list of degree for each bucket
+    v_bkt: list of utils.Index
+        A list of node id buckets, nodes in each bucket have the same degree
+    msg_ids: list of utils.Index
+        A list of message id buckets, each node in the ith node id bucket has
+        degree[i] messages in the ith message id bucket
+    """
+    # The bucketing itself is done by the C API; only unpacking happens here.
+    buckets = _CAPI_DGLDegreeBucketing(v.todgltensor())
+    return _process_buckets(buckets)
+
+def light_degree_bucketing_for_graph(graph):
+    """Return the bucketing by degree scheduling for the entire graph
+
+    Parameters
+    ----------
+    graph: GraphIndex
+        The graph index whose handle is passed to the C API
+
+    Returns
+    -------
+    unique_v: utils.Index
+        unique destination nodes
+    degrees: utils.Index
+        A list of degree for each bucket
+    v_bkt: list of utils.Index
+        A list of node id buckets, nodes in each bucket have the same degree
+    msg_ids: list of utils.Index
+        A list of message id buckets, each node in the ith node id bucket has
+        degree[i] messages in the ith message id bucket
+    """
+    # This is a module-level function, so the handle comes from the ``graph``
+    # argument (there is no ``self`` in scope here).
+    buckets = _CAPI_DGLDegreeBucketingFromGraph(graph._handle)
+    return _process_buckets(buckets)
+

 class Executor(object):
+    """Base class for executing graph computation."""
+
    def run(self):
+        """Run this executor.
+
+        This should return the new node features.
+
+        TODO(minjie): extend this to support computation on edges.
+        """
        raise NotImplementedError

 class SPMVOperator(Executor):
@@ -56,10 +134,7 @@ class SPMVOperator(Executor):

    def run(self):
        # get src col
-        if self.src_field is None:
-            srccol = self.node_repr
-        else:
-            srccol = self.node_repr[self.src_field]
+        srccol = self.node_repr[self.src_field]
        ctx = F.get_context(srccol)

        # build adjmat
@@ -72,10 +147,50 @@ class SPMVOperator(Executor):
            dstcol = F.squeeze(dstcol)
        else:
            dstcol = F.spmm(adjmat, srccol)
-        if self.dst_field is None:
-            return dstcol
+        return {self.dst_field : dstcol}
+
+
+# FIXME: refactorize in scheduler/executor redesign
+class DegreeBucketingExecutor(Executor):
+    def __init__(self, g, rfunc, message_frame, edges=None):
+        self.g = g
+        self.rfunc = rfunc
+        self.msg_frame = message_frame
+
+        # calc degree bucketing schedule
+        if edges is not None:
+            unique_v, degs, dsts, msg_ids = light_degree_bucketing(edges[1])
        else:
-            return {self.dst_field : dstcol}
+            unique_v, degs, dsts, msg_ids = light_degree_bucketing_for_graph(g._graph)
+        self._recv_nodes = unique_v
+        self.degrees = degs
+        self.dsts = dsts
+        self.msg_ids = msg_ids
+
+    @property
+    def recv_nodes(self):
+        return self._recv_nodes
+
+    def run(self):
+        new_reprs = []
+        # loop over each bucket
+        # FIXME (lingfan): handle zero-degree case
+        for deg, vv, msg_id in zip(self.degrees, self.dsts, self.msg_ids):
+            dst_reprs = self.g.get_n_repr(vv)
+            in_msgs = self.msg_frame.select_rows(msg_id)
+            def _reshape_fn(msg):
+                msg_shape = F.shape(msg)
+                new_shape = (len(vv), deg) + msg_shape[1:]
+                return F.reshape(msg, new_shape)
+            reshaped_in_msgs = utils.LazyDict(
+                    lambda key: _reshape_fn(in_msgs[key]), self.msg_frame.schemes)
+            new_reprs.append(self.rfunc(dst_reprs, reshaped_in_msgs))
+
+        # Pack all reducer results together
+        keys = new_reprs[0].keys()
+        new_reprs = {key : F.pack([repr[key] for repr in new_reprs])
+                     for key in keys}
+        return new_reprs


 class BasicExecutor(Executor):
@@ -92,7 +207,7 @@ class BasicExecutor(Executor):
        raise NotImplementedError

    @property
-    def graph_mapping(self):
+    def recv_nodes(self):
        raise NotImplementedError

    def _build_exec(self, mfunc, rfunc):
@@ -115,8 +230,7 @@ class BasicExecutor(Executor):
        return exe

    def run(self):
-        attr = self.exe.run()
-        self.g.set_n_repr(attr, self.graph_mapping)
+        return self.exe.run()


 class UpdateAllExecutor(BasicExecutor):
@@ -129,13 +243,7 @@ class UpdateAllExecutor(BasicExecutor):
        self._edge_repr = None
        self._graph_idx = None
        self._graph_shape = None
-        self._graph_mapping = None
-
-    @property
-    def graph_idx(self):
-        if self._graph_idx is None:
-            self._graph_idx = self.g.cached_graph.adjmat()
-        return self._graph_idx
+        self._recv_nodes = None

    @property
    def graph_shape(self):
@@ -145,7 +253,7 @@ class UpdateAllExecutor(BasicExecutor):
        return self._graph_shape

    @property
-    def graph_mapping(self):
+    def recv_nodes(self):
        return ALL

    @property
@@ -162,16 +270,13 @@ class UpdateAllExecutor(BasicExecutor):

    def _adj_build_fn(self, edge_field, ctx, use_edge_feat):
        if use_edge_feat:
-            if edge_field is None:
-                dat = self.edge_repr
-            else:
-                dat = self.edge_repr[edge_field]
+            dat = self.edge_repr[edge_field]
            dat = F.squeeze(dat)
            # TODO(minjie): should not directly use _indices
-            idx = self.graph_idx.get(ctx)._indices()
+            idx = self.g.adjacency_matrix(ctx)._indices()
            adjmat = F.sparse_tensor(idx, dat, self.graph_shape)
        else:
-            adjmat = self.graph_idx.get(ctx)
+            adjmat = self.g.adjacency_matrix(ctx)
        return adjmat


@@ -186,7 +291,7 @@ class SendRecvExecutor(BasicExecutor):
        self._edge_repr = None
        self._graph_idx = None
        self._graph_shape = None
-        self._graph_mapping = None
+        self._recv_nodes = None

    @property
    def graph_idx(self):
@@ -201,10 +306,10 @@ class SendRecvExecutor(BasicExecutor):
        return self._graph_shape

    @property
-    def graph_mapping(self):
-        if self._graph_mapping is None:
+    def recv_nodes(self):
+        if self._recv_nodes is None:
            self._build_adjmat()
-        return self._graph_mapping
+        return self._recv_nodes

    @property
    def node_repr(self):
@@ -221,22 +326,19 @@ class SendRecvExecutor(BasicExecutor):
    def _build_adjmat(self):
        # handle graph index
        new2old, old2new = utils.build_relabel_map(self.v)
-        u = self.u.totensor()
-        v = self.v.totensor()
+        u = self.u.tousertensor()
+        v = self.v.tousertensor()
        # TODO(minjie): should not directly use []
        new_v = old2new[v]
        n = self.g.number_of_nodes()
        m = len(new2old)
        self._graph_idx = F.pack([F.unsqueeze(new_v, 0), F.unsqueeze(u, 0)])
        self._graph_shape = [m, n]
-        self._graph_mapping = new2old
+        self._recv_nodes = new2old

    def _adj_build_fn(self, edge_field, ctx, use_edge_feat):
        if use_edge_feat:
-            if edge_field is None:
-                dat = self.edge_repr
-            else:
-                dat = self.edge_repr[edge_field]
+            dat = self.edge_repr[edge_field]
            dat = F.squeeze(dat)
        else:
            dat = F.ones((len(self.u), ))
@@ -268,9 +370,8 @@ class BundledExecutor(BasicExecutor):
        func_pairs = []
        for rfn in rfunc.fn_list:
            mfn = out2mfunc.get(rfn.msg_field, None)
-            # field check
-            assert mfn is not None, \
-                    "cannot find message func for reduce func in-field {}".format(rfn.msg_field)
+            if mfn is None:
+                raise DGLError('Cannot find message field "%s".' % rfn.msg_field)
            func_pairs.append((mfn, rfn))
        return func_pairs

@@ -283,7 +384,7 @@ class BundledExecutor(BasicExecutor):
            else:
                # attr and res must be dict
                attr.update(res)
-        self.g.set_n_repr(attr, self.graph_mapping)
+        return attr


 class BundledUpdateAllExecutor(BundledExecutor, UpdateAllExecutor):
@@ -291,13 +392,20 @@ class BundledUpdateAllExecutor(BundledExecutor, UpdateAllExecutor):
        self._init_state()
        BundledExecutor.__init__(self, graph, mfunc, rfunc)

-
 class BundledSendRecvExecutor(BundledExecutor, SendRecvExecutor):
    def __init__(self, graph, src, dst, mfunc, rfunc):
        self._init_state(src, dst)
        BundledExecutor.__init__(self, graph, mfunc, rfunc)

 def _is_spmv_supported(fn, graph=None):
+    # FIXME: also take into account
+    # (1) which backend DGL is under.
+    # (2) whether the graph is a multigraph.
+    #
+    # Current SPMV optimizer assumes that duplicate entries are summed up
+    # in sparse matrices, which is the case for PyTorch but not MXNet.
+    # The result is that on multigraphs, SPMV can still work for reducer=sum
+    # and message=copy_src/src_mul_edge *only in PyTorch*.
    if isinstance(fn, fmsg.MessageFunction):
        return fn.is_spmv_supported(graph)
    elif isinstance(fn, fred.ReduceFunction):
@@ -342,3 +450,24 @@ def get_executor(call_type, graph, **kwargs):
        return _create_send_and_recv_exec(graph, **kwargs)
    else:
        return None
+
+def get_recv_executor(graph, reduce_func, message_frame, edges=None):
+    """Create executor for recv phase
+
+    Parameters
+    ----------
+    graph: DGLGraph
+        DGLGraph on which to perform recv
+    reduce_func: callable
+        The reduce function
+    message_frame: FrameRef
+        Message frame
+    edges: tuple/list of utils.Index
+        src and dst Index representing edges along which messages are sent
+        If not specified, all edges of graph are used instead
+
+    Returns
+    -------
+    DegreeBucketingExecutor
+        The executor; currently always a degree-bucketing executor.
+    """
+
+    # FIXME: handle builtin spmv executor case
+    return DegreeBucketingExecutor(graph, reduce_func, message_frame, edges)
+
+_init_api("dgl.scheduler")
--- a/python/dgl/subgraph.py
+++ b/python/dgl/subgraph.py
-"""DGLSubGraph"""
+"""Class for subgraph data structure."""
 from __future__ import absolute_import

 import networkx as nx
-import dgl.backend as F
-from dgl.frame import Frame, FrameRef
-from dgl.graph import DGLGraph
-from dgl.nx_adapt import nx_init
-import dgl.utils as utils
+
+from . import backend as F
+from .frame import Frame, FrameRef
+from .graph import DGLGraph
+from . import utils

 class DGLSubGraph(DGLGraph):
-    # TODO(gaiyu): ReadOnlyGraph
-    def __init__(self,
-                 parent,
-                 nodes):
-        super(DGLSubGraph, self).__init__()
-        # relabel nodes
-        self._node_mapping = utils.build_relabel_dict(nodes)
-        self._parent_nid = utils.toindex(nodes)
-        eids = []
-        # create subgraph
-        for eid, (u, v) in enumerate(parent.edge_list):
-            if u in self._node_mapping and v in self._node_mapping:
-                self.add_edge(self._node_mapping[u],
-                              self._node_mapping[v])
-                eids.append(eid)
-        self._parent_eid = utils.toindex(eids)
-
-    def copy_from(self, parent):
-        """Copy node/edge features from the parent graph.
+    """The subgraph class.

-        All old features will be removed.
+    There are two subgraph modes: shared and non-shared.
+
+    For the "non-shared" mode, the user needs to explicitly call
+    ``copy_from_parent`` to copy node/edge features from its parent graph.
+    * If the user tries to get node/edge features before ``copy_from_parent``,
+      s/he will get nothing.
+    * If the subgraph already has its own node/edge features, ``copy_from_parent``
+      will override them.
+    * Any update on the subgraph's node/edge features will not be seen
+      by the parent graph. As such, the memory consumption is of the order
+      of the subgraph size.
+    * To write the subgraph's node/edge features back to parent graph. There are two options:
+      (1) Use ``copy_to_parent`` API to write node/edge features back.
+      (2) [TODO] Use ``dgl.merge`` to merge multiple subgraphs back to one parent.
+
+    The "shared" mode is currently not supported.
+
+    The subgraph is read-only so mutation is not allowed.
+
+    Parameters
+    ----------
+    parent : DGLGraph
+        The parent graph
+    parent_nid : utils.Index
+        The induced parent node ids in this subgraph.
+    parent_eid : utils.Index
+        The induced parent edge ids in this subgraph.
+    graph_idx : GraphIndex
+        The graph index.
+    shared : bool, optional
+        Whether the subgraph shares node/edge features with the parent graph.
+    """
+    def __init__(self, parent, parent_nid, parent_eid, graph_idx, shared=False):
+        # The graph index is handed to the base class as its graph data.
+        # NOTE: ``shared`` is accepted but not read anywhere in this constructor.
+        super(DGLSubGraph, self).__init__(graph_data=graph_idx)
+        self._parent = parent
+        self._parent_nid = parent_nid
+        self._parent_eid = parent_eid
+
+    # override APIs
+    def add_nodes(self, num, reprs=None):
+        """Add nodes. Disabled because DGLSubGraph is read-only.
+
+        Raises
+        ------
+        RuntimeError
+            Always; the arguments are ignored.
+        """
+        raise RuntimeError('Readonly graph. Mutation is not allowed.')
+
+    def add_edge(self, u, v, reprs=None):
+        """Add one edge. Disabled because DGLSubGraph is read-only.
+
+        Raises
+        ------
+        RuntimeError
+            Always; the arguments are ignored.
+        """
+        raise RuntimeError('Readonly graph. Mutation is not allowed.')
+
+    def add_edges(self, u, v, reprs=None):
+        """Add many edges. Disabled because DGLSubGraph is read-only.
+
+        Raises
+        ------
+        RuntimeError
+            Always; the arguments are ignored.
+        """
+        raise RuntimeError('Readonly graph. Mutation is not allowed.')
+
+    @property
+    def parent_nid(self):
+        """Get the parent node ids.
+
+        The returned tensor can be used as a map from the node id
+        in this subgraph to the node id in the parent graph.
+
+        Returns
+        -------
+        Tensor
+            The parent node id array.
+        """
+        return self._parent_nid.tousertensor()
+
+    @property
+    def parent_eid(self):
+        """Get the parent edge ids.
+
+        The returned tensor can be used as a map from the edge id
+        in this subgraph to the edge id in the parent graph.
+
+        Returns
+        -------
+        Tensor
+            The parent edge id array.
+        """
+        return self._parent_eid.tousertensor()
+
+    def copy_to_parent(self, inplace=False):
+        """Write node/edge features to the parent graph.

        Parameters
        ----------
-        parent : DGLGraph
-            The parent graph to copy from.
+        inplace : bool
+            If true, use inplace write (no gradient but faster)
+        """
+        self._parent._node_frame.update_rows(
+                self._parent_nid, self._node_frame, inplace=inplace)
+        self._parent._edge_frame.update_rows(
+                self._parent_eid, self._edge_frame, inplace=inplace)
+
+    def copy_from_parent(self):
+        """Copy node/edge features from the parent graph.
+
+        All old features will be removed.
        """
-        if parent._node_frame.num_rows != 0:
-            self._node_frame = FrameRef(Frame(parent._node_frame[self._parent_nid]))
-        if parent._edge_frame.num_rows != 0:
-            self._edge_frame = FrameRef(Frame(parent._edge_frame[self._parent_eid]))
+        if self._parent._node_frame.num_rows != 0:
+            self._node_frame = FrameRef(Frame(
+                self._parent._node_frame[self._parent_nid]))
+        if self._parent._edge_frame.num_rows != 0:
+            self._edge_frame = FrameRef(Frame(
+                self._parent._edge_frame[self._parent_eid]))
--- a/python/dgl/utils.py
+++ b/python/dgl/utils.py
@@ -5,50 +5,70 @@ from collections import Mapping
 from functools import wraps
 import numpy as np

-import dgl.backend as F
-from dgl.backend import Tensor, SparseTensor
-
-def is_id_tensor(u):
-    """Return whether the input is a supported id tensor."""
-    return isinstance(u, Tensor) and F.isinteger(u) and len(F.shape(u)) == 1
-
-def is_id_container(u):
-    """Return whether the input is a supported id container."""
-    return (getattr(u, '__iter__', None) is not None
-            and getattr(u, '__len__', None) is not None)
+from . import backend as F
+from .backend import Tensor, SparseTensor
+from . import ndarray as nd

 class Index(object):
    """Index class that can be easily converted to list/tensor."""
    def __init__(self, data):
-        self._list_data = None
-        self._tensor_data = None
-        self._ctx_data = dict()
+        self._list_data = None  # a numpy type data
+        self._user_tensor_data = dict()  # dictionary of user tensors
+        self._dgl_tensor_data = None  # a dgl ndarray
        self._dispatch(data)

    def _dispatch(self, data):
-        if is_id_tensor(data):
-            self._tensor_data = data
-        elif is_id_container(data):
-            self._list_data = data
+        """Store data based on its type."""
+        if isinstance(data, Tensor):
+            if not (F.dtype(data) == F.int64 and len(F.shape(data)) == 1):
+                raise ValueError('Index data must be 1D int64 vector, but got: %s' % str(data))
+            self._user_tensor_data[F.get_context(data)] = data
+        elif isinstance(data, nd.NDArray):
+            if not (data.dtype == 'int64' and len(data.shape) == 1):
+                raise ValueError('Index data must be 1D int64 vector, but got: %s' % str(data))
+            self._dgl_tensor_data = data
        else:
            try:
-                self._list_data = [int(data)]
+                self._list_data = np.array([int(data)]).astype(np.int64)
            except:
-                raise TypeError('Error index data: %s' % str(x))
+                try:
+                    self._list_data = np.array(data).astype(np.int64)
+                except:
+                    raise ValueError('Error index data: %s' % str(data))
+            self._user_tensor_data[nd.cpu()] = F.zerocopy_from_numpy(self._list_data)

    def tolist(self):
+        """Convert to a python-list compatible object."""
        if self._list_data is None:
-            self._list_data = list(F.asnumpy(self._tensor_data))
+            if self._dgl_tensor_data is not None:
+                self._list_data = self._dgl_tensor_data.asnumpy()
+            else:
+                data = self.tousertensor()
+                self._list_data = F.zerocopy_to_numpy(data)
        return self._list_data

-    def totensor(self, ctx=None):
-        if self._tensor_data is None:
-            self._tensor_data = F.tensor(self._list_data, dtype=F.int64)
+    def tousertensor(self, ctx=None):
+        """Convert to user tensor (defined in `backend`)."""
        if ctx is None:
-            return self._tensor_data
-        if ctx not in self._ctx_data:
-            self._ctx_data[ctx] = F.to_context(self._tensor_data, ctx)
-        return self._ctx_data[ctx]
+            ctx = nd.cpu()
+        if len(self._user_tensor_data) == 0:
+            # zero copy from dgl tensor
+            dl = self._dgl_tensor_data.to_dlpack()
+            self._user_tensor_data[nd.cpu()] = F.zerocopy_from_dlpack(dl)
+        if ctx not in self._user_tensor_data:
+            # copy from cpu to another device
+            data = next(iter(self._user_tensor_data.values()))
+            self._user_tensor_data[ctx] = F.to_context(data, ctx)
+        return self._user_tensor_data[ctx]
+
+    def todgltensor(self):
+        """Convert to dgl.NDArray."""
+        if self._dgl_tensor_data is None:
+            # zero copy from user tensor
+            tsor = self.tousertensor()
+            dl = F.zerocopy_to_dlpack(tsor)
+            self._dgl_tensor_data = nd.from_dlpack(dl)
+        return self._dgl_tensor_data

    def __iter__(self):
        return iter(self.tolist())
@@ -56,8 +76,11 @@ class Index(object):
    def __len__(self):
        if self._list_data is not None:
            return len(self._list_data)
+        elif len(self._user_tensor_data) > 0:
+            data = next(iter(self._user_tensor_data.values()))
+            return len(data)
        else:
-            return len(self._tensor_data)
+            return len(self._dgl_tensor_data)

    def __getitem__(self, i):
        return self.tolist()[i]
@@ -118,40 +141,13 @@ def edge_broadcasting(u, v):
        The dst id(s) after broadcasting
    """
    if len(u) != len(v) and len(u) == 1:
-        u = toindex(F.broadcast_to(u.totensor(), v.totensor()))
+        u = toindex(F.broadcast_to(u.tousertensor(), v.tousertensor()))
    elif len(u) != len(v) and len(v) == 1:
-        v = toindex(F.broadcast_to(v.totensor(), u.totensor()))
+        v = toindex(F.broadcast_to(v.tousertensor(), u.tousertensor()))
    else:
        assert len(u) == len(v)
    return u, v

-'''
-def convert_to_id_container(x):
-    if is_id_container(x):
-        return x
-    elif is_id_tensor(x):
-        return F.asnumpy(x)
-    else:
-        try:
-            return [int(x)]
-        except:
-            raise TypeError('Error node: %s' % str(x))
-    return None
-
-def convert_to_id_tensor(x, ctx=None):
-    if is_id_container(x):
-        ret = F.tensor(x, dtype=F.int64)
-    elif is_id_tensor(x):
-        ret = x
-    else:
-        try:
-            ret = F.tensor([int(x)], dtype=F.int64)
-        except:
-            raise TypeError('Error node: %s' % str(x))
-    ret = F.to_context(ret, ctx)
-    return ret
-'''
-
 class LazyDict(Mapping):
    """A readonly dictionary that does not materialize the storage."""
    def __init__(self, fn, keys):
@@ -172,6 +168,34 @@ class LazyDict(Mapping):
    def __len__(self):
        return len(self._keys)

+class HybridDict(Mapping):
+    """A read-only dictionary that merges several dict-likes (python dict, LazyDict).
+
+    If a key appears in more than one of them, the earliest dict-like that
+    contains it takes priority over the later ones.
+    """
+    def __init__(self, *dict_like_list):
+        self._dict_like_list = dict_like_list
+        self._keys = None
+
+    def keys(self):
+        """Return the union of the keys of all merged dict-likes as a list.
+
+        The list is computed on the first call and cached afterwards.
+        """
+        if self._keys is None:
+            # sum() cannot be used here: sets do not support `+`, so it would
+            # raise TypeError. Build the union explicitly instead.
+            self._keys = list(set().union(*[d.keys() for d in self._dict_like_list]))
+        return self._keys
+
+    def __getitem__(self, key):
+        """Return the value from the first dict-like that contains ``key``.
+
+        Raises
+        ------
+        KeyError
+            If none of the merged dict-likes contains ``key``.
+        """
+        for d in self._dict_like_list:
+            if key in d:
+                return d[key]
+        # The Mapping protocol (get(), items(), ...) relies on KeyError for
+        # missing keys; falling through would silently return None.
+        raise KeyError(key)
+
+    def __contains__(self, key):
+        return key in self.keys()
+
+    def __iter__(self):
+        return iter(self.keys())
+
+    def __len__(self):
+        return len(self.keys())
+
 class ReadOnlyDict(Mapping):
    """A readonly dictionary wrapper."""
    def __init__(self, dict_like):
@@ -209,7 +233,7 @@ def build_relabel_map(x):
      One can use advanced indexing to convert an old id tensor to a
      new id tensor: new_id = old_to_new[old_id]
    """
-    x = x.totensor()
+    x = x.tousertensor()
    unique_x, _ = F.sort(F.unique(x))
    map_len = int(F.max(unique_x)) + 1
    old_to_new = F.zeros(map_len, dtype=F.int64)
@@ -316,6 +340,6 @@ def reorder(dict_like, index):
    """
    new_dict = {}
    for key, val in dict_like.items():
-        idx_ctx = index.totensor(F.get_context(val))
+        idx_ctx = index.tousertensor(F.get_context(val))
        new_dict[key] = F.gather_row(val, idx_ctx)
    return new_dict
--- a/python/setup.py
+++ b/python/setup.py
@@ -17,7 +17,6 @@ setuptools.setup(
        'numpy>=1.14.0',
        'scipy>=1.1.0',
        'networkx>=2.1',
-        'python-igraph>=0.7.0',
    ],
    data_files=[('', ['VERSION'])],
-    url='https://github.com/jermainewang/dgl-1')
+    url='https://github.com/jermainewang/dgl')
--- a/src/c_api_common.cc
+++ b/src/c_api_common.cc
+/*!
+ *  Copyright (c) 2018 by Contributors
+ * \file c_api_common.cc
+ * \brief DGL C API common implementations
+ */
+#include "c_api_common.h"
+
+using tvm::runtime::TVMArgs;
+using tvm::runtime::TVMArgValue;
+using tvm::runtime::TVMRetValue;
+using tvm::runtime::PackedFunc;
+using tvm::runtime::NDArray;
+
+namespace dgl {
+
+DLManagedTensor* CreateTmpDLManagedTensor(const TVMArgValue& arg) {
+  const DLTensor* dl_tensor = arg;  // uses TVMArgValue's conversion to a DLTensor pointer
+  DLManagedTensor* ret = new DLManagedTensor();  // wrapper is released by the deleter set below
+  ret->deleter = [] (DLManagedTensor* self) { delete self; };  // frees only the wrapper struct
+  ret->manager_ctx = nullptr;
+  ret->dl_tensor = *dl_tensor;  // struct copy: pointer members still refer to the argument's data
+  return ret;
+}
+
+// Wrap `vec` in a PackedFunc: calling it with an index i returns vec[i];
+// an out-of-range index is a fatal error.
+PackedFunc ConvertNDArrayVectorToPackedFunc(const std::vector<NDArray>& vec) {
+  // Capture by copy so the closure does not refer to the caller's vector.
+  auto body = [vec](TVMArgs args, TVMRetValue* rv) {
+    const size_t which = args[0];
+    if (which >= vec.size()) {
+      LOG(FATAL) << "invalid choice";
+    } else {
+      // `vec` is const inside this non-mutable lambda, so std::move() on its
+      // elements could only ever copy. Assign directly to make that explicit.
+      *rv = vec[which];
+    }
+  };
+  return PackedFunc(body);
+}
+
+}  // namespace dgl
+
--- a/src/c_api_common.h
+++ b/src/c_api_common.h
+/*!
+ *  Copyright (c) 2018 by Contributors
+ * \file c_api_common.h
+ * \brief DGL C API common util functions
+ */
+#ifndef DGL_C_API_COMMON_H_
+#define DGL_C_API_COMMON_H_
+
+#include <dgl/runtime/ndarray.h>
+#include <dgl/runtime/packed_func.h>
+#include <dgl/runtime/registry.h>
+#include <vector>
+
+namespace dgl {
+
+// Graph handler type
+typedef void* GraphHandle;
+
+/*!
+ * \brief Convert the given DLTensor to DLManagedTensor.
+ *
+ * Return a temporary DLManagedTensor that does not own memory.
+ */
+DLManagedTensor* CreateTmpDLManagedTensor(
+    const tvm::runtime::TVMArgValue& arg);
+
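+/*!
+ * \brief Convert a vector of NDArray to PackedFunc.
+ *
+ * Calling the returned function with an index i returns the i-th array
+ * of the vector; an out-of-range index is a fatal error.
+ */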
+tvm::runtime::PackedFunc ConvertNDArrayVectorToPackedFunc(
+    const std::vector<tvm::runtime::NDArray>& vec);
+
+}  // namespace dgl
+
+#endif  // DGL_C_API_COMMON_H_
--- a/src/graph/graph.cc
+++ b/src/graph/graph.cc
+/*!
+ *  Copyright (c) 2018 by Contributors
+ * \file graph/graph.cc
+ * \brief DGL graph index implementation
+ */
+#include <dgl/graph.h>
+#include <algorithm>
+#include <unordered_map>
+#include <set>
+#include <functional>
+
+namespace dgl {
+namespace {
+// An id array is accepted only if it is a 1-D, 64-bit integer array on CPU.
+inline bool IsValidIdArray(const IdArray& arr) {
+  if (arr->ctx.device_type != kDLCPU) return false;
+  if (arr->ndim != 1) return false;
+  if (arr->dtype.code != kDLInt) return false;
+  return arr->dtype.bits == 64;
+}
+}  // namespace
+
+// Append `num_vertices` vertices; their adjacency lists start out empty.
+void Graph::AddVertices(uint64_t num_vertices) {
+  CHECK(!read_only_) << "Graph is read-only. Mutations are not allowed.";
+  const auto new_out_size = adjlist_.size() + num_vertices;
+  const auto new_in_size = reverse_adjlist_.size() + num_vertices;
+  adjlist_.resize(new_out_size);
+  reverse_adjlist_.resize(new_in_size);
+}
+
+// Add one edge src -> dst. The new edge gets the next unused edge id and is
+// recorded in the forward list, the reverse list and the flat edge lists.
+void Graph::AddEdge(dgl_id_t src, dgl_id_t dst) {
+  CHECK(!read_only_) << "Graph is read-only. Mutations are not allowed.";
+  CHECK(HasVertex(src) && HasVertex(dst))
+    << "Invalid vertices: src=" << src << " dst=" << dst;
+
+  const dgl_id_t new_eid = num_edges_;
+  ++num_edges_;
+
+  auto& out_list = adjlist_[src];
+  out_list.succ.push_back(dst);
+  out_list.edge_id.push_back(new_eid);
+
+  auto& in_list = reverse_adjlist_[dst];
+  in_list.succ.push_back(src);
+  in_list.edge_id.push_back(new_eid);
+
+  all_edges_src_.push_back(src);
+  all_edges_dst_.push_back(dst);
+}
+
+// Add a batch of edges. A length-1 src (or dst) array is broadcast against
+// the other array; otherwise both arrays must have the same length.
+void Graph::AddEdges(IdArray src_ids, IdArray dst_ids) {
+  CHECK(!read_only_) << "Graph is read-only. Mutations are not allowed.";
+  CHECK(IsValidIdArray(src_ids)) << "Invalid src id array.";
+  CHECK(IsValidIdArray(dst_ids)) << "Invalid dst id array.";
+  const int64_t num_src = src_ids->shape[0];
+  const int64_t num_dst = dst_ids->shape[0];
+  const int64_t* srcs = static_cast<int64_t*>(src_ids->data);
+  const int64_t* dsts = static_cast<int64_t*>(dst_ids->data);
+
+  // A step of 0 keeps re-reading element 0 of the broadcast side.
+  int64_t num_pairs = 0;
+  int64_t src_step = 1;
+  int64_t dst_step = 1;
+  if (num_src == 1) {
+    // one-many
+    num_pairs = num_dst;
+    src_step = 0;
+  } else if (num_dst == 1) {
+    // many-one
+    num_pairs = num_src;
+    dst_step = 0;
+  } else {
+    // many-many
+    CHECK(num_src == num_dst) << "Invalid src and dst id array.";
+    num_pairs = num_src;
+  }
+  for (int64_t k = 0; k < num_pairs; ++k) {
+    AddEdge(srcs[k * src_step], dsts[k * dst_step]);
+  }
+}
+
+// For every id in `vids`, write 1 if the vertex exists in this graph and 0
+// otherwise. The result has the same length, dtype and context as `vids`.
+BoolArray Graph::HasVertices(IdArray vids) const {
+  CHECK(IsValidIdArray(vids)) << "Invalid vertex id array.";
+  const auto len = vids->shape[0];
+  BoolArray rst = BoolArray::Empty({len}, vids->dtype, vids->ctx);
+  const int64_t* vid_data = static_cast<int64_t*>(vids->data);
+  int64_t* rst_data = static_cast<int64_t*>(rst->data);
+  const int64_t nverts = NumVertices();
+  for (int64_t i = 0; i < len; ++i) {
+    // The ids are read as signed 64-bit values, so a negative id would pass
+    // a plain `< nverts` test; reject it explicitly.
+    const int64_t vid = vid_data[i];
+    rst_data[i] = (vid >= 0 && vid < nverts) ? 1 : 0;
+  }
+  return rst;
+}
+
+// Linear scan over the successor list of `src`.
+// Returns false when either endpoint is not a vertex of this graph.
+bool Graph::HasEdgeBetween(dgl_id_t src, dgl_id_t dst) const {
+  if (!(HasVertex(src) && HasVertex(dst))) return false;
+  for (const auto& succ_id : adjlist_[src].succ) {
+    if (succ_id == dst) return true;
+  }
+  return false;
+}
+
+// Batched HasEdgeBetween: one linear successor scan per (src, dst) pair.
+// A length-1 src (or dst) array is broadcast against the other array;
+// otherwise both arrays must have the same length.
+BoolArray Graph::HasEdgesBetween(IdArray src_ids, IdArray dst_ids) const {
+  CHECK(IsValidIdArray(src_ids)) << "Invalid src id array.";
+  CHECK(IsValidIdArray(dst_ids)) << "Invalid dst id array.";
+  const int64_t num_src = src_ids->shape[0];
+  const int64_t num_dst = dst_ids->shape[0];
+  const int64_t out_len = std::max(num_src, num_dst);
+  BoolArray rst = BoolArray::Empty({out_len}, src_ids->dtype, src_ids->ctx);
+  int64_t* out = static_cast<int64_t*>(rst->data);
+  const int64_t* srcs = static_cast<int64_t*>(src_ids->data);
+  const int64_t* dsts = static_cast<int64_t*>(dst_ids->data);
+
+  // A step of 0 keeps re-reading element 0 of the broadcast side.
+  int64_t num_pairs = 0;
+  int64_t src_step = 1;
+  int64_t dst_step = 1;
+  if (num_src == 1) {
+    // one-many
+    num_pairs = num_dst;
+    src_step = 0;
+  } else if (num_dst == 1) {
+    // many-one
+    num_pairs = num_src;
+    dst_step = 0;
+  } else {
+    // many-many
+    CHECK(num_src == num_dst) << "Invalid src and dst id array.";
+    num_pairs = num_src;
+  }
+  for (int64_t k = 0; k < num_pairs; ++k) {
+    const bool found = HasEdgeBetween(srcs[k * src_step], dsts[k * dst_step]);
+    out[k] = found ? 1 : 0;
+  }
+  return rst;
+}
+
+// Return the distinct predecessors of `vid` as a freshly allocated id array
+// (the data is copied out; zero-copy is not supported).
+// NOTE: `radius` is only validated to be >= 1; only direct predecessors are
+// collected here.
+IdArray Graph::Predecessors(dgl_id_t vid, uint64_t radius) const {
+  CHECK(HasVertex(vid)) << "invalid vertex: " << vid;
+  CHECK(radius >= 1) << "invalid radius: " << radius;
+
+  // std::set removes duplicates and iterates in ascending id order.
+  const auto& preds = reverse_adjlist_[vid].succ;
+  const std::set<dgl_id_t> unique_preds(preds.begin(), preds.end());
+
+  const int64_t len = unique_preds.size();
+  IdArray rst = IdArray::Empty({len}, DLDataType{kDLInt, 64, 1}, DLContext{kDLCPU, 0});
+  std::copy(unique_preds.begin(), unique_preds.end(), static_cast<int64_t*>(rst->data));
+  return rst;
+}
+
+// Return the distinct successors of `vid` as a freshly allocated id array
+// (the data is copied out; zero-copy is not supported).
+// NOTE: `radius` is only validated to be >= 1; only direct successors are
+// collected here.
+IdArray Graph::Successors(dgl_id_t vid, uint64_t radius) const {
+  CHECK(HasVertex(vid)) << "invalid vertex: " << vid;
+  CHECK(radius >= 1) << "invalid radius: " << radius;
+
+  // std::set removes duplicates and iterates in ascending id order.
+  const auto& succs = adjlist_[vid].succ;
+  const std::set<dgl_id_t> unique_succs(succs.begin(), succs.end());
+
+  const int64_t len = unique_succs.size();
+  IdArray rst = IdArray::Empty({len}, DLDataType{kDLInt, 64, 1}, DLContext{kDLCPU, 0});
+  std::copy(unique_succs.begin(), unique_succs.end(), static_cast<int64_t*>(rst->data));
+  return rst;
+}
+
+// Return the ids of all edges src -> dst (there may be several), found by a
+// linear scan over the successor list of `src`.
+IdArray Graph::EdgeId(dgl_id_t src, dgl_id_t dst) const {
+  CHECK(HasVertex(src) && HasVertex(dst)) << "invalid edge: " << src << " -> " << dst;
+
+  const auto& out_list = adjlist_[src];
+  std::vector<dgl_id_t> matches;
+  for (size_t pos = 0; pos < out_list.succ.size(); ++pos) {
+    if (out_list.succ[pos] != dst) continue;
+    matches.push_back(out_list.edge_id[pos]);
+  }
+
+  // FIXME: signed?  The result array stores the ids as int64_t.
+  const int64_t len = matches.size();
+  IdArray rst = IdArray::Empty({len}, DLDataType{kDLInt, 64, 1}, DLContext{kDLCPU, 0});
+  std::copy(matches.begin(), matches.end(), static_cast<int64_t*>(rst->data));
+  return rst;
+}
+
+// O(E*k) pretty slow
+// For every (src, dst) pair, collect all edges src -> dst. The two arrays
+// must have equal length, or one of them must have length 1, in which case
+// its single element is paired with every element of the other array.
+// Returns parallel (src, dst, edge id) arrays with one entry per edge found.
+Graph::EdgeArray Graph::EdgeIds(IdArray src_ids, IdArray dst_ids) const {
+  CHECK(IsValidIdArray(src_ids)) << "Invalid src id array.";
+  CHECK(IsValidIdArray(dst_ids)) << "Invalid dst id array.";
+  const auto srclen = src_ids->shape[0];
+  const auto dstlen = dst_ids->shape[0];
+
+  CHECK((srclen == dstlen) || (srclen == 1) || (dstlen == 1))
+    << "Invalid src and dst id array.";
+
+  // A stride of 0 keeps re-reading the single element of a length-1 array.
+  const int64_t src_stride = (srclen == 1 && dstlen != 1) ? 0 : 1;
+  const int64_t dst_stride = (dstlen == 1 && srclen != 1) ? 0 : 1;
+  const int64_t* src_data = static_cast<int64_t*>(src_ids->data);
+  const int64_t* dst_data = static_cast<int64_t*>(dst_ids->data);
+
+  std::vector<dgl_id_t> src, dst, eid;
+
+  for (int64_t i = 0, j = 0; i < srclen && j < dstlen; i += src_stride, j += dst_stride) {
+    const dgl_id_t src_id = src_data[i], dst_id = dst_data[j];
+    // Validate before indexing adjlist_: an unknown src id would read past
+    // the end of the vector. This matches the check done by EdgeId().
+    CHECK(HasVertex(src_id) && HasVertex(dst_id))
+      << "invalid edge: " << src_id << " -> " << dst_id;
+    const auto& out_list = adjlist_[src_id];
+    for (size_t k = 0; k < out_list.succ.size(); ++k) {
+      if (out_list.succ[k] == dst_id) {
+        src.push_back(src_id);
+        dst.push_back(dst_id);
+        eid.push_back(out_list.edge_id[k]);
+      }
+    }
+  }
+
+  const int64_t rstlen = src.size();
+  IdArray rst_src = IdArray::Empty({rstlen}, src_ids->dtype, src_ids->ctx);
+  IdArray rst_dst = IdArray::Empty({rstlen}, src_ids->dtype, src_ids->ctx);
+  IdArray rst_eid = IdArray::Empty({rstlen}, src_ids->dtype, src_ids->ctx);
+
+  std::copy(src.begin(), src.end(), static_cast<int64_t*>(rst_src->data));
+  std::copy(dst.begin(), dst.end(), static_cast<int64_t*>(rst_dst->data));
+  std::copy(eid.begin(), eid.end(), static_cast<int64_t*>(rst_eid->data));
+
+  return EdgeArray{rst_src, rst_dst, rst_eid};
+}
+
+// Look up the endpoints of every edge id in `eids`. Returns parallel
+// (src, dst, edge id) arrays in the same order as the input; an id that is
+// not smaller than the number of edges is a fatal error.
+Graph::EdgeArray Graph::FindEdges(IdArray eids) const {
+  // The data is read as int64 below, so validate the array like every other
+  // array-taking query does.
+  CHECK(IsValidIdArray(eids)) << "Invalid edge id array.";
+  const int64_t len = eids->shape[0];
+
+  IdArray rst_src = IdArray::Empty({len}, eids->dtype, eids->ctx);
+  IdArray rst_dst = IdArray::Empty({len}, eids->dtype, eids->ctx);
+  IdArray rst_eid = IdArray::Empty({len}, eids->dtype, eids->ctx);
+  const int64_t* eid_data = static_cast<int64_t*>(eids->data);
+  int64_t* rst_src_data = static_cast<int64_t*>(rst_src->data);
+  int64_t* rst_dst_data = static_cast<int64_t*>(rst_dst->data);
+  int64_t* rst_eid_data = static_cast<int64_t*>(rst_eid->data);
+
+  for (int64_t i = 0; i < len; ++i) {
+    const dgl_id_t eid = eid_data[i];
+    if (eid >= num_edges_)
+      LOG(FATAL) << "invalid edge id:" << eid;
+
+    rst_src_data[i] = all_edges_src_[eid];
+    rst_dst_data[i] = all_edges_dst_[eid];
+    rst_eid_data[i] = eid;
+  }
+
+  return EdgeArray{rst_src, rst_dst, rst_eid};
+}
+
+// Return all edges pointing into `vid` as parallel (src, dst, edge id)
+// arrays; every dst entry equals `vid`.
+Graph::EdgeArray Graph::InEdges(dgl_id_t vid) const {
+  CHECK(HasVertex(vid)) << "invalid vertex: " << vid;
+  const auto& in_list = reverse_adjlist_[vid];
+  const int64_t len = in_list.succ.size();
+  const DLDataType id_type{kDLInt, 64, 1};
+  const DLContext cpu_ctx{kDLCPU, 0};
+  IdArray src = IdArray::Empty({len}, id_type, cpu_ctx);
+  IdArray dst = IdArray::Empty({len}, id_type, cpu_ctx);
+  IdArray eid = IdArray::Empty({len}, id_type, cpu_ctx);
+  int64_t* src_out = static_cast<int64_t*>(src->data);
+  int64_t* dst_out = static_cast<int64_t*>(dst->data);
+  int64_t* eid_out = static_cast<int64_t*>(eid->data);
+  std::copy_n(in_list.succ.begin(), len, src_out);
+  std::copy_n(in_list.edge_id.begin(), len, eid_out);
+  std::fill(dst_out, dst_out + len, vid);
+  return EdgeArray{src, dst, eid};
+}
+
+// Batched InEdges: concatenates the in-edges of every vertex in `vids`, in
+// input order, into parallel (src, dst, edge id) arrays.
+Graph::EdgeArray Graph::InEdges(IdArray vids) const {
+  CHECK(IsValidIdArray(vids)) << "Invalid vertex id array.";
+  const int64_t num_vids = vids->shape[0];
+  const int64_t* vid_data = static_cast<int64_t*>(vids->data);
+
+  // First pass: validate the ids and count the edges to size the outputs.
+  int64_t total = 0;
+  for (int64_t i = 0; i < num_vids; ++i) {
+    CHECK(HasVertex(vid_data[i])) << "Invalid vertex: " << vid_data[i];
+    total += reverse_adjlist_[vid_data[i]].succ.size();
+  }
+
+  IdArray src = IdArray::Empty({total}, vids->dtype, vids->ctx);
+  IdArray dst = IdArray::Empty({total}, vids->dtype, vids->ctx);
+  IdArray eid = IdArray::Empty({total}, vids->dtype, vids->ctx);
+  int64_t* src_out = static_cast<int64_t*>(src->data);
+  int64_t* dst_out = static_cast<int64_t*>(dst->data);
+  int64_t* eid_out = static_cast<int64_t*>(eid->data);
+
+  // Second pass: fill the outputs.
+  int64_t pos = 0;
+  for (int64_t i = 0; i < num_vids; ++i) {
+    const auto& in_list = reverse_adjlist_[vid_data[i]];
+    for (size_t j = 0; j < in_list.succ.size(); ++j, ++pos) {
+      src_out[pos] = in_list.succ[j];
+      dst_out[pos] = vid_data[i];
+      eid_out[pos] = in_list.edge_id[j];
+    }
+  }
+  return EdgeArray{src, dst, eid};
+}
+
+// Return all edges leaving `vid` as parallel (src, dst, edge id) arrays;
+// every src entry equals `vid`.
+Graph::EdgeArray Graph::OutEdges(dgl_id_t vid) const {
+  CHECK(HasVertex(vid)) << "invalid vertex: " << vid;
+  const auto& out_list = adjlist_[vid];
+  const int64_t len = out_list.succ.size();
+  const DLDataType id_type{kDLInt, 64, 1};
+  const DLContext cpu_ctx{kDLCPU, 0};
+  IdArray src = IdArray::Empty({len}, id_type, cpu_ctx);
+  IdArray dst = IdArray::Empty({len}, id_type, cpu_ctx);
+  IdArray eid = IdArray::Empty({len}, id_type, cpu_ctx);
+  int64_t* src_out = static_cast<int64_t*>(src->data);
+  int64_t* dst_out = static_cast<int64_t*>(dst->data);
+  int64_t* eid_out = static_cast<int64_t*>(eid->data);
+  std::copy_n(out_list.succ.begin(), len, dst_out);
+  std::copy_n(out_list.edge_id.begin(), len, eid_out);
+  std::fill(src_out, src_out + len, vid);
+  return EdgeArray{src, dst, eid};
+}
+
+// Batched OutEdges: concatenates the out-edges of every vertex in `vids`,
+// in input order, into parallel (src, dst, edge id) arrays.
+Graph::EdgeArray Graph::OutEdges(IdArray vids) const {
+  CHECK(IsValidIdArray(vids)) << "Invalid vertex id array.";
+  const int64_t num_vids = vids->shape[0];
+  const int64_t* vid_data = static_cast<int64_t*>(vids->data);
+
+  // First pass: validate the ids and count the edges to size the outputs.
+  int64_t total = 0;
+  for (int64_t i = 0; i < num_vids; ++i) {
+    CHECK(HasVertex(vid_data[i])) << "Invalid vertex: " << vid_data[i];
+    total += adjlist_[vid_data[i]].succ.size();
+  }
+
+  IdArray src = IdArray::Empty({total}, vids->dtype, vids->ctx);
+  IdArray dst = IdArray::Empty({total}, vids->dtype, vids->ctx);
+  IdArray eid = IdArray::Empty({total}, vids->dtype, vids->ctx);
+  int64_t* src_out = static_cast<int64_t*>(src->data);
+  int64_t* dst_out = static_cast<int64_t*>(dst->data);
+  int64_t* eid_out = static_cast<int64_t*>(eid->data);
+
+  // Second pass: fill the outputs.
+  int64_t pos = 0;
+  for (int64_t i = 0; i < num_vids; ++i) {
+    const auto& out_list = adjlist_[vid_data[i]];
+    for (size_t j = 0; j < out_list.succ.size(); ++j, ++pos) {
+      src_out[pos] = vid_data[i];
+      dst_out[pos] = out_list.succ[j];
+      eid_out[pos] = out_list.edge_id[j];
+    }
+  }
+  return EdgeArray{src, dst, eid};
+}
+
+// O(E*log(E)) if sort is required; otherwise, O(E)
+// Return every edge as parallel (src, dst, edge id) arrays. With `sorted`
+// the edges are ordered by (src, dst, edge id); otherwise by edge id.
+Graph::EdgeArray Graph::Edges(bool sorted) const {
+  const int64_t len = num_edges_;
+  IdArray src = IdArray::Empty({len}, DLDataType{kDLInt, 64, 1}, DLContext{kDLCPU, 0});
+  IdArray dst = IdArray::Empty({len}, DLDataType{kDLInt, 64, 1}, DLContext{kDLCPU, 0});
+  IdArray eid = IdArray::Empty({len}, DLDataType{kDLInt, 64, 1}, DLContext{kDLCPU, 0});
+  int64_t* src_ptr = static_cast<int64_t*>(src->data);
+  int64_t* dst_ptr = static_cast<int64_t*>(dst->data);
+  int64_t* eid_ptr = static_cast<int64_t*>(eid->data);
+
+  if (sorted) {
+    typedef std::tuple<int64_t, int64_t, int64_t> Tuple;
+    std::vector<Tuple> tuples;
+    tuples.reserve(len);
+    for (uint64_t e = 0; e < num_edges_; ++e) {
+      tuples.emplace_back(all_edges_src_[e], all_edges_dst_[e], e);
+    }
+    // Sort by src, then dst. std::sort is not stable, so the edge id is used
+    // as the final tie-breaker (default tuple ordering) to give parallel
+    // edges a well-defined order.
+    std::sort(tuples.begin(), tuples.end());
+
+    // make return arrays
+    for (size_t i = 0; i < tuples.size(); ++i) {
+      src_ptr[i] = std::get<0>(tuples[i]);
+      dst_ptr[i] = std::get<1>(tuples[i]);
+      eid_ptr[i] = std::get<2>(tuples[i]);
+    }
+  } else {
+    std::copy(all_edges_src_.begin(), all_edges_src_.end(), src_ptr);
+    std::copy(all_edges_dst_.begin(), all_edges_dst_.end(), dst_ptr);
+    for (uint64_t e = 0; e < num_edges_; ++e) {
+      eid_ptr[e] = e;
+    }
+  }
+
+  return EdgeArray{src, dst, eid};
+}
+
+// Return the in-degree of every vertex in `vids`, in input order.
+// An id that is not a vertex of this graph is a fatal error.
+DegreeArray Graph::InDegrees(IdArray vids) const {
+  CHECK(IsValidIdArray(vids)) << "Invalid vertex id array.";
+  const int64_t num_vids = vids->shape[0];
+  const int64_t* ids = static_cast<int64_t*>(vids->data);
+  DegreeArray rst = DegreeArray::Empty({num_vids}, vids->dtype, vids->ctx);
+  int64_t* degrees = static_cast<int64_t*>(rst->data);
+  for (int64_t k = 0; k < num_vids; ++k) {
+    const int64_t vid = ids[k];
+    CHECK(HasVertex(vid)) << "Invalid vertex: " << vid;
+    const auto& in_list = reverse_adjlist_[vid];
+    degrees[k] = in_list.succ.size();
+  }
+  return rst;
+}
+
+// Return the out-degree of every vertex in `vids`, in input order.
+// An id that is not a vertex of this graph is a fatal error.
+DegreeArray Graph::OutDegrees(IdArray vids) const {
+  CHECK(IsValidIdArray(vids)) << "Invalid vertex id array.";
+  const int64_t num_vids = vids->shape[0];
+  const int64_t* ids = static_cast<int64_t*>(vids->data);
+  DegreeArray rst = DegreeArray::Empty({num_vids}, vids->dtype, vids->ctx);
+  int64_t* degrees = static_cast<int64_t*>(rst->data);
+  for (int64_t k = 0; k < num_vids; ++k) {
+    const int64_t vid = ids[k];
+    CHECK(HasVertex(vid)) << "Invalid vertex: " << vid;
+    const auto& out_list = adjlist_[vid];
+    degrees[k] = out_list.succ.size();
+  }
+  return rst;
+}
+
+// Build the subgraph induced by the vertices in `vids`. Vertex vids[i]
+// becomes vertex i of the subgraph; an edge is kept when both of its
+// endpoints are in `vids`. `induced_edges` lists the original id of each
+// kept edge, in the order the edges were added to the subgraph.
+// If an id is repeated in `vids`, edges pointing to it are attached to its
+// last position.
+Subgraph Graph::VertexSubgraph(IdArray vids) const {
+  CHECK(IsValidIdArray(vids)) << "Invalid vertex id array.";
+  const auto len = vids->shape[0];
+  std::unordered_map<dgl_id_t, dgl_id_t> oldv2newv;
+  std::vector<dgl_id_t> edges;
+  const int64_t* vid_data = static_cast<int64_t*>(vids->data);
+  for (int64_t i = 0; i < len; ++i) {
+    // Reject unknown ids up front: they are used to index adjlist_ below and
+    // would otherwise read past the end of the vector.
+    CHECK(HasVertex(vid_data[i])) << "Invalid vertex: " << vid_data[i];
+    oldv2newv[vid_data[i]] = i;
+  }
+  Subgraph rst;
+  rst.induced_vertices = vids;
+  rst.graph.AddVertices(len);
+  for (int64_t i = 0; i < len; ++i) {
+    const dgl_id_t newvid = i;
+    const auto& out_list = adjlist_[vid_data[i]];
+    for (size_t j = 0; j < out_list.succ.size(); ++j) {
+      // Single lookup instead of count() followed by operator[].
+      const auto it = oldv2newv.find(out_list.succ[j]);
+      if (it == oldv2newv.end()) continue;
+      edges.push_back(out_list.edge_id[j]);
+      rst.graph.AddEdge(newvid, it->second);
+    }
+  }
+  rst.induced_edges = IdArray::Empty({static_cast<int64_t>(edges.size())}, vids->dtype, vids->ctx);
+  std::copy(edges.begin(), edges.end(), static_cast<int64_t*>(rst.induced_edges->data));
+  return rst;
+}
+
+// Build the subgraph induced by the edges in `eids`. Endpoints are
+// renumbered in order of first appearance (src before dst for each edge);
+// `induced_vertices` lists the original id of each subgraph vertex. Edge i
+// of the subgraph corresponds to eids[i].
+Subgraph Graph::EdgeSubgraph(IdArray eids) const {
+  CHECK(IsValidIdArray(eids)) << "Invalid edge id array.";
+
+  const auto len = eids->shape[0];
+  std::unordered_map<dgl_id_t, dgl_id_t> oldv2newv;
+  std::vector<dgl_id_t> nodes;
+  const int64_t* eid_data = static_cast<int64_t*>(eids->data);
+
+  for (int64_t i = 0; i < len; ++i) {
+    // Validate before indexing the edge lists: an unknown id would read past
+    // the end of all_edges_src_ / all_edges_dst_.
+    const dgl_id_t eid = eid_data[i];
+    CHECK(eid_data[i] >= 0 && eid < num_edges_) << "Invalid edge id: " << eid_data[i];
+    const dgl_id_t src_id = all_edges_src_[eid];
+    const dgl_id_t dst_id = all_edges_dst_[eid];
+    // insert() only succeeds for ids not seen before, so map size == next new id.
+    if (oldv2newv.insert(std::make_pair(src_id, oldv2newv.size())).second)
+      nodes.push_back(src_id);
+    if (oldv2newv.insert(std::make_pair(dst_id, oldv2newv.size())).second)
+      nodes.push_back(dst_id);
+  }
+
+  Subgraph rst;
+  rst.induced_edges = eids;
+  rst.graph.AddVertices(nodes.size());
+
+  for (int64_t i = 0; i < len; ++i) {
+    const dgl_id_t src_id = all_edges_src_[eid_data[i]];
+    const dgl_id_t dst_id = all_edges_dst_[eid_data[i]];
+    rst.graph.AddEdge(oldv2newv[src_id], oldv2newv[dst_id]);
+  }
+
+  rst.induced_vertices = IdArray::Empty(
+      {static_cast<int64_t>(nodes.size())}, eids->dtype, eids->ctx);
+  std::copy(nodes.begin(), nodes.end(), static_cast<int64_t*>(rst.induced_vertices->data));
+
+  return rst;
+}
+
+Graph Graph::Reverse() const {  // placeholder: graph reversal is not implemented yet
+  LOG(FATAL) << "not implemented";
+  return *this;  // presumably never reached after LOG(FATAL); satisfies the return type
+}
+
+}  // namespace dgl
--- a/src/graph/graph_apis.cc
+++ b/src/graph/graph_apis.cc
+/*!
+ *  Copyright (c) 2018 by Contributors
+ * \file graph/graph_apis.cc
+ * \brief DGL graph index APIs
+ */
+#include <dgl/graph.h>
+#include <dgl/graph_op.h>
+#include "../c_api_common.h"
+
+using tvm::runtime::TVMArgs;
+using tvm::runtime::TVMArgValue;
+using tvm::runtime::TVMRetValue;
+using tvm::runtime::PackedFunc;
+using tvm::runtime::NDArray;
+
+namespace dgl {
+
+namespace {
+// Convert EdgeArray structure to PackedFunc.
+// The returned function takes a selector: 0 -> src, 1 -> dst, 2 -> id.
+// Any other selector is a fatal error.
+PackedFunc ConvertEdgeArrayToPackedFunc(const Graph::EdgeArray& ea) {
+  auto body = [ea] (TVMArgs args, TVMRetValue* rv) {
+      const int which = args[0];
+      // `ea` is const inside this non-mutable lambda, so std::move() on its
+      // members could only ever copy. Assign directly to make that explicit.
+      switch (which) {
+        case 0:
+          *rv = ea.src;
+          break;
+        case 1:
+          *rv = ea.dst;
+          break;
+        case 2:
+          *rv = ea.id;
+          break;
+        default:
+          LOG(FATAL) << "invalid choice";
+      }
+    };
+  return PackedFunc(body);
+}
+
+// Convert Subgraph structure to PackedFunc.
+// The returned function takes a selector: 0 -> handle to a new heap copy of
+// the subgraph's graph, 1 -> induced vertices, 2 -> induced edges.
+// Any other selector is a fatal error.
+PackedFunc ConvertSubgraphToPackedFunc(const Subgraph& sg) {
+  auto body = [sg] (TVMArgs args, TVMRetValue* rv) {
+      const int which = args[0];
+      // `sg` is const inside this non-mutable lambda, so std::move() on its
+      // members could only ever copy. Copy explicitly instead.
+      if (which == 0) {
+        // Every call with selector 0 allocates a fresh copy; the receiver of
+        // the handle is responsible for releasing it
+        // (presumably through _CAPI_DGLGraphFree -- TODO confirm).
+        GraphHandle ghandle = new Graph(sg.graph);
+        *rv = ghandle;
+      } else if (which == 1) {
+        *rv = sg.induced_vertices;
+      } else if (which == 2) {
+        *rv = sg.induced_edges;
+      } else {
+        LOG(FATAL) << "invalid choice";
+      }
+    };
+  return PackedFunc(body);
+}
+
+}  // namespace
+
+TVM_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphCreate")
+.set_body([] (TVMArgs args, TVMRetValue* rv) {  // args: (multigraph flag); returns a handle
+    bool multigraph = static_cast<bool>(args[0]);
+    GraphHandle ghandle = new Graph(multigraph);  // heap object; _CAPI_DGLGraphFree deletes it
+    *rv = ghandle;
+  });
+
+TVM_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphFree")
+.set_body([] (TVMArgs args, TVMRetValue* rv) {  // args: (graph handle); no return value
+    GraphHandle ghandle = args[0];
+    Graph* gptr = static_cast<Graph*>(ghandle);
+    delete gptr;  // the handle must not be used after this call
+  });
+
+TVM_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphAddVertices")
+.set_body([] (TVMArgs args, TVMRetValue* rv) {  // args: (graph handle, number of vertices)
+    GraphHandle ghandle = args[0];
+    Graph* gptr = static_cast<Graph*>(ghandle);
+    uint64_t num_vertices = args[1];
+    gptr->AddVertices(num_vertices);
+  });
+
+TVM_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphAddEdge")
+.set_body([] (TVMArgs args, TVMRetValue* rv) {  // args: (graph handle, src id, dst id)
+    GraphHandle ghandle = args[0];
+    Graph* gptr = static_cast<Graph*>(ghandle);
+    const dgl_id_t src = args[1];
+    const dgl_id_t dst = args[2];
+    gptr->AddEdge(src, dst);
+  });
+
+TVM_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphAddEdges")
+.set_body([] (TVMArgs args, TVMRetValue* rv) {  // args: (graph handle, src ids, dst ids)
+    GraphHandle ghandle = args[0];
+    Graph* gptr = static_cast<Graph*>(ghandle);
+    const IdArray src = IdArray::FromDLPack(CreateTmpDLManagedTensor(args[1]));
+    const IdArray dst = IdArray::FromDLPack(CreateTmpDLManagedTensor(args[2]));
+    gptr->AddEdges(src, dst);
+  });
+
+TVM_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphClear")
+.set_body([] (TVMArgs args, TVMRetValue* rv) {  // args: (graph handle); no return value
+    GraphHandle ghandle = args[0];
+    Graph* gptr = static_cast<Graph*>(ghandle);
+    gptr->Clear();
+  });
+
+TVM_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphIsMultigraph")
+.set_body([] (TVMArgs args, TVMRetValue *rv) {
+    GraphHandle ghandle = args[0];
+    // NOTE(review): an earlier note here said the pointer is "not const since we
+    // have caches", yet it is declared const below -- confirm which is intended.
+    const Graph* gptr = static_cast<Graph*>(ghandle);
+    *rv = gptr->IsMultigraph();
+  });
+
+TVM_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphNumVertices")
+.set_body([] (TVMArgs args, TVMRetValue* rv) {  // args: (graph handle)
+    GraphHandle ghandle = args[0];
+    const Graph* gptr = static_cast<Graph*>(ghandle);
+    *rv = static_cast<int64_t>(gptr->NumVertices());  // returned to the caller as int64
+  });
+
+TVM_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphNumEdges")
+.set_body([] (TVMArgs args, TVMRetValue* rv) {  // args: (graph handle)
+    GraphHandle ghandle = args[0];
+    const Graph* gptr = static_cast<Graph*>(ghandle);
+    *rv = static_cast<int64_t>(gptr->NumEdges());  // returned to the caller as int64
+  });
+
+TVM_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphHasVertex")
+.set_body([] (TVMArgs args, TVMRetValue* rv) {  // args: (graph handle, vertex id)
+    GraphHandle ghandle = args[0];
+    const Graph* gptr = static_cast<Graph*>(ghandle);
+    const dgl_id_t vid = args[1];
+    *rv = gptr->HasVertex(vid);
+  });
+
+TVM_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphHasVertices")
+.set_body([] (TVMArgs args, TVMRetValue* rv) {  // args: (graph handle, vertex ids)
+    GraphHandle ghandle = args[0];
+    const Graph* gptr = static_cast<Graph*>(ghandle);
+    const IdArray vids = IdArray::FromDLPack(CreateTmpDLManagedTensor(args[1]));
+    *rv = gptr->HasVertices(vids);  // one 0/1 entry per input id
+  });
+
+TVM_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphHasEdgeBetween")
+.set_body([] (TVMArgs args, TVMRetValue* rv) {  // args: (graph handle, src id, dst id)
+    GraphHandle ghandle = args[0];
+    const Graph* gptr = static_cast<Graph*>(ghandle);
+    const dgl_id_t src = args[1];
+    const dgl_id_t dst = args[2];
+    *rv = gptr->HasEdgeBetween(src, dst);
+  });
+
+TVM_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphHasEdgesBetween")
+.set_body([] (TVMArgs args, TVMRetValue* rv) {  // args: (graph handle, src ids, dst ids)
+    GraphHandle ghandle = args[0];
+    const Graph* gptr = static_cast<Graph*>(ghandle);
+    const IdArray src = IdArray::FromDLPack(CreateTmpDLManagedTensor(args[1]));
+    const IdArray dst = IdArray::FromDLPack(CreateTmpDLManagedTensor(args[2]));
+    *rv = gptr->HasEdgesBetween(src, dst);  // one 0/1 entry per (src, dst) pair
+  });
+
+TVM_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphPredecessors")
+.set_body([] (TVMArgs args, TVMRetValue* rv) {  // args: (graph handle, vertex id, radius)
+    GraphHandle ghandle = args[0];
+    const Graph* gptr = static_cast<Graph*>(ghandle);
+    const dgl_id_t vid = args[1];
+    const uint64_t radius = args[2];
+    *rv = gptr->Predecessors(vid, radius);
+  });
+
+TVM_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphSuccessors")
+.set_body([] (TVMArgs args, TVMRetValue* rv) {  // args: (graph handle, vertex id, radius)
+    GraphHandle ghandle = args[0];
+    const Graph* gptr = static_cast<Graph*>(ghandle);
+    const dgl_id_t vid = args[1];
+    const uint64_t radius = args[2];
+    *rv = gptr->Successors(vid, radius);
+  });
+
+TVM_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphEdgeId")
+.set_body([] (TVMArgs args, TVMRetValue* rv) {  // args: (graph handle, src id, dst id)
+    GraphHandle ghandle = args[0];
+    const Graph* gptr = static_cast<Graph*>(ghandle);
+    const dgl_id_t src = args[1];
+    const dgl_id_t dst = args[2];
+    *rv = gptr->EdgeId(src, dst);  // an id array: there may be several edges src -> dst
+  });
+
+TVM_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphEdgeIds")
+.set_body([] (TVMArgs args, TVMRetValue* rv) {  // args: (graph handle, src ids, dst ids)
+    GraphHandle ghandle = args[0];
+    const Graph* gptr = static_cast<Graph*>(ghandle);
+    const IdArray src = IdArray::FromDLPack(CreateTmpDLManagedTensor(args[1]));
+    const IdArray dst = IdArray::FromDLPack(CreateTmpDLManagedTensor(args[2]));
+    *rv = ConvertEdgeArrayToPackedFunc(gptr->EdgeIds(src, dst));  // selector 0/1/2: src/dst/id
+  });
+
+TVM_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphFindEdges")
+.set_body([] (TVMArgs args, TVMRetValue* rv) {  // args: (graph handle, edge ids)
+    GraphHandle ghandle = args[0];
+    const Graph* gptr = static_cast<Graph*>(ghandle);
+    const IdArray eids = IdArray::FromDLPack(CreateTmpDLManagedTensor(args[1]));
+    *rv = ConvertEdgeArrayToPackedFunc(gptr->FindEdges(eids));  // selector 0/1/2: src/dst/id
+  });
+
+TVM_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphInEdges_1")
+.set_body([] (TVMArgs args, TVMRetValue* rv) {  // args: (graph handle, single vertex id)
+    GraphHandle ghandle = args[0];
+    const Graph* gptr = static_cast<Graph*>(ghandle);
+    const dgl_id_t vid = args[1];
+    *rv = ConvertEdgeArrayToPackedFunc(gptr->InEdges(vid));  // selector 0/1/2: src/dst/id
+  });
+
+TVM_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphInEdges_2")
+.set_body([] (TVMArgs args, TVMRetValue* rv) {  // args: (graph handle, vertex id array)
+    GraphHandle ghandle = args[0];
+    const Graph* gptr = static_cast<Graph*>(ghandle);
+    const IdArray vids = IdArray::FromDLPack(CreateTmpDLManagedTensor(args[1]));
+    *rv = ConvertEdgeArrayToPackedFunc(gptr->InEdges(vids));  // selector 0/1/2: src/dst/id
+  });
+
+TVM_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphOutEdges_1")
+.set_body([] (TVMArgs args, TVMRetValue* rv) {  // args: (graph handle, single vertex id)
+    GraphHandle ghandle = args[0];
+    const Graph* gptr = static_cast<Graph*>(ghandle);
+    const dgl_id_t vid = args[1];
+    *rv = ConvertEdgeArrayToPackedFunc(gptr->OutEdges(vid));  // selector 0/1/2: src/dst/id
+  });
+
+TVM_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphOutEdges_2")
+.set_body([] (TVMArgs args, TVMRetValue* rv) {  // args: (graph handle, vertex id array)
+    GraphHandle ghandle = args[0];
+    const Graph* gptr = static_cast<Graph*>(ghandle);
+    const IdArray vids = IdArray::FromDLPack(CreateTmpDLManagedTensor(args[1]));
+    *rv = ConvertEdgeArrayToPackedFunc(gptr->OutEdges(vids));  // selector 0/1/2: src/dst/id
+  });
+
+TVM_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphEdges")
+.set_body([] (TVMArgs args, TVMRetValue* rv) {  // args: (graph handle, sorted flag)
+    GraphHandle ghandle = args[0];
+    const Graph* gptr = static_cast<Graph*>(ghandle);
+    const bool sorted = args[1];
+    *rv = ConvertEdgeArrayToPackedFunc(gptr->Edges(sorted));  // selector 0/1/2: src/dst/id
+  });
+
+TVM_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphInDegree")
+.set_body([] (TVMArgs args, TVMRetValue* rv) {
+    // args[0]: graph handle; args[1]: vertex id. Returns the in-degree as int64.
+    GraphHandle handle = args[0];
+    const Graph* graph = static_cast<Graph*>(handle);
+    const dgl_id_t vertex = args[1];
+    const int64_t degree = static_cast<int64_t>(graph->InDegree(vertex));
+    *rv = degree;
+  });
+
+TVM_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphInDegrees")
+.set_body([] (TVMArgs args, TVMRetValue* rv) {
+    // args[0]: graph handle; args[1]: array of vertex ids.
+    // Returns whatever Graph::InDegrees produces for that array.
+    GraphHandle handle = args[0];
+    const Graph* graph = static_cast<Graph*>(handle);
+    const IdArray vertex_ids = IdArray::FromDLPack(CreateTmpDLManagedTensor(args[1]));
+    *rv = graph->InDegrees(vertex_ids);
+  });
+
+TVM_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphOutDegree")
+.set_body([] (TVMArgs args, TVMRetValue* rv) {
+    // args[0]: graph handle; args[1]: vertex id. Returns the out-degree as int64.
+    GraphHandle handle = args[0];
+    const Graph* graph = static_cast<Graph*>(handle);
+    const dgl_id_t vertex = args[1];
+    const int64_t degree = static_cast<int64_t>(graph->OutDegree(vertex));
+    *rv = degree;
+  });
+
+TVM_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphOutDegrees")
+.set_body([] (TVMArgs args, TVMRetValue* rv) {
+    // args[0]: graph handle; args[1]: array of vertex ids.
+    // Returns whatever Graph::OutDegrees produces for that array.
+    GraphHandle handle = args[0];
+    const Graph* graph = static_cast<Graph*>(handle);
+    const IdArray vertex_ids = IdArray::FromDLPack(CreateTmpDLManagedTensor(args[1]));
+    *rv = graph->OutDegrees(vertex_ids);
+  });
+
+TVM_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphVertexSubgraph")
+.set_body([] (TVMArgs args, TVMRetValue* rv) {
+    // args[0]: graph handle; args[1]: array of vertex ids.
+    // The result of Graph::VertexSubgraph is returned through ConvertSubgraphToPackedFunc.
+    GraphHandle handle = args[0];
+    const Graph* graph = static_cast<Graph*>(handle);
+    const IdArray vertex_ids = IdArray::FromDLPack(CreateTmpDLManagedTensor(args[1]));
+    *rv = ConvertSubgraphToPackedFunc(graph->VertexSubgraph(vertex_ids));
+  });
+
+TVM_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphEdgeSubgraph")
+.set_body([] (TVMArgs args, TVMRetValue* rv) {
+    // args[0]: graph handle; args[1]: array of edge ids.
+    // The result of Graph::EdgeSubgraph is returned through ConvertSubgraphToPackedFunc.
+    GraphHandle handle = args[0];
+    const Graph* graph = static_cast<Graph*>(handle);
+    const IdArray edge_ids = IdArray::FromDLPack(CreateTmpDLManagedTensor(args[1]));
+    *rv = ConvertSubgraphToPackedFunc(graph->EdgeSubgraph(edge_ids));
+  });
+
+TVM_REGISTER_GLOBAL("graph_index._CAPI_DGLDisjointUnion")
+.set_body([] (TVMArgs args, TVMRetValue* rv) {
+    // args[0]: pointer to a contiguous array of graph handles.
+    // args[1]: number of handles in that array.
+    // Returns a handle to a newly heap-allocated Graph holding the union.
+    // NOTE(review): the caller presumably releases the handle through the
+    // graph free API; that is not visible in this block.
+    void* list = args[0];
+    GraphHandle* inhandles = static_cast<GraphHandle*>(list);
+    int list_size = args[1];
+    std::vector<const Graph*> graphs;
+    if (list_size > 0) {
+      graphs.reserve(list_size);
+    }
+    for (int i = 0; i < list_size; ++i) {
+      const Graph* gr = static_cast<const Graph*>(inhandles[i]);
+      graphs.push_back(gr);
+    }
+    // Build the heap object directly from the result. Allocating an empty
+    // Graph first and assigning afterwards leaked it whenever DisjointUnion
+    // threw; with a new-expression the storage is released automatically
+    // if initialization fails.
+    Graph* gptr = new Graph(GraphOp::DisjointUnion(std::move(graphs)));
+    GraphHandle ghandle = gptr;
+    *rv = ghandle;
+  });
+
+TVM_REGISTER_GLOBAL("graph_index._CAPI_DGLDisjointPartitionByNum")
+.set_body([] (TVMArgs args, TVMRetValue* rv) {
+    // args[0]: graph handle; args[1]: number of partitions.
+    GraphHandle handle = args[0];
+    const Graph* graph = static_cast<Graph*>(handle);
+    int64_t num_parts = args[1];
+    std::vector<Graph> parts = GraphOp::DisjointPartitionByNum(graph, num_parts);
+    // Each partition is moved into its own heap Graph; the raw pointers are
+    // returned to the caller packed as 64-bit integers in a CPU array.
+    const int64_t count = parts.size();
+    NDArray handles = NDArray::Empty({count}, DLDataType{kDLInt, 64, 1}, DLContext{kDLCPU, 0});
+    int64_t* slots = static_cast<int64_t*>(handles->data);
+    for (size_t idx = 0; idx < parts.size(); ++idx) {
+      Graph* owned = new Graph();
+      *owned = std::move(parts[idx]);
+      slots[idx] = reinterpret_cast<std::intptr_t>(owned);
+    }
+    *rv = handles;
+  });
+
+TVM_REGISTER_GLOBAL("graph_index._CAPI_DGLDisjointPartitionBySizes")
+.set_body([] (TVMArgs args, TVMRetValue* rv) {
+    // args[0]: graph handle; args[1]: array with the vertex count of each partition.
+    GraphHandle handle = args[0];
+    const Graph* graph = static_cast<Graph*>(handle);
+    const IdArray part_sizes = IdArray::FromDLPack(CreateTmpDLManagedTensor(args[1]));
+    std::vector<Graph> parts = GraphOp::DisjointPartitionBySizes(graph, part_sizes);
+    // Each partition is moved into its own heap Graph; the raw pointers are
+    // returned to the caller packed as 64-bit integers in a CPU array.
+    const int64_t count = parts.size();
+    NDArray handles = NDArray::Empty({count}, DLDataType{kDLInt, 64, 1}, DLContext{kDLCPU, 0});
+    int64_t* slots = static_cast<int64_t*>(handles->data);
+    for (size_t idx = 0; idx < parts.size(); ++idx) {
+      Graph* owned = new Graph();
+      *owned = std::move(parts[idx]);
+      slots[idx] = reinterpret_cast<std::intptr_t>(owned);
+    }
+    *rv = handles;
+  });
+
+TVM_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphLineGraph")
+.set_body([] (TVMArgs args, TVMRetValue* rv) {
+    // args[0]: graph handle; args[1]: backtracking flag forwarded to GraphOp::LineGraph.
+    // Returns a handle to a newly heap-allocated Graph holding the line graph.
+    GraphHandle ghandle = args[0];
+    bool backtracking = args[1];
+    const Graph* gptr = static_cast<Graph*>(ghandle);
+    // Build the heap object directly from the result. Allocating an empty
+    // Graph first and assigning afterwards leaked it whenever LineGraph
+    // threw; with a new-expression the storage is released automatically
+    // if initialization fails.
+    Graph* lgptr = new Graph(GraphOp::LineGraph(gptr, backtracking));
+    GraphHandle lghandle = lgptr;
+    *rv = lghandle;
+  });
+}  // namespace dgl
--- a/src/graph/graph_op.cc
+++ b/src/graph/graph_op.cc
+/*!
+ *  Copyright (c) 2018 by Contributors
+ * \file graph/graph_op.cc
+ * \brief Graph operation implementation
+ */
+#include <dgl/graph_op.h>
+#include <algorithm>
+
+namespace dgl {
+
+Graph GraphOp::LineGraph(const Graph* g, bool backtracking) {
+  // One outgoing edge of a vertex, stored as (destination vertex, edge id).
+  typedef std::pair<dgl_id_t, dgl_id_t> OutEdge;
+  // Source vertex -> all of its outgoing edges.
+  typedef std::map<dgl_id_t, std::vector<OutEdge>> OutEdgeMap;
+
+  const size_t num_edges = g->all_edges_src_.size();
+
+  // Index every edge by its source vertex.
+  OutEdgeMap out_edges;
+  for (size_t eid = 0; eid != num_edges; ++eid) {
+    out_edges[g->all_edges_src_[eid]].push_back(
+        std::make_pair(g->all_edges_dst_[eid], eid));
+  }
+
+  // For each edge (u -> v), connect it to every edge leaving v. When
+  // backtracking is disabled, edges that lead straight back to u are skipped.
+  std::vector<dgl_id_t> lg_src, lg_dst;
+  for (size_t eid = 0; eid != num_edges; ++eid) {
+    const auto u = g->all_edges_src_[eid];
+    const auto v = g->all_edges_dst_[eid];
+    const auto found = out_edges.find(v);
+    if (found == out_edges.end()) {
+      continue;
+    }
+    for (const OutEdge& next : found->second) {
+      if (!backtracking && next.first == u) {
+        continue;
+      }
+      lg_src.push_back(eid);
+      lg_dst.push_back(next.second);
+    }
+  }
+
+  // Copy the collected endpoints into 64-bit integer CPU arrays.
+  const int64_t len = lg_src.size();
+  IdArray src = IdArray::Empty({len}, DLDataType{kDLInt, 64, 1}, DLContext{kDLCPU, 0});
+  IdArray dst = IdArray::Empty({len}, DLDataType{kDLInt, 64, 1}, DLContext{kDLCPU, 0});
+  std::copy(lg_src.begin(), lg_src.end(), static_cast<int64_t*>(src->data));
+  std::copy(lg_dst.begin(), lg_dst.end(), static_cast<int64_t*>(dst->data));
+
+  // The line graph has one vertex per edge of the input graph.
+  Graph line_graph;
+  line_graph.AddVertices(g->NumEdges());
+  line_graph.AddEdges(src, dst);
+  return line_graph;
+}
+
+Graph GraphOp::DisjointUnion(std::vector<const Graph*> graphs) {
+  // Concatenate the inputs into one graph: the vertices of each input are
+  // appended in order, and its edge endpoints are shifted by the number of
+  // vertices contributed by the inputs before it.
+  Graph merged;
+  uint64_t vertex_base = 0;
+  for (size_t gi = 0; gi < graphs.size(); ++gi) {
+    const Graph* part = graphs[gi];
+    merged.AddVertices(part->NumVertices());
+    const auto part_edges = part->NumEdges();
+    for (uint64_t e = 0; e < part_edges; ++e) {
+      const auto src = part->all_edges_src_[e] + vertex_base;
+      const auto dst = part->all_edges_dst_[e] + vertex_base;
+      merged.AddEdge(src, dst);
+    }
+    vertex_base += part->NumVertices();
+  }
+  return merged;
+}
+
+std::vector<Graph> GraphOp::DisjointPartitionByNum(const Graph* graph, int64_t num) {
+  // Split the graph into `num` partitions with the same number of vertices
+  // each, by delegating to DisjointPartitionBySizes with a uniform size array.
+  //
+  // `num` is used as an array length below, so it must be strictly positive.
+  // Testing only `num != 0` let a negative value through whenever the
+  // divisibility test happened to hold (for instance on an empty graph).
+  CHECK(num > 0)
+    << "Number of partitions must be positive.";
+  CHECK(graph->NumVertices() % num == 0)
+    << "Number of partitions must evenly divide the number of nodes.";
+  IdArray sizes = IdArray::Empty({num}, DLDataType{kDLInt, 64, 1}, DLContext{kDLCPU, 0});
+  int64_t* sizes_data = static_cast<int64_t*>(sizes->data);
+  std::fill(sizes_data, sizes_data + num, graph->NumVertices() / num);
+  return DisjointPartitionBySizes(graph, sizes);
+}
+
+std::vector<Graph> GraphOp::DisjointPartitionBySizes(const Graph* graph, IdArray sizes) {
+  // Split the graph into consecutive vertex ranges: partition i takes the
+  // next sizes[i] vertices. Vertex and edge ids inside each partition are
+  // relabelled to start from zero.
+  //
+  // The relabelling below subtracts the running vertex/edge offsets and
+  // copies a contiguous slice of the edge list, so it relies on every edge
+  // staying inside one partition and on edge ids being grouped by partition
+  // in the same order (the layout GraphOp::DisjointUnion produces).
+  const int64_t len = sizes->shape[0];
+  const int64_t* sizes_data = static_cast<int64_t*>(sizes->data);
+  std::vector<int64_t> cumsum;
+  cumsum.push_back(0);
+  for (int64_t i = 0; i < len; ++i) {
+    // Sizes are used as iterator offsets below; a negative entry could still
+    // satisfy the total-sum check and then index outside the adjacency lists.
+    CHECK(sizes_data[i] >= 0)
+      << "Partition sizes must be non-negative.";
+    cumsum.push_back(cumsum[i] + sizes_data[i]);
+  }
+  CHECK_EQ(cumsum[len], graph->NumVertices())
+    << "Sum of the given sizes must equal to the number of nodes.";
+  dgl_id_t node_offset = 0, edge_offset = 0;
+  std::vector<Graph> rst(len);
+  for (int64_t i = 0; i < len; ++i) {
+    // copy adj
+    rst[i].adjlist_.insert(rst[i].adjlist_.end(),
+        graph->adjlist_.begin() + node_offset,
+        graph->adjlist_.begin() + node_offset + sizes_data[i]);
+    rst[i].reverse_adjlist_.insert(rst[i].reverse_adjlist_.end(),
+        graph->reverse_adjlist_.begin() + node_offset,
+        graph->reverse_adjlist_.begin() + node_offset + sizes_data[i]);
+    // relabel adjs; the edge count of the partition is taken from the
+    // forward adjacency lists only
+    size_t num_edges = 0;
+    for (auto& elist : rst[i].adjlist_) {
+      for (size_t j = 0; j < elist.succ.size(); ++j) {
+        elist.succ[j] -= node_offset;
+        elist.edge_id[j] -= edge_offset;
+      }
+      num_edges += elist.succ.size();
+    }
+    for (auto& elist : rst[i].reverse_adjlist_) {
+      for (size_t j = 0; j < elist.succ.size(); ++j) {
+        elist.succ[j] -= node_offset;
+        elist.edge_id[j] -= edge_offset;
+      }
+    }
+    // copy edges
+    rst[i].all_edges_src_.reserve(num_edges);
+    rst[i].all_edges_dst_.reserve(num_edges);
+    rst[i].num_edges_ = num_edges;
+    for (size_t j = edge_offset; j < edge_offset + num_edges; ++j) {
+      rst[i].all_edges_src_.push_back(graph->all_edges_src_[j] - node_offset);
+      rst[i].all_edges_dst_.push_back(graph->all_edges_dst_[j] - node_offset);
+    }
+    // update offset
+    CHECK_EQ(rst[i].NumVertices(), sizes_data[i]);
+    CHECK_EQ(rst[i].NumEdges(), num_edges);
+    node_offset += sizes_data[i];
+    edge_offset += num_edges;
+  }
+  return rst;
+}
+
+}  // namespace dgl
--- a/src/runtime/c_runtime_api.cc
+++ b/src/runtime/c_runtime_api.cc
+/*!
+ *  Copyright (c) 2016 by Contributors
+ * \file c_runtime_api.cc
+ * \brief Runtime API implementation
+ */
+#include <dmlc/thread_local.h>
+#include <dgl/runtime/c_runtime_api.h>
+#include <dgl/runtime/c_backend_api.h>
+#include <dgl/runtime/packed_func.h>
+#include <dgl/runtime/module.h>
+#include <dgl/runtime/registry.h>
+#include <dgl/runtime/device_api.h>
+#include <array>
+#include <algorithm>
+#include <string>
+#include <cstdlib>
+#include "runtime_base.h"
+
+namespace tvm {
+namespace runtime {
+
+/*!
+ * \brief Map a device type code to the name used to look up its Device API factory.
+ * \param type The device type.
+ * \return The lowercase device name; unknown codes are reported through
+ *         LOG(FATAL), with "Unknown" as the fallback value.
+ */
+inline std::string DeviceName(int type) {
+  switch (type) {
+    case kDLCPU:
+      return "cpu";
+    case kDLGPU:
+      return "gpu";
+    case kDLOpenCL:
+      return "opencl";
+    case kDLSDAccel:
+      return "sdaccel";
+    case kDLAOCL:
+      return "aocl";
+    case kDLVulkan:
+      return "vulkan";
+    case kDLMetal:
+      return "metal";
+    case kDLVPI:
+      return "vpi";
+    case kDLROCM:
+      return "rocm";
+    case kOpenGL:
+      return "opengl";
+    case kExtDev:
+      return "ext_dev";
+    default:
+      LOG(FATAL) << "unknown type =" << type;
+      return "Unknown";
+  }
+}
+
+/*!
+ * \brief Lazily creates and caches one DeviceAPI instance per device type,
+ *  plus a single instance shared by all RPC device types.
+ */
+class DeviceAPIManager {
+ public:
+  /*! \brief Number of slots in the per-device-type cache. */
+  static const int kMaxDeviceAPI = 32;
+  /*! \brief Get the API for the device type of the given context. */
+  static DeviceAPI* Get(const TVMContext& ctx) {
+    return Get(ctx.device_type);
+  }
+  /*!
+   * \brief Get the API for a device type.
+   * \param dev_type The device type code.
+   * \param allow_missing When true, a device whose factory is not registered
+   *        yields nullptr instead of a failed CHECK.
+   */
+  static DeviceAPI* Get(int dev_type, bool allow_missing = false) {
+    return Global()->GetAPI(dev_type, allow_missing);
+  }
+
+ private:
+  std::array<DeviceAPI*, kMaxDeviceAPI> api_;
+  DeviceAPI* rpc_api_{nullptr};
+  std::mutex mutex_;
+  // constructor
+  DeviceAPIManager() {
+    std::fill(api_.begin(), api_.end(), nullptr);
+  }
+  // Global static variable.
+  static DeviceAPIManager* Global() {
+    static DeviceAPIManager inst;
+    return &inst;
+  }
+  // Get or initialize API.
+  DeviceAPI* GetAPI(int type, bool allow_missing) {
+    if (type < kRPCSessMask) {
+      // The type indexes a fixed-size table; reject values outside it before
+      // touching the array (negative codes and codes between kMaxDeviceAPI
+      // and kRPCSessMask previously read out of bounds).
+      CHECK(type >= 0 && type < kMaxDeviceAPI)
+          << "Device type " << type << " is out of the supported range.";
+      // Return a cached entry without locking; checked again under the lock.
+      if (api_[type] != nullptr) return api_[type];
+      std::lock_guard<std::mutex> lock(mutex_);
+      if (api_[type] != nullptr) return api_[type];
+      api_[type] = GetAPI(DeviceName(type), allow_missing);
+      return api_[type];
+    } else {
+      // Every type at or above kRPCSessMask shares the single "rpc" API.
+      if (rpc_api_ != nullptr) return rpc_api_;
+      std::lock_guard<std::mutex> lock(mutex_);
+      if (rpc_api_ != nullptr) return rpc_api_;
+      rpc_api_ = GetAPI("rpc", allow_missing);
+      return rpc_api_;
+    }
+  }
+  // Look up the registered factory "device_api.<name>" and invoke it.
+  DeviceAPI* GetAPI(const std::string& name, bool allow_missing) {
+    std::string factory = "device_api." + name;
+    auto* f = Registry::Get(factory);
+    if (f == nullptr) {
+      CHECK(allow_missing)
+          << "Device API " << name << " is not enabled.";
+      return nullptr;
+    }
+    void* ptr = (*f)();
+    return static_cast<DeviceAPI*>(ptr);
+  }
+};
+
+// Public entry point: forwards to the DeviceAPIManager cache.
+DeviceAPI* DeviceAPI::Get(TVMContext ctx, bool allow_missing) {
+  const int dev_type = static_cast<int>(ctx.device_type);
+  return DeviceAPIManager::Get(dev_type, allow_missing);
+}
+
+// Default workspace allocation: a plain data-space allocation aligned to
+// kTempAllocaAlignment.
+void* DeviceAPI::AllocWorkspace(TVMContext ctx,
+                                size_t size,
+                                TVMType type_hint) {
+  void* workspace = this->AllocDataSpace(ctx, size, kTempAllocaAlignment, type_hint);
+  return workspace;
+}
+
+// Default workspace release: hands the pointer back to FreeDataSpace.
+void DeviceAPI::FreeWorkspace(TVMContext ctx, void* ptr) {
+  this->FreeDataSpace(ctx, ptr);
+}
+
+// Default stream hooks: each one reports that the device has no stream
+// support through LOG(FATAL).
+TVMStreamHandle DeviceAPI::CreateStream(TVMContext ctx) {
+  LOG(FATAL) << "Device does not support stream api.";
+  return nullptr;
+}
+
+void DeviceAPI::FreeStream(TVMContext ctx, TVMStreamHandle stream) {
+  LOG(FATAL) << "Device does not support stream api.";
+}
+
+void DeviceAPI::SyncStreamFromTo(TVMContext ctx,
+                                 TVMStreamHandle event_src,
+                                 TVMStreamHandle event_dst) {
+  LOG(FATAL) << "Device does not support stream api.";
+}
+}  // namespace runtime
+}  // namespace tvm
+
+using namespace tvm::runtime;
+
+/*! \brief Scratch state used by the C API, accessed through dmlc::ThreadLocalStore. */
+struct TVMRuntimeEntry {
+  // backing storage for string-like values returned by TVMFuncCall
+  std::string ret_str;
+  // message reported by TVMGetLastError
+  std::string last_error;
+  // byte-array view over ret_str handed out by TVMFuncCall
+  TVMByteArray ret_bytes;
+};
+
+using TVMAPIRuntimeStore = dmlc::ThreadLocalStore<TVMRuntimeEntry>;
+
+const char *TVMGetLastError() {
+  return TVMAPIRuntimeStore::Get()->last_error.c_str();
+}
+
+void TVMAPISetLastError(const char* msg) {
+#ifndef _LIBCPP_SGX_CONFIG
+  TVMAPIRuntimeStore::Get()->last_error = msg;
+#else
+  sgx::OCallPackedFunc("__sgx_set_last_error__", msg);
+#endif
+}
+
+// Loads a module from file and returns a heap-allocated Module through *out.
+int TVMModLoadFromFile(const char* file_name,
+                       const char* format,
+                       TVMModuleHandle* out) {
+  API_BEGIN();
+  Module loaded = Module::LoadFromFile(file_name, format);
+  Module* heap_copy = new Module(loaded);
+  *out = heap_copy;
+  API_END();
+}
+
+// Makes module `mod` import module `dep`.
+int TVMModImport(TVMModuleHandle mod,
+                 TVMModuleHandle dep) {
+  API_BEGIN();
+  Module* importer = static_cast<Module*>(mod);
+  Module* dependency = static_cast<Module*>(dep);
+  importer->Import(*dependency);
+  API_END();
+}
+
+// Looks up func_name in the module. *func receives a heap-allocated
+// PackedFunc, or nullptr when the lookup yields an empty function.
+int TVMModGetFunction(TVMModuleHandle mod,
+                      const char* func_name,
+                      int query_imports,
+                      TVMFunctionHandle *func) {
+  API_BEGIN();
+  Module* module = static_cast<Module*>(mod);
+  const bool search_imports = (query_imports != 0);
+  PackedFunc pf = module->GetFunction(func_name, search_imports);
+  if (pf == nullptr) {
+    *func = nullptr;
+  } else {
+    *func = new PackedFunc(pf);
+  }
+  API_END();
+}
+
+// Destroys the Module behind the handle.
+int TVMModFree(TVMModuleHandle mod) {
+  API_BEGIN();
+  Module* module = static_cast<Module*>(mod);
+  delete module;
+  API_END();
+}
+
+// Resolves func_name through ModuleNode::GetFuncFromEnv and returns the
+// result as an opaque function handle.
+int TVMBackendGetFuncFromEnv(void* mod_node,
+                             const char* func_name,
+                             TVMFunctionHandle *func) {
+  API_BEGIN();
+  ModuleNode* node = static_cast<ModuleNode*>(mod_node);
+  *func = (TVMFunctionHandle)(node->GetFuncFromEnv(func_name));  // NOLINT(*)
+  API_END();
+}
+
+// Allocates a workspace of `size` bytes on the given device; the dtype
+// code/bits are passed along as a single-lane type hint.
+void* TVMBackendAllocWorkspace(int device_type,
+                               int device_id,
+                               uint64_t size,
+                               int dtype_code_hint,
+                               int dtype_bits_hint) {
+  TVMContext ctx;
+  ctx.device_id = device_id;
+  ctx.device_type = static_cast<DLDeviceType>(device_type);
+
+  TVMType type_hint;
+  type_hint.lanes = 1;
+  type_hint.code = static_cast<decltype(type_hint.code)>(dtype_code_hint);
+  type_hint.bits = static_cast<decltype(type_hint.bits)>(dtype_bits_hint);
+
+  DeviceAPI* api = DeviceAPIManager::Get(ctx);
+  const size_t nbytes = static_cast<size_t>(size);
+  return api->AllocWorkspace(ctx, nbytes, type_hint);
+}
+
+// Releases a workspace through the device API of the given device; always returns 0.
+int TVMBackendFreeWorkspace(int device_type,
+                            int device_id,
+                            void* ptr) {
+  TVMContext ctx;
+  ctx.device_id = device_id;
+  ctx.device_type = static_cast<DLDeviceType>(device_type);
+  DeviceAPI* api = DeviceAPIManager::Get(ctx);
+  api->FreeWorkspace(ctx, ptr);
+  return 0;
+}
+
+// Runs f(cdata) only while *handle is still null, marking the handle as
+// used before the call; later calls return 0 without invoking f.
+// nbytes is not used here.
+int TVMBackendRunOnce(void** handle,
+                      int (*f)(void*),
+                      void* cdata,
+                      int nbytes) {
+  if (*handle != nullptr) {
+    return 0;
+  }
+  *handle = reinterpret_cast<void*>(1);
+  return (*f)(cdata);
+}
+
+// Destroys the PackedFunc behind the handle.
+int TVMFuncFree(TVMFunctionHandle func) {
+  API_BEGIN();
+  PackedFunc* packed = static_cast<PackedFunc*>(func);
+  delete packed;
+  API_END();
+}
+
+// Invokes a packed function and writes its result to ret_val/ret_type_code.
+// String-like results (kStr, kTVMType, kBytes) are copied into the runtime
+// store entry, and the returned pointer refers to that copy.
+int TVMFuncCall(TVMFunctionHandle func,
+                TVMValue* args,
+                int* arg_type_codes,
+                int num_args,
+                TVMValue* ret_val,
+                int* ret_type_code) {
+  API_BEGIN();
+  TVMRetValue rv;
+  const PackedFunc* callee = static_cast<const PackedFunc*>(func);
+  callee->CallPacked(TVMArgs(args, arg_type_codes, num_args), &rv);
+
+  const int code = rv.type_code();
+  const bool string_like = (code == kStr || code == kTVMType || code == kBytes);
+  if (!string_like) {
+    rv.MoveToCHost(ret_val, ret_type_code);
+  } else {
+    TVMRuntimeEntry* entry = TVMAPIRuntimeStore::Get();
+    // A type value is converted to its string form; the other two kinds
+    // already hold a std::string.
+    if (code == kTVMType) {
+      entry->ret_str = rv.operator std::string();
+    } else {
+      entry->ret_str = *rv.ptr<std::string>();
+    }
+    if (code == kBytes) {
+      entry->ret_bytes.data = entry->ret_str.c_str();
+      entry->ret_bytes.size = entry->ret_str.length();
+      *ret_type_code = kBytes;
+      ret_val->v_handle = &(entry->ret_bytes);
+    } else {
+      *ret_type_code = kStr;
+      ret_val->v_str = entry->ret_str.c_str();
+    }
+  }
+  API_END();
+}
+
+// Stores a single return value (num_ret must be 1) into the return slot.
+int TVMCFuncSetReturn(TVMRetValueHandle ret,
+                      TVMValue* value,
+                      int* type_code,
+                      int num_ret) {
+  API_BEGIN();
+  CHECK_EQ(num_ret, 1);
+  TVMRetValue& slot = *static_cast<TVMRetValue*>(ret);
+  slot = TVMArgValue(value[0], type_code[0]);
+  API_END();
+}
+
+// Wraps a C callback as a heap-allocated PackedFunc. A non-zero status
+// from the callback is turned into a dmlc::Error that carries the last
+// error message.
+int TVMFuncCreateFromCFunc(TVMPackedCFunc func,
+                           void* resource_handle,
+                           TVMPackedCFuncFinalizer fin,
+                           TVMFunctionHandle *out) {
+  API_BEGIN();
+  if (fin != nullptr) {
+    // Hold the resource in a shared_ptr with fin as its deleter, so fin
+    // runs once the last copy of the closure is destroyed.
+    std::shared_ptr<void> guarded(resource_handle, fin);
+    *out = new PackedFunc(
+        [func, guarded](TVMArgs args, TVMRetValue* rv) {
+          const int status = func((TVMValue*)args.values, (int*)args.type_codes, // NOLINT(*)
+                                  args.num_args, rv, guarded.get());
+          if (status == 0) return;
+          std::string err = "TVMCall CFunc Error:\n";
+          err += TVMGetLastError();
+          throw dmlc::Error(err);
+        });
+  } else {
+    // No finalizer: the raw resource pointer is captured as-is.
+    *out = new PackedFunc(
+        [func, resource_handle](TVMArgs args, TVMRetValue* rv) {
+          const int status = func((TVMValue*)args.values, (int*)args.type_codes, // NOLINT(*)
+                                  args.num_args, rv, resource_handle);
+          if (status == 0) return;
+          std::string err = "TVMCall CFunc Error:\n";
+          err += TVMGetLastError();
+          throw dmlc::Error(err);
+        });
+  }
+  API_END();
+}
+
+// Creates a stream on the given device and returns it through *out.
+int TVMStreamCreate(int device_type, int device_id, TVMStreamHandle* out) {
+  API_BEGIN();
+  TVMContext ctx;
+  ctx.device_id = device_id;
+  ctx.device_type = static_cast<DLDeviceType>(device_type);
+  DeviceAPI* api = DeviceAPIManager::Get(ctx);
+  *out = api->CreateStream(ctx);
+  API_END();
+}
+
+// Frees a stream on the given device.
+int TVMStreamFree(int device_type, int device_id, TVMStreamHandle stream) {
+  API_BEGIN();
+  TVMContext ctx;
+  ctx.device_id = device_id;
+  ctx.device_type = static_cast<DLDeviceType>(device_type);
+  DeviceAPI* api = DeviceAPIManager::Get(ctx);
+  api->FreeStream(ctx, stream);
+  API_END();
+}
+
+// Passes the stream to DeviceAPI::SetStream for the given device.
+int TVMSetStream(int device_type, int device_id, TVMStreamHandle stream) {
+  API_BEGIN();
+  TVMContext ctx;
+  ctx.device_id = device_id;
+  ctx.device_type = static_cast<DLDeviceType>(device_type);
+  DeviceAPI* api = DeviceAPIManager::Get(ctx);
+  api->SetStream(ctx, stream);
+  API_END();
+}
+
+// Passes the stream to DeviceAPI::StreamSync for the given device.
+int TVMSynchronize(int device_type, int device_id, TVMStreamHandle stream) {
+  API_BEGIN();
+  TVMContext ctx;
+  ctx.device_id = device_id;
+  ctx.device_type = static_cast<DLDeviceType>(device_type);
+  DeviceAPI* api = DeviceAPIManager::Get(ctx);
+  api->StreamSync(ctx, stream);
+  API_END();
+}
+
+// Passes the src/dst streams to DeviceAPI::SyncStreamFromTo for the given device.
+int TVMStreamStreamSynchronize(int device_type,
+                               int device_id,
+                               TVMStreamHandle src,
+                               TVMStreamHandle dst) {
+  API_BEGIN();
+  TVMContext ctx;
+  ctx.device_id = device_id;
+  ctx.device_type = static_cast<DLDeviceType>(device_type);
+  DeviceAPI* api = DeviceAPIManager::Get(ctx);
+  api->SyncStreamFromTo(ctx, src, dst);
+  API_END();
+}
+
+// Round-trips *value through a TVMRetValue and writes the result back in
+// place; the resulting type code must equal `code`.
+int TVMCbArgToReturn(TVMValue* value, int code) {
+  API_BEGIN();
+  tvm::runtime::TVMRetValue holder;
+  holder = tvm::runtime::TVMArgValue(*value, code);
+  int moved_code;
+  holder.MoveToCHost(value, &moved_code);
+  CHECK_EQ(moved_code, code);
+  API_END();
+}
+
+// set device api
+// args[0]: device type code; args[1]: device id.
+TVM_REGISTER_GLOBAL(tvm::runtime::symbol::tvm_set_device)
+.set_body([](TVMArgs args, TVMRetValue *ret) {
+    TVMContext ctx;
+    ctx.device_type = static_cast<DLDeviceType>(args[0].operator int());
+    ctx.device_id = args[1];
+    DeviceAPI* api = DeviceAPIManager::Get(ctx);
+    api->SetDevice(ctx);
+  });
+
+// get device attribute
+// args[0]: device type code; args[1]: device id; args[2]: attribute kind.
+TVM_REGISTER_GLOBAL("_GetDeviceAttr")
+.set_body([](TVMArgs args, TVMRetValue *ret) {
+    TVMContext ctx;
+    ctx.device_type = static_cast<DLDeviceType>(args[0].operator int());
+    ctx.device_id = args[1];
+
+    const DeviceAttrKind kind = static_cast<DeviceAttrKind>(args[2].operator int());
+    if (kind != kExist) {
+      DeviceAPIManager::Get(ctx)->GetAttr(ctx, kind, ret);
+      return;
+    }
+    // Existence query: a device whose API is not available is reported
+    // as 0 instead of failing.
+    DeviceAPI* api = DeviceAPIManager::Get(ctx.device_type, true);
+    if (api == nullptr) {
+      *ret = 0;
+    } else {
+      api->GetAttr(ctx, kind, ret);
+    }
+  });