Unverified Commit 9e9a9488 authored by Da Zheng, committed by GitHub

[GraphIndex] Immutable graph index using MXNet CSRArray (#86)

* add immutable graph index.

* update ImmutableGraphIndex.

* add benchmarks of subgraph generation.

* fix node_subgraphs.

* use scipy synthetic data in immutable graph.

* create immutable_graph.

* use spmv in gcn.

* fix immutable graph index.

* update graph index.

* create ImmutableSubgraphIndex.

* return subgraph node and edge mapping.

* fix benchmark.

* add mapping to subgraph nid.

* test in_edges and out_edges.

* update benchmark.

* Fix immutable subgraph.

* complete in_edges and out_edges.

* fix benchmarks.

* add test on node_subgraphs

* add sse.

* construct subgraphs in parallel.

* add in/out degree(s).

* make immutable graph index backend-specific.

* implement has_edge(s) and edge_id(s).

* Revert "use spmv in gcn."

This reverts commit 9cfed5f5fb3dd2ed9b98745348b1c0e9731ed7f7.

* implement node_subgraphs in GraphIndex.

* fix sse.

* address comments.

* address comments.

* Update mxnet docker.
parent 0d6cd30b
......@@ -139,7 +139,7 @@ pipeline {
stage('CPU') {
agent {
docker {
image 'zhengda1936/dgl-mxnet-cpu:v2'
image 'zhengda1936/dgl-mxnet-cpu:v3'
}
}
stages {
......
"""
Learning Steady-States of Iterative Algorithms over Graphs
Paper: http://proceedings.mlr.press/v80/dai18a.html
"""
import argparse
import numpy as np
import time
import mxnet as mx
from mxnet import gluon
import dgl
import dgl.function as fn
from dgl import DGLGraph, utils
from dgl.data import register_data_args, load_data
def gcn_msg(src, edge):
# TODO should we use concat?
return {'m': mx.nd.concat(src['in'], src['h'], dim=1)}
def gcn_reduce(node, msgs):
return {'accum': mx.nd.sum(msgs['m'], 1)}
class NodeUpdate(gluon.Block):
def __init__(self, out_feats, activation=None, alpha=0.9):
super(NodeUpdate, self).__init__()
self.linear1 = gluon.nn.Dense(out_feats, activation=activation)
# TODO what is the dimension here?
self.linear2 = gluon.nn.Dense(out_feats)
self.alpha = alpha
def forward(self, node):
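# SSE steady-state update: h <- (1 - alpha) * h + alpha * f(input, accum).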
tmp = mx.nd.concat(node['in'], node['accum'], dim=1)
hidden = self.linear2(self.linear1(tmp))
return {'h': node['h'] * (1 - self.alpha) + self.alpha * hidden}
class SSEUpdateHidden(gluon.Block):
def __init__(self,
n_hidden,
activation,
dropout,
use_spmv):
super(SSEUpdateHidden, self).__init__()
self.layer = NodeUpdate(n_hidden, activation)
self.dropout = dropout
self.use_spmv = use_spmv
def forward(self, g, vertices):
if self.use_spmv:
feat = g.get_n_repr()['in']
h = g.get_n_repr()['h']
g.set_n_repr({'cat': mx.nd.concat(feat, h, dim=1)})
msg_func = fn.copy_src(src='cat', out='tmp')
reduce_func = fn.sum(msg='tmp', out='accum')
else:
msg_func = gcn_msg
reduce_func = gcn_reduce
if vertices is None:
g.update_all(msg_func, reduce_func, self.layer)
ret = g.get_n_repr()['h']
else:
# We don't need dropout for inference.
if self.dropout:
# TODO here we apply dropout on all vertex representation.
val = mx.nd.Dropout(g.get_n_repr()['h'], p=self.dropout)
g.set_n_repr({'h': val})
g.pull(vertices, msg_func, reduce_func, self.layer)
ctx = g.get_n_repr()['h'].context
ret = mx.nd.take(g.get_n_repr()['h'], vertices.tousertensor().as_in_context(ctx))
return ret
class SSEPredict(gluon.Block):
def __init__(self, update_hidden, out_feats, dropout):
super(SSEPredict, self).__init__()
self.linear1 = gluon.nn.Dense(out_feats, activation='relu')
self.linear2 = gluon.nn.Dense(out_feats)
self.update_hidden = update_hidden
self.dropout = dropout
def forward(self, g, vertices):
hidden = self.update_hidden(g, vertices)
if self.dropout:
hidden = mx.nd.Dropout(hidden, p=self.dropout)
return self.linear2(self.linear1(hidden))
def subgraph_gen(g, seed_vertices):
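# Expand each batch of seed vertices to its 1-hop in-neighborhood, then
# build all induced subgraphs with a single batched g.subgraphs() call.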
vertices = []
for seed in seed_vertices:
src, _, _ = g.in_edges(seed)
vs = np.concatenate((src.asnumpy(), seed.asnumpy()), axis=0)
vs = mx.nd.array(np.unique(vs), dtype=np.int64)
vertices.append(vs)
subgs = g.subgraphs(vertices)
nids = []
for i, subg in enumerate(subgs):
subg.copy_from_parent()
nids.append(subg.map_to_subgraph_nid(utils.toindex(seed_vertices[i])))
return subgs, nids
def main(args, data):
features = mx.nd.array(data.features)
labels = mx.nd.array(data.labels)
train_size = int(len(labels) * args.train_percent)
train_vs = np.arange(train_size, dtype='int64')
eval_vs = np.arange(train_size, len(labels), dtype='int64')
print("train size: " + str(len(train_vs)))
print("eval size: " + str(len(eval_vs)))
train_labels = mx.nd.array(data.labels[train_vs])
eval_labels = mx.nd.array(data.labels[eval_vs])
in_feats = features.shape[1]
n_classes = data.num_labels
n_edges = data.graph.number_of_edges()
if args.gpu < 0:
    cuda = False
    ctx = mx.cpu(0)
else:
    cuda = True
    ctx = mx.gpu(args.gpu)
    features = features.as_in_context(ctx)
    train_labels = train_labels.as_in_context(ctx)
    eval_labels = eval_labels.as_in_context(ctx)
# create the SSE model
try:
graph = data.graph.get_graph()
except AttributeError:
graph = data.graph
g = DGLGraph(graph, readonly=True)
g.set_n_repr({'in': features, 'h': mx.nd.random.normal(shape=(g.number_of_nodes(), args.n_hidden),
ctx=ctx)})
update_hidden = SSEUpdateHidden(args.n_hidden, 'relu', args.update_dropout, args.use_spmv)
model = SSEPredict(update_hidden, args.n_hidden, args.predict_dropout)
model.initialize(ctx=ctx)
# use optimizer
num_batches = int(g.number_of_nodes() / args.batch_size)
scheduler = mx.lr_scheduler.CosineScheduler(args.n_epochs * num_batches,
args.lr * 10, 0, 0, args.lr/5)
trainer = gluon.Trainer(model.collect_params(), 'adam', {'learning_rate': args.lr,
'lr_scheduler': scheduler})
# initialize graph
dur = []
for epoch in range(args.n_epochs):
# compute vertex embedding.
update_hidden(g, None)
t0 = time.time()
permute = np.random.permutation(len(train_vs))
randv = train_vs[permute]
rand_labels = train_labels[permute]
data_iter = mx.io.NDArrayIter(data=mx.nd.array(randv, dtype='int64'), label=rand_labels,
batch_size=args.batch_size)
train_loss = 0
# Don't shadow the dataset variables `data` and `labels` from main().
seed_batches = []
label_batches = []
for batch in data_iter:
    seed_batches.append(batch.data[0])
    label_batches.append(batch.label[0])
    if len(seed_batches) < args.num_parallel_subgraphs:
        continue

    subgs, seed_ids = subgraph_gen(g, seed_batches)
    for subg, seed_id, label, seeds in zip(subgs, seed_ids, label_batches, seed_batches):
        with mx.autograd.record():
            logits = model(subg, seed_id)
            loss = mx.nd.softmax_cross_entropy(logits, label)
        loss.backward()
        trainer.step(seeds.shape[0])
        train_loss += loss.asnumpy()[0]
    seed_batches = []
    label_batches = []
#logits = model(eval_vs)
#eval_loss = mx.nd.softmax_cross_entropy(logits, eval_labels)
#eval_loss = eval_loss.asnumpy()[0]
eval_loss = 0
dur.append(time.time() - t0)
print("Epoch {:05d} | Train Loss {:.4f} | Eval Loss {:.4f} | Time(s) {:.4f} | ETputs(KTEPS) {:.2f}".format(
epoch, train_loss, eval_loss, np.mean(dur), n_edges / np.mean(dur) / 1000))
if __name__ == '__main__':
parser = argparse.ArgumentParser(description='GCN')
register_data_args(parser)
parser.add_argument("--gpu", type=int, default=-1,
help="gpu")
parser.add_argument("--lr", type=float, default=1e-3,
help="learning rate")
parser.add_argument("--batch-size", type=int, default=128,
help="number of vertices in a batch")
parser.add_argument("--n-epochs", type=int, default=20,
help="number of training epochs")
parser.add_argument("--n-hidden", type=int, default=16,
help="number of hidden gcn units")
parser.add_argument("--warmup", type=int, default=10,
help="number of iterations to warm up with large learning rate")
parser.add_argument("--update-dropout", type=float, default=0.5,
help="the dropout rate for updating vertex embedding")
parser.add_argument("--predict-dropout", type=float, default=0.5,
help="the dropout rate for prediction")
parser.add_argument("--train_percent", type=float, default=0.5,
help="the percentage of data used for training")
parser.add_argument("--use-spmv", type=bool, default=False,
help="use SpMV for faster speed.")
parser.add_argument("--num-parallel-subgraphs", type=int, default=1,
help="the number of subgraphs to construct in parallel.")
args = parser.parse_args()
# load and preprocess dataset
data = load_data(args)
main(args, data)
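As an aside (a minimal sketch, not part of this commit), here is why the --use-spmv path above is equivalent to the explicit message-passing path: copying a feature along every edge and summing at each destination is the same as multiplying by the (dst x src) adjacency matrix.

import mxnet as mx
import numpy as np

n = 4
src, dst = [0, 1, 2], [1, 2, 3]
feat = mx.nd.random.normal(shape=(n, 2))

adj = mx.nd.zeros((n, n))              # row = destination, column = source
for s, d in zip(src, dst):
    adj[d, s] = 1.0

accum_spmv = mx.nd.dot(adj, feat)      # SpMV formulation
accum_mp = mx.nd.zeros_like(feat)      # explicit message passing
for s, d in zip(src, dst):
    accum_mp[d] = accum_mp[d] + feat[s]
assert np.allclose(accum_spmv.asnumpy(), accum_mp.asnumpy())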
......@@ -64,6 +64,33 @@ class GraphOp {
* \return a list of partitioned graphs
*/
static std::vector<Graph> DisjointPartitionBySizes(const Graph* graph, IdArray sizes);
/*!
* \brief Map vids in the parent graph to the vids in the subgraph.
*
* \param parent_vid_map An array that maps the vids in the parent graph to the
* subgraph. The elements store the vertex Ids in the parent graph, and the
* indices indicate the vertex Ids in the subgraph.
* \param query The vertex Ids in the parent graph.
* \return an Id array that contains the subgraph node Ids.
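*
* For example, if parent_vid_map is [2, 5, 8] (subgraph vertex 0 is parent
* vertex 2, and so on), then a query of [8, 2] returns [2, 0].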
*/
static IdArray MapParentIdToSubgraphId(IdArray parent_vid_map, IdArray query);
/*!
* \brief Expand an Id array based on the offset array.
*
* For example,
* ids: [0, 1, 2, 3, 4],
* offset: [0, 2, 2, 5, 6, 7],
* result: [0, 0, 2, 2, 2, 3, 4].
* The offset array has one more element than the ids array;
* [offset[i], offset[i+1]) gives the positions of ids[i] in the result array.
*
* \param ids An array that contains the node or edge Ids.
* \param offset An array that contains the offset after expansion.
* \return an expanded Id array.
*/
static IdArray ExpandIds(IdArray ids, IdArray offset);
};
} // namespace dgl
......
......@@ -5,9 +5,12 @@ import os
__backend__ = os.environ.get('DGLBACKEND', 'pytorch').lower()
if __backend__ == 'numpy':
from .numpy import *
create_immutable_graph_index = None
elif __backend__ == 'pytorch':
from .pytorch import *
create_immutable_graph_index = None
elif __backend__ == 'mxnet':
from .mxnet import *
from .mxnet_immutable_graph_index import create_immutable_graph_index
else:
raise Exception("Unsupported backend %s" % __backend__)
from __future__ import absolute_import
import ctypes
import numpy as np
import networkx as nx
import scipy.sparse as sp
import mxnet as mx
from .mxnet import to_context
class ImmutableGraphIndex(object):
"""Backend-specific graph index object on immutable graphs.
We can use a CSR matrix to represent a graph structure. For functionality,
one CSR matrix is sufficient. However, for efficient access
to in-edges and out-edges of a directed graph, we need to use two CSR matrices.
In these CSR matrices, both rows and columns represent vertices. In one CSR
matrix, a row stores the in-edges of a vertex: the column index of a non-zero
entry is the Id of a neighboring source vertex, and the stored value is the
corresponding edge Id. The other CSR matrix stores the out-edges in the same
fashion.
Parameters
----------
in_csr : a csr array that stores in-edges.
MXNet CSRArray
out_csr : a csr array that stores out-edges.
MXNet CSRArray
"""
def __init__(self, in_csr, out_csr):
self._in_csr = in_csr
self._out_csr = out_csr
def number_of_nodes(self):
"""Return the number of nodes.
Returns
-------
int
The number of nodes
"""
return len(self._in_csr)
def number_of_edges(self):
"""Return the number of edges.
Returns
-------
int
The number of edges
"""
return self._in_csr.indices.shape[0]
def has_edges(self, u, v):
"""Return true if the edge exists.
Parameters
----------
u : NDArray
The src nodes.
v : NDArray
The dst nodes.
Returns
-------
NDArray
0-1 array indicating existence
"""
ids = mx.nd.contrib.edge_id(self._in_csr, v, u)
return ids >= 0
def edge_ids(self, u, v):
"""Return the edge ids.
Parameters
----------
u : NDArray
The src nodes.
v : NDArray
The dst nodes.
Returns
-------
NDArray
The edge id array (only ids of edges that exist; other pairs are dropped).
"""
ids = mx.nd.contrib.edge_id(self._in_csr, v, u)
ids = ids.asnumpy()
return ids[ids >= 0]
def predecessors(self, v, radius=1):
"""Return the predecessors of the node.
Parameters
----------
v : int
The node.
radius : int, optional
The radius of the neighborhood.
Returns
-------
NDArray
Array of predecessors
"""
if radius > 1:
raise Exception('Immutable graph doesn\'t support predecessors with radius > 1 for now.')
return self._in_csr[v].indices
def successors(self, v, radius=1):
"""Return the successors of the node.
Parameters
----------
v : int
The node.
radius : int, optional
The radius of the neighborhood.
Returns
-------
NDArray
Array of successors
"""
if radius > 1:
raise Exception('Immutable graph doesn\'t support successors with radius > 1 for now.')
return self._out_csr[v].indices
def in_edges(self, v):
"""Return the in edges of the node(s).
Parameters
----------
v : NDArray
The node(s).
Returns
-------
NDArray
The CSR index pointers (row offsets).
NDArray
The src nodes.
NDArray
The edge ids.
"""
rows = mx.nd.take(self._in_csr, v)
return rows.indptr, rows.indices, rows.data
def out_edges(self, v):
"""Return the out edges of the node(s).
Parameters
----------
v : NDArray
The node(s).
Returns
-------
NDArray
The CSR index pointers (row offsets).
NDArray
The dst nodes.
NDArray
The edge ids.
"""
rows = mx.nd.take(self._out_csr, v)
return rows.indptr, rows.indices, rows.data
def edges(self, sorted=False):
"""Return all the edges
Parameters
----------
sorted : bool
True if the returned edges are sorted by their src and dst ids.
Returns
-------
NDArray
The src nodes.
NDArray
The dst nodes.
NDArray
The edge ids.
"""
#TODO(zhengda) we need to return NDArray directly
# We don't need to take care of the sorted flag because the vertex Ids
# are already sorted.
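# In the in-CSR, rows are destinations and columns are sources,
# so src = coo.col and dst = coo.row.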
coo = self._in_csr.asscipy().tocoo()
return coo.col, coo.row, coo.data
def get_in_degree(self):
"""Return the in degrees of all nodes.
Returns
-------
NDArray
degrees
"""
return mx.nd.contrib.getnnz(self._in_csr, axis=1)
def get_out_degree(self):
"""Return the out degrees of all nodes.
Returns
-------
NDArray
degrees
"""
return mx.nd.contrib.getnnz(self._out_csr, axis=1)
def node_subgraph(self, v):
"""Return the induced node subgraph.
Parameters
----------
v : NDArray
The nodes.
Returns
-------
ImmutableGraphIndex
The subgraph index.
NDArray
Induced nodes
NDArray
Induced edges
"""
v = mx.nd.sort(v)
# when return_mapping is turned on, dgl_subgraph returns another CSRArray that
# stores the edge Ids of the original graph.
csr = mx.nd.contrib.dgl_subgraph(self._in_csr, v, return_mapping=True)
induced_nodes = v
induced_edges = csr[1].data
return ImmutableGraphIndex(csr[0], None), induced_nodes, induced_edges
def node_subgraphs(self, vs_arr):
"""Return the induced node subgraphs.
Parameters
----------
vs_arr : a vector of utils.Index
The nodes.
Returns
-------
a vector of ImmutableGraphIndex
The subgraph index.
a vector of NDArrays
Induced nodes of subgraphs.
a vector of NDArrays
Induced edges of subgraphs.
"""
vs_arr = [mx.nd.sort(v) for v in vs_arr]
res = mx.nd.contrib.dgl_subgraph(self._in_csr, *vs_arr, return_mapping=True)
in_csrs = res[0:len(vs_arr)]
induced_nodes = vs_arr
induced_edges = [e.data for e in res[len(vs_arr):]]
assert len(in_csrs) == len(induced_nodes)
assert len(in_csrs) == len(induced_edges)
gis = []
induced_ns = []
induced_es = []
for in_csr, induced_n, induced_e in zip(in_csrs, induced_nodes, induced_edges):
gis.append(ImmutableGraphIndex(in_csr, None))
induced_ns.append(induced_n)
induced_es.append(induced_e)
return gis, induced_ns, induced_es
def adjacency_matrix(self, transpose=False):
"""Return the adjacency matrix representation of this graph.
By default, a row of the returned adjacency matrix represents the destination
of an edge and a column represents the source.
When transpose is True, a row represents the source and a column represents
a destination.
Parameters
----------
transpose : bool
A flag to transpose the returned adjacency matrix.
Returns
-------
NDArray
The adjacency matrix as an MXNet sparse CSR NDArray.
"""
if transpose:
mat = self._out_csr
else:
mat = self._in_csr
indices = mat.indices
indptr = mat.indptr
data = mx.nd.ones(indices.shape, dtype=np.float32)
return mx.nd.sparse.csr_matrix((data, indices, indptr), shape=mat.shape)
def from_coo_matrix(self, out_coo):
"""construct the graph index from a SciPy coo matrix.
Parameters
----------
out_coo : SciPy coo matrix
The non-zero entries indicate out-edges of the graph.
"""
edge_ids = mx.nd.arange(0, len(out_coo.data), step=1, repeat=1, dtype=np.int32)
src = mx.nd.array(out_coo.row, dtype=np.int64)
dst = mx.nd.array(out_coo.col, dtype=np.int64)
# TODO we can't generate a csr_matrix with np.int64 directly.
self.__init__(mx.nd.sparse.csr_matrix((edge_ids, (dst, src)), shape=out_coo.shape).astype(np.int64),
mx.nd.sparse.csr_matrix((edge_ids, (src, dst)), shape=out_coo.shape).astype(np.int64))
def create_immutable_graph_index():
""" Create an empty backend-specific immutable graph index.
Returns
-------
ImmutableGraphIndex
The backend-specific immutable graph index.
"""
return ImmutableGraphIndex(None, None)
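For reference, a minimal usage sketch of this backend index (assuming an MXNet build that ships the mx.nd.contrib CSR operators used above; not part of the diff):

import scipy.sparse as spsp

idx = create_immutable_graph_index()
idx.from_coo_matrix(spsp.random(10, 10, density=0.1, format='coo'))
print(idx.number_of_nodes(), idx.number_of_edges())
print(idx.get_in_degree().asnumpy())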
......@@ -33,14 +33,18 @@ class DGLGraph(object):
Edge feature storage.
multigraph : bool, optional
Whether the graph would be a multigraph (default: False)
readonly : bool, optional
Whether the graph structure is read-only (default: False).
"""
def __init__(self,
graph_data=None,
node_frame=None,
edge_frame=None,
multigraph=False):
multigraph=False,
readonly=False):
# graph
self._graph = create_graph_index(graph_data, multigraph)
self._readonly = readonly
self._graph = create_graph_index(graph_data, multigraph, readonly)
# frame
self._node_frame = node_frame if node_frame is not None else FrameRef()
self._edge_frame = edge_frame if edge_frame is not None else FrameRef()
......@@ -1386,7 +1390,26 @@ class DGLGraph(object):
"""
induced_nodes = utils.toindex(nodes)
sgi = self._graph.node_subgraph(induced_nodes)
return dgl.DGLSubGraph(self, sgi.induced_nodes, sgi.induced_edges, sgi)
return dgl.DGLSubGraph(self, sgi.induced_nodes, sgi.induced_edges,
sgi, readonly=self._readonly)
def subgraphs(self, nodes):
"""Generate the subgraphs among the given nodes.
Parameters
----------
nodes : a list of lists or iterables
A list of node sets, one per subgraph to construct.
Returns
-------
G : A list of DGLSubGraph
The subgraphs.
"""
induced_nodes = [utils.toindex(n) for n in nodes]
sgis = self._graph.node_subgraphs(induced_nodes)
return [dgl.DGLSubGraph(self, sgi.induced_nodes, sgi.induced_edges,
sgi, readonly=self._readonly) for sgi in sgis]
def edge_subgraph(self, edges):
"""Generate the subgraph among the given edges.
......
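A short usage sketch of the new batched API (synthetic data; assumes the MXNet backend, which is currently the only one providing a read-only graph index; not part of the diff):

import scipy.sparse as spsp
import dgl

g = dgl.DGLGraph(spsp.random(100, 100, density=0.05, format='coo'), readonly=True)
subgs = g.subgraphs([[0, 1, 2], [3, 4], [5, 6, 7, 8]])  # one DGLSubGraph per node set
for sg in subgs:
    print(sg.number_of_nodes(), sg.number_of_edges())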
......@@ -9,6 +9,7 @@ from ._ffi.base import c_array
from ._ffi.function import _init_api
from . import backend as F
from . import utils
from .immutable_graph_index import create_immutable_graph_index
GraphIndexHandle = ctypes.c_void_p
......@@ -432,6 +433,24 @@ class GraphIndex(object):
induced_edges = utils.toindex(rst(2))
return SubgraphIndex(rst(0), self, v, induced_edges)
def node_subgraphs(self, vs_arr):
"""Return the induced node subgraphs.
Parameters
----------
vs_arr : a list of utils.Index
The nodes.
Returns
-------
a vector of SubgraphIndex
The subgraph index.
"""
gis = []
for v in vs_arr:
gis.append(self.node_subgraph(v))
return gis
def edge_subgraph(self, e):
"""Return the induced edge subgraph.
......@@ -451,9 +470,20 @@ class GraphIndex(object):
induced_nodes = utils.toindex(rst(1))
return SubgraphIndex(rst(0), self, induced_nodes, e)
def adjacency_matrix(self):
def adjacency_matrix(self, transpose=False):
"""Return the adjacency matrix representation of this graph.
By default, a row of the returned adjacency matrix represents the destination
of an edge and a column represents the source.
When transpose is True, a row represents the source and a column represents
a destination.
Parameters
----------
transpose : bool
A flag to transpose the returned adjacency matrix.
Returns
-------
utils.CtxCachedObject
......@@ -463,6 +493,9 @@ class GraphIndex(object):
src, dst, _ = self.edges(sorted=False)
src = F.unsqueeze(src.tousertensor(), 0)
dst = F.unsqueeze(dst.tousertensor(), 0)
if transpose:
idx = F.pack([src, dst])
else:
idx = F.pack([dst, src])
n = self.number_of_nodes()
dat = F.ones((self.number_of_edges(),))
......@@ -658,6 +691,25 @@ class SubgraphIndex(GraphIndex):
"""
return self._induced_edges
def map_to_subgraph_nid(subgraph, parent_nids):
"""Map parent node Ids to the subgraph node Ids.
Parameters
----------
subgraph: SubgraphIndex or ImmutableSubgraphIndex
the graph index of a subgraph
parent_nids: utils.Index
Node Ids in the parent graph.
Returns
-------
utils.Index
Node Ids in the subgraph.
"""
return utils.toindex(_CAPI_DGLMapSubgraphNID(subgraph.induced_nodes.todgltensor(),
parent_nids.todgltensor()))
def disjoint_union(graphs):
"""Return a disjoint union of the input graphs.
......@@ -716,7 +768,7 @@ def disjoint_partition(graph, num_or_size_splits):
graphs.append(GraphIndex(handle))
return graphs
def create_graph_index(graph_data=None, multigraph=False):
def create_graph_index(graph_data=None, multigraph=False, readonly=False):
"""Create a graph index object.
Parameters
......@@ -729,6 +781,12 @@ def create_graph_index(graph_data=None, multigraph=False):
if isinstance(graph_data, GraphIndex):
return graph_data
if readonly and graph_data is not None:
gi = create_immutable_graph_index(graph_data)
# If we can't create an immutable graph index, we'll have to fall back.
if gi is not None:
return gi
handle = _CAPI_DGLGraphCreate(multigraph)
gi = GraphIndex(handle)
......
from __future__ import absolute_import
import ctypes
import numpy as np
import networkx as nx
import scipy.sparse as sp
from ._ffi.function import _init_api
from . import backend as F
from . import utils
class ImmutableGraphIndex(object):
"""Graph index object on immutable graphs.
Parameters
----------
backend_sparse : a sparse matrix container provided by the backend framework.
"""
def __init__(self, backend_sparse):
self._sparse = backend_sparse
self._num_nodes = None
self._num_edges = None
self._in_deg = None
self._out_deg = None
self._cache = {}
def add_nodes(self, num):
"""Add nodes.
Parameters
----------
num : int
Number of nodes to be added.
"""
raise Exception('Immutable graph doesn\'t support adding nodes')
def add_edge(self, u, v):
"""Add one edge.
Parameters
----------
u : int
The src node.
v : int
The dst node.
"""
raise Exception('Immutable graph doesn\'t support adding an edge')
def add_edges(self, u, v):
"""Add many edges.
Parameters
----------
u : utils.Index
The src nodes.
v : utils.Index
The dst nodes.
"""
raise Exception('Immutable graph doesn\'t support adding edges')
def clear(self):
"""Clear the graph."""
raise Exception('Immutable graph doesn\'t support clearing up')
def number_of_nodes(self):
"""Return the number of nodes.
Returns
-------
int
The number of nodes
"""
if self._num_nodes is None:
self._num_nodes = self._sparse.number_of_nodes()
return self._num_nodes
def number_of_edges(self):
"""Return the number of edges.
Returns
-------
int
The number of edges
"""
if self._num_edges is None:
self._num_edges = self._sparse.number_of_edges()
return self._num_edges
def has_node(self, vid):
"""Return true if the node exists.
Parameters
----------
vid : int
The nodes
Returns
-------
bool
True if the node exists
"""
return 0 <= vid < self.number_of_nodes()
def has_nodes(self, vids):
"""Return true if the nodes exist.
Parameters
----------
vids : utils.Index
The nodes
Returns
-------
utils.Index
0-1 array indicating existence
"""
vid_array = vids.tousertensor()
return utils.toindex(vid_array < self.number_of_nodes())
def has_edge_between(self, u, v):
"""Return true if the edge exists.
Parameters
----------
u : int
The src node.
v : int
The dst node.
Returns
-------
bool
True if the edge exists
"""
u = F.tensor([u])
v = F.tensor([v])
return self._sparse.has_edges(u, v).asnumpy()[0]
def has_edges_between(self, u, v):
"""Return true if the edge exists.
Parameters
----------
u : utils.Index
The src nodes.
v : utils.Index
The dst nodes.
Returns
-------
utils.Index
0-1 array indicating existence
"""
return utils.toindex(self._sparse.has_edges(u.tousertensor(), v.tousertensor()))
def predecessors(self, v, radius=1):
"""Return the predecessors of the node.
Parameters
----------
v : int
The node.
radius : int, optional
The radius of the neighborhood.
Returns
-------
utils.Index
Array of predecessors
"""
pred = self._sparse.predecessors(v, radius)
return utils.toindex(pred)
def successors(self, v, radius=1):
"""Return the successors of the node.
Parameters
----------
v : int
The node.
radius : int, optional
The radius of the neighborhood.
Returns
-------
utils.Index
Array of successors
"""
succ = self._sparse.successors(v, radius)
return utils.toindex(succ)
def edge_id(self, u, v):
"""Return the id of the edge.
Parameters
----------
u : int
The src node.
v : int
The dst node.
Returns
-------
utils.Index
The edge id array.
"""
u = F.tensor([u])
v = F.tensor([v])
eid = self._sparse.edge_ids(u, v)
return utils.toindex(eid)
def edge_ids(self, u, v):
"""Return the edge ids.
Parameters
----------
u : utils.Index
The src nodes.
v : utils.Index
The dst nodes.
Returns
-------
utils.Index
The edge id array.
"""
u = u.tousertensor()
v = v.tousertensor()
ids = self._sparse.edge_ids(u, v)
return utils.toindex(ids)
def in_edges(self, v):
"""Return the in edges of the node(s).
Parameters
----------
v : utils.Index
The node(s).
Returns
-------
utils.Index
The src nodes.
utils.Index
The dst nodes.
utils.Index
The edge ids.
"""
dst = v.tousertensor()
indptr, src, edges = self._sparse.in_edges(dst)
off = utils.toindex(indptr)
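# Expand each queried vertex id once per in-edge so dst aligns
# element-wise with the src and edge id arrays.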
dst = _CAPI_DGLExpandIds(v.todgltensor(), off.todgltensor())
return utils.toindex(src), utils.toindex(dst), utils.toindex(edges)
def out_edges(self, v):
"""Return the out edges of the node(s).
Parameters
----------
v : utils.Index
The node(s).
Returns
-------
utils.Index
The src nodes.
utils.Index
The dst nodes.
utils.Index
The edge ids.
"""
src = v.tousertensor()
indptr, dst, edges = self._sparse.out_edges(src)
off = utils.toindex(indptr)
src = _CAPI_DGLExpandIds(v.todgltensor(), off.todgltensor())
return utils.toindex(src), utils.toindex(dst), utils.toindex(edges)
def edges(self, sorted=False):
"""Return all the edges
Parameters
----------
sorted : bool
True if the returned edges are sorted by their src and dst ids.
Returns
-------
utils.Index
The src nodes.
utils.Index
The dst nodes.
utils.Index
The edge ids.
"""
if "all_edges" in self._cache:
return self._cache["all_edges"]
src, dst, edges = self._sparse.edges(sorted)
self._cache["all_edges"] = (utils.toindex(src), utils.toindex(dst), utils.toindex(edges))
return self._cache["all_edges"]
def _get_in_degree(self):
if 'in_deg' not in self._cache:
self._cache['in_deg'] = self._sparse.get_in_degree()
return self._cache['in_deg']
def _get_out_degree(self):
if 'out_deg' not in self._cache:
self._cache['out_deg'] = self._sparse.get_out_degree()
return self._cache['out_deg']
def in_degree(self, v):
"""Return the in degree of the node.
Parameters
----------
v : int
The node.
Returns
-------
int
The in degree.
"""
deg = self._get_in_degree()
return deg[v]
def in_degrees(self, v):
"""Return the in degrees of the nodes.
Parameters
----------
v : utils.Index
The nodes.
Returns
-------
utils.Index
The in degree array.
"""
v_array = v.tousertensor()
deg = self._get_in_degree()
return utils.toindex(F.gather_row(deg, v_array))
def out_degree(self, v):
"""Return the out degree of the node.
Parameters
----------
v : int
The node.
Returns
-------
int
The out degree.
"""
deg = self._get_out_degree()
return deg[v]
def out_degrees(self, v):
"""Return the out degrees of the nodes.
Parameters
----------
v : utils.Index
The nodes.
Returns
-------
utils.Index
The out degree array.
"""
v_array = v.tousertensor()
deg = self._get_out_degree()
return utils.toindex(F.gather_row(deg, v_array))
def node_subgraph(self, v):
"""Return the induced node subgraph.
Parameters
----------
v : utils.Index
The nodes.
Returns
-------
ImmutableSubgraphIndex
The subgraph index.
"""
v = v.tousertensor()
gi, induced_n, induced_e = self._sparse.node_subgraph(v)
induced_nodes = utils.toindex(induced_n)
induced_edges = utils.toindex(induced_e)
return ImmutableSubgraphIndex(gi, self, induced_nodes, induced_edges)
def node_subgraphs(self, vs_arr):
"""Return the induced node subgraphs.
Parameters
----------
vs_arr : a vector of utils.Index
The nodes.
Returns
-------
a vector of ImmutableSubgraphIndex
The subgraph index.
"""
vs_arr = [v.tousertensor() for v in vs_arr]
gis, induced_nodes, induced_edges = self._sparse.node_subgraphs(vs_arr)
induced_nodes = [utils.toindex(v) for v in induced_nodes]
induced_edges = [utils.toindex(e) for e in induced_edges]
return [ImmutableSubgraphIndex(gi, self, induced_n,
induced_e) for gi, induced_n, induced_e in zip(gis, induced_nodes, induced_edges)]
def adjacency_matrix(self, transpose=False):
"""Return the adjacency matrix representation of this graph.
By default, a row of the returned adjacency matrix represents the destination
of an edge and a column represents the source.
When transpose is True, a row represents the source and a column represents
a destination.
Parameters
----------
transpose : bool
A flag to transpose the returned adjacency matrix.
Returns
-------
utils.CtxCachedObject
An object that returns tensor given context.
"""
def get_adj(ctx):
new_mat = self._sparse.adjacency_matrix(transpose)
return F.to_context(new_mat, ctx)
key = 'out_adj' if transpose else 'in_adj'
if key not in self._cache:
    # Cache the context-cached adjacency so repeated calls reuse it.
    self._cache[key] = utils.CtxCachedObject(lambda ctx: get_adj(ctx))
return self._cache[key]
def incidence_matrix(self, oriented=False):
"""Return the incidence matrix representation of this graph.
Parameters
----------
oriented : bool, optional (default=False)
Whether the returned incidence matrix is oriented.
Returns
-------
utils.CtxCachedObject
An object that returns tensor given context.
"""
raise Exception('immutable graph doesn\'t support incidence_matrix for now.')
def to_networkx(self):
"""Convert to networkx graph.
The edge id will be saved as the 'id' edge attribute.
Returns
-------
networkx.DiGraph
The nx graph
"""
src, dst, eid = self.edges()
ret = nx.DiGraph()
for u, v, id in zip(src, dst, eid):
ret.add_edge(u, v, id=id)
return ret
def from_networkx(self, nx_graph):
"""Convert from networkx graph.
If the 'id' edge attribute exists, edges are added following
the edge id order. Otherwise, the order is undefined.
Parameters
----------
nx_graph : networkx.DiGraph
The nx graph
"""
assert isinstance(nx_graph, nx.DiGraph), "The input graph has to be a NetworkX DiGraph."
# We store edge Ids as an edge attribute.
out_mat = nx.convert_matrix.to_scipy_sparse_matrix(nx_graph, format='coo')
self._sparse.from_coo_matrix(out_mat)
def from_scipy_sparse_matrix(self, adj):
"""Convert from scipy sparse matrix.
Parameters
----------
adj : scipy sparse matrix
"""
assert isinstance(adj, sp.csr_matrix) or isinstance(adj, sp.coo_matrix), \
"The input matrix has to be a SciPy sparse matrix."
out_mat = adj.tocoo()
self._sparse.from_coo_matrix(out_mat)
def line_graph(self, backtracking=True):
"""Return the line graph of this graph.
Parameters
----------
backtracking : bool, optional (default=True)
Whether (i, j) ~ (j, i) in L(G).
(i, j) ~ (j, i) is the behavior of networkx.line_graph.
Returns
-------
ImmutableGraphIndex
The line graph of this graph.
"""
raise Exception('immutable graph doesn\'t support line_graph')
class ImmutableSubgraphIndex(ImmutableGraphIndex):
def __init__(self, backend_sparse, parent, induced_nodes, induced_edges):
super(ImmutableSubgraphIndex, self).__init__(backend_sparse)
self._parent = parent
self._induced_nodes = induced_nodes
self._induced_edges = induced_edges
@property
def induced_edges(self):
return self._induced_edges
@property
def induced_nodes(self):
return self._induced_nodes
def create_immutable_graph_index(graph_data=None):
"""Create a graph index object.
Parameters
----------
graph_data : graph data, optional
Data to initialize graph. Same as networkx's semantics.
"""
if isinstance(graph_data, ImmutableGraphIndex):
return graph_data
assert F.create_immutable_graph_index is not None, \
"The selected backend doesn't support read-only graph!"
gi = ImmutableGraphIndex(F.create_immutable_graph_index())
if graph_data is None:
return gi
# scipy format
if isinstance(graph_data, sp.spmatrix):
try:
gi.from_scipy_sparse_matrix(graph_data)
return gi
except Exception:
raise Exception('Graph data is not a valid scipy sparse matrix.')
# networkx - any format
try:
gi.from_networkx(graph_data)
except Exception:
raise Exception('Error while creating graph from input of type "%s".'
% type(graph_data))
return gi
_init_api("dgl.immutable_graph_index")
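For intuition, the expansion that _CAPI_DGLExpandIds performs for in_edges/out_edges above can be sketched in NumPy (an illustration only, not the actual binding):

import numpy as np

def expand_ids(ids, offsets):
    # offsets has len(ids) + 1 entries; ids[i] fills
    # result[offsets[i]:offsets[i+1]], i.e. once per incident edge.
    return np.repeat(ids, np.diff(offsets))

assert expand_ids([0, 1, 2, 3, 4], [0, 2, 2, 5, 6, 7]).tolist() == [0, 0, 2, 2, 2, 3, 4]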
......@@ -7,6 +7,7 @@ from . import backend as F
from .frame import Frame, FrameRef
from .graph import DGLGraph
from . import utils
from .graph_index import map_to_subgraph_nid
class DGLSubGraph(DGLGraph):
"""The subgraph class.
......@@ -42,9 +43,11 @@ class DGLSubGraph(DGLGraph):
The graph index.
shared : bool, optional
Whether the subgraph shares node/edge features with the parent graph.
readonly : bool, optional
Whether the graph structure is read-only (default: False).
"""
def __init__(self, parent, parent_nid, parent_eid, graph_idx, shared=False):
super(DGLSubGraph, self).__init__(graph_data=graph_idx)
def __init__(self, parent, parent_nid, parent_eid, graph_idx, shared=False, readonly=False):
super(DGLSubGraph, self).__init__(graph_data=graph_idx, readonly=readonly)
self._parent = parent
self._parent_nid = parent_nid
self._parent_eid = parent_eid
......@@ -114,3 +117,6 @@ class DGLSubGraph(DGLGraph):
if self._parent._edge_frame.num_rows != 0:
self._edge_frame = FrameRef(Frame(
self._parent._edge_frame[self._parent_eid]))
def map_to_subgraph_nid(self, parent_vids):
return map_to_subgraph_nid(self._graph, parent_vids)
......@@ -140,6 +140,20 @@ TVM_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphHasVertices")
*rv = gptr->HasVertices(vids);
});
TVM_REGISTER_GLOBAL("graph_index._CAPI_DGLMapSubgraphNID")
.set_body([] (TVMArgs args, TVMRetValue* rv) {
const IdArray parent_vids = IdArray::FromDLPack(CreateTmpDLManagedTensor(args[0]));
const IdArray query = IdArray::FromDLPack(CreateTmpDLManagedTensor(args[1]));
*rv = GraphOp::MapParentIdToSubgraphId(parent_vids, query);
});
TVM_REGISTER_GLOBAL("immutable_graph_index._CAPI_DGLExpandIds")
.set_body([] (TVMArgs args, TVMRetValue* rv) {
const IdArray ids = IdArray::FromDLPack(CreateTmpDLManagedTensor(args[0]));
const IdArray offsets = IdArray::FromDLPack(CreateTmpDLManagedTensor(args[1]));
*rv = GraphOp::ExpandIds(ids, offsets);
});
TVM_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphHasEdgeBetween")
.set_body([] (TVMArgs args, TVMRetValue* rv) {
GraphHandle ghandle = args[0];
......
......@@ -7,6 +7,12 @@
#include <algorithm>
namespace dgl {
namespace {
inline bool IsValidIdArray(const IdArray& arr) {
return arr->ctx.device_type == kDLCPU && arr->ndim == 1
&& arr->dtype.code == kDLInt && arr->dtype.bits == 64;
}
} // namespace
Graph GraphOp::LineGraph(const Graph* g, bool backtracking) {
typedef std::pair<dgl_id_t, dgl_id_t> entry;
......@@ -124,4 +130,56 @@ std::vector<Graph> GraphOp::DisjointPartitionBySizes(const Graph* graph, IdArray
return rst;
}
IdArray GraphOp::MapParentIdToSubgraphId(IdArray parent_vids, IdArray query) {
CHECK(IsValidIdArray(parent_vids)) << "Invalid parent id array.";
CHECK(IsValidIdArray(query)) << "Invalid query id array.";
const auto parent_len = parent_vids->shape[0];
const auto query_len = query->shape[0];
const dgl_id_t* parent_data = static_cast<dgl_id_t*>(parent_vids->data);
const dgl_id_t* query_data = static_cast<dgl_id_t*>(query->data);
IdArray rst = IdArray::Empty({query_len}, DLDataType{kDLInt, 64, 1}, DLContext{kDLCPU, 0});
dgl_id_t* rst_data = static_cast<dgl_id_t*>(rst->data);
const bool is_sorted = std::is_sorted(parent_data, parent_data + parent_len);
if (is_sorted) {
  for (int64_t i = 0; i < query_len; i++) {
    const dgl_id_t id = query_data[i];
    // The parent Ids are sorted, so use binary search instead of a linear scan.
    const auto it = std::lower_bound(parent_data, parent_data + parent_len, id);
    CHECK(it != parent_data + parent_len && *it == id)
        << id << " doesn't exist in the parent Ids";
    rst_data[i] = it - parent_data;
  }
} else {
std::unordered_map<dgl_id_t, dgl_id_t> parent_map;
for (int64_t i = 0; i < parent_len; i++) {
const dgl_id_t id = parent_data[i];
parent_map[id] = i;
}
for (int64_t i = 0; i < query_len; i++) {
const dgl_id_t id = query_data[i];
auto it = parent_map.find(id);
CHECK(it != parent_map.end()) << id << " doesn't exist in the parent Ids";
rst_data[i] = it->second;
}
}
return rst;
}
IdArray GraphOp::ExpandIds(IdArray ids, IdArray offset) {
const auto id_len = ids->shape[0];
const auto off_len = offset->shape[0];
CHECK_EQ(id_len + 1, off_len);
const dgl_id_t *id_data = static_cast<dgl_id_t*>(ids->data);
const dgl_id_t *off_data = static_cast<dgl_id_t*>(offset->data);
const int64_t len = off_data[off_len - 1];
IdArray rst = IdArray::Empty({len}, DLDataType{kDLInt, 64, 1}, DLContext{kDLCPU, 0});
dgl_id_t *rst_data = static_cast<dgl_id_t*>(rst->data);
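// Write ids[i] into result positions [off_data[i], off_data[i+1]).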
for (int64_t i = 0; i < id_len; i++) {
const int64_t local_len = off_data[i + 1] - off_data[i];
for (int64_t j = 0; j < local_len; j++) {
rst_data[off_data[i] + j] = id_data[i];
}
}
return rst;
}
} // namespace dgl
import os
os.environ['DGLBACKEND'] = 'mxnet'
import mxnet as mx
import numpy as np
import scipy as sp
import scipy.sparse  # make sp.sparse available on all SciPy versions
from dgl.graph import GraphIndex, create_graph_index
from dgl.graph_index import map_to_subgraph_nid
import dgl.backend as F
from dgl import utils
def generate_graph():
g = create_graph_index()
g.add_nodes(10) # 10 nodes.
# create a graph where 0 is the source and 9 is the sink
for i in range(1, 9):
g.add_edge(0, i)
g.add_edge(i, 9)
# add a back flow from 9 to 0
g.add_edge(9, 0)
ig = create_graph_index(g.to_networkx(), readonly=True)
return g, ig
def generate_rand_graph(n):
arr = (sp.sparse.random(n, n, density=0.1, format='coo') != 0).astype(np.int64)
g = create_graph_index(arr)
ig = create_graph_index(arr, readonly=True)
return g, ig
def check_graph_equal(g1, g2):
ctx = F.get_context(mx.nd.array([1]))
adj1 = g1.adjacency_matrix().get(ctx) != 0
adj2 = g2.adjacency_matrix().get(ctx) != 0
assert mx.nd.sum(adj1 - adj2).asnumpy() == 0
def test_graph_gen():
g, ig = generate_rand_graph(10)
check_graph_equal(g, ig)
def check_basics(g, ig):
assert g.number_of_nodes() == ig.number_of_nodes()
assert g.number_of_edges() == ig.number_of_edges()
edges = g.edges()
iedges = ig.edges()
for i in range(g.number_of_nodes()):
assert g.has_node(i) == ig.has_node(i)
for i in range(g.number_of_nodes()):
assert mx.nd.sum(g.predecessors(i).tousertensor()).asnumpy() == mx.nd.sum(ig.predecessors(i).tousertensor()).asnumpy()
assert mx.nd.sum(g.successors(i).tousertensor()).asnumpy() == mx.nd.sum(ig.successors(i).tousertensor()).asnumpy()
randv = np.random.randint(0, g.number_of_nodes(), 10)
randv = utils.toindex(randv)
in_src1, in_dst1, in_eids1 = g.in_edges(randv)
in_src2, in_dst2, in_eids2 = ig.in_edges(randv)
nnz = in_src2.tousertensor().shape[0]
assert mx.nd.sum(in_src1.tousertensor() == in_src2.tousertensor()).asnumpy() == nnz
assert mx.nd.sum(in_dst1.tousertensor() == in_dst2.tousertensor()).asnumpy() == nnz
assert mx.nd.sum(in_eids1.tousertensor() == in_eids2.tousertensor()).asnumpy() == nnz
out_src1, out_dst1, out_eids1 = g.out_edges(randv)
out_src2, out_dst2, out_eids2 = ig.out_edges(randv)
nnz = out_dst2.tousertensor().shape[0]
assert mx.nd.sum(out_dst1.tousertensor() == out_dst2.tousertensor()).asnumpy() == nnz
assert mx.nd.sum(out_src1.tousertensor() == out_src2.tousertensor()).asnumpy() == nnz
assert mx.nd.sum(out_eids1.tousertensor() == out_eids2.tousertensor()).asnumpy() == nnz
num_v = len(randv)
assert mx.nd.sum(g.in_degrees(randv).tousertensor() == ig.in_degrees(randv).tousertensor()).asnumpy() == num_v
assert mx.nd.sum(g.out_degrees(randv).tousertensor() == ig.out_degrees(randv).tousertensor()).asnumpy() == num_v
randv = randv.tousertensor()
for v in randv.asnumpy():
assert g.in_degree(v) == ig.in_degree(v)
assert g.out_degree(v) == ig.out_degree(v)
for u in randv.asnumpy():
for v in randv.asnumpy():
if len(g.edge_id(u, v).tolist()) == 1:
assert g.edge_id(u, v).tolist() == ig.edge_id(u, v).tolist()
assert g.has_edge_between(u, v) == ig.has_edge_between(u, v)
randv = utils.toindex(randv)
ids = g.edge_ids(randv, randv)[2].tolist()
assert ig.edge_ids(randv, randv).tolist() == ids
assert g.has_edges_between(randv, randv).tolist() == ig.has_edges_between(randv, randv).tolist()
def test_basics():
g, ig = generate_rand_graph(100)
check_basics(g, ig)
def test_node_subgraph():
num_vertices = 100
g, ig = generate_rand_graph(num_vertices)
# node_subgraph
randv1 = np.random.randint(0, num_vertices, 20)
randv = np.unique(randv1)
subg = g.node_subgraph(utils.toindex(randv))
subig = ig.node_subgraph(utils.toindex(randv))
check_graph_equal(subg, subig)
assert mx.nd.sum(map_to_subgraph_nid(subg, utils.toindex(randv1[0:10])).tousertensor()
== map_to_subgraph_nid(subig, utils.toindex(randv1[0:10])).tousertensor()) == 10
# node_subgraphs
randvs = []
subgs = []
for i in range(4):
randv = np.unique(np.random.randint(0, num_vertices, 20))
randvs.append(utils.toindex(randv))
subgs.append(g.node_subgraph(utils.toindex(randv)))
subigs = ig.node_subgraphs(randvs)
for i in range(4):
check_graph_equal(subgs[i], subigs[i])
if __name__ == '__main__':
test_basics()
test_graph_gen()
test_node_subgraph()