[Feature] Preference to COO for "hypersparse" unit graphs & graph compaction (#1238)

* unit graph that prefers coo queries * auto detect coo preference * forgot some functions * disable lint on detect_prefer_coo * reorg * change comment * lint * fix * move array_utils.h to src * compact graph impl * fix redundant copying in idhashmap * docstring * moving preference detection to C * lint * fix unit test & address comments * hypersparse autorestrict * docstring & fix * revert copyto and asnumbits * fix stupid bug * lint * leave a TODO for sorted COO * fixing same node type mapping to different id in different graphs * addresses comments * made induced nodes a feature column * lint?

[Feature] Preference to COO for "hypersparse" unit graphs & graph compaction (#1238)
* unit graph that prefers coo queries * auto detect coo preference * forgot some functions * disable lint on detect_prefer_coo * reorg * change comment * lint * fix * move array_utils.h to src * compact graph impl * fix redundant copying in idhashmap * docstring * moving preference detection to C * lint * fix unit test & address comments * hypersparse autorestrict * docstring & fix * revert copyto and asnumbits * fix stupid bug * lint * leave a TODO for sorted COO * fixing same node type mapping to different id in different graphs * addresses comments * made induced nodes a feature column * lint?
eeeb52f4 · Quan (Andy) Gan · GitHub · 828a5e5b · eeeb52f4 · eeeb52f4
Unverified Commit eeeb52f4 authored Feb 04, 2020 by Quan (Andy) Gan Committed by GitHub Feb 04, 2020
20 changed files
--- a/include/dgl/array.h
+++ b/include/dgl/array.h
@@ -211,7 +211,7 @@ struct CSRMatrix {
  runtime::NDArray indptr, indices;
  /*! \brief data array, could be empty. */
  runtime::NDArray data;
-  /*! \brief indicate that the edges are stored in the sorted order. */
+  /*! \brief whether the column indices per row are sorted */
  bool sorted;
 };
@@ -229,7 +229,9 @@ struct COOMatrix {
  int64_t num_rows, num_cols;
  /*! \brief COO index arrays */
  runtime::NDArray row, col;
-  /*! \brief data array, could be empty. */
+  /*!
+   * \brief data array, could be empty.  When empty, assume it is from 0 to NNZ - 1.
+   */
  runtime::NDArray data;
 };
@@ -253,6 +255,11 @@ runtime::NDArray CSRGetRowColumnIndices(CSRMatrix , int64_t row);
 /*! \brief Return the data array of the given row */
 runtime::NDArray CSRGetRowData(CSRMatrix , int64_t row);
+/*! \brief Whether the CSR matrix contains data */
+inline bool CSRHasData(CSRMatrix csr) {
+  return csr.data.defined();
+}
 /* \brief Get data. The return type is an ndarray due to possible duplicate entries. */
 runtime::NDArray CSRGetData(CSRMatrix , int64_t row, int64_t col);
 /*!
@@ -326,8 +333,39 @@ void CSRSort(CSRMatrix csr);
 ///////////////////////// COO routines //////////////////////////
-/*! \return True if the matrix has duplicate entries */
+/*! \brief Return true if the value (row, col) is non-zero */
-bool COOHasDuplicate(COOMatrix coo);
+bool COOIsNonZero(COOMatrix , int64_t row, int64_t col);
+/*!
+ * \brief Batched implementation of COOIsNonZero.
+ * \note This operator allows broadcasting (i.e, either row or col can be of length 1).
+ */
+runtime::NDArray COOIsNonZero(COOMatrix, runtime::NDArray row, runtime::NDArray col);
+/*! \brief Return the nnz of the given row */
+int64_t COOGetRowNNZ(COOMatrix , int64_t row);
+runtime::NDArray COOGetRowNNZ(COOMatrix , runtime::NDArray row);
+/*! \brief Return the data and indices arrays of the given row */
+std::pair<runtime::NDArray, runtime::NDArray>
+COOGetRowDataAndIndices(COOMatrix , int64_t row);
+/*! \brief Whether the COO matrix contains data */
+inline bool COOHasData(COOMatrix csr) {
+  return csr.data.defined();
+}
+/*! \brief Get data. The return type is an ndarray due to possible duplicate entries. */
+runtime::NDArray COOGetData(COOMatrix , int64_t row, int64_t col);
+/*!
+ * \brief Get the data and the row,col indices for each returned entry.
+ * \note This operator allows broadcasting (i.e, either row or col can be of length 1).
+ */
+std::vector<runtime::NDArray> COOGetDataAndIndices(
+    COOMatrix , runtime::NDArray rows, runtime::NDArray cols);
+/*! \brief Return a transposed COO matrix */
+COOMatrix COOTranspose(COOMatrix coo);
 /*!
 * \brief Convert COO matrix to CSR matrix.
@@ -339,6 +377,32 @@ bool COOHasDuplicate(COOMatrix coo);
 */
 CSRMatrix COOToCSR(COOMatrix coo);
+/*!
+ * \brief Slice rows of the given matrix and return.
+ * \param coo COO matrix
+ * \param start Start row id (inclusive)
+ * \param end End row id (exclusive)
+ */
+COOMatrix COOSliceRows(COOMatrix coo, int64_t start, int64_t end);
+COOMatrix COOSliceRows(COOMatrix coo, runtime::NDArray rows);
+/*!
+ * \brief Get the submatrix specified by the row and col ids.
+ *
+ * In numpy notation, given matrix M, row index array I, col index array J
+ * This function returns the submatrix M[I, J].
+ *
+ * \param coo The input coo matrix
+ * \param rows The row index to select
+ * \param cols The col index to select
+ * \return submatrix
+ */
+COOMatrix COOSliceMatrix(COOMatrix coo, runtime::NDArray rows, runtime::NDArray cols);
+/*! \return True if the matrix has duplicate entries */
+bool COOHasDuplicate(COOMatrix coo);
 // inline implementations
 template <typename T>
 IdArray VecToIdArray(const std::vector<T>& vec,
@@ -399,7 +463,7 @@ IdArray VecToIdArray(const std::vector<T>& vec,
 /*
 * Dispatch according to float type (either float32 or float64):
 *
- * ATEN_ID_TYPE_SWITCH(array->dtype, FloatType, {
+ * ATEN_FLOAT_TYPE_SWITCH(array->dtype, FloatType, {
 *   // Now FloatType is the type corresponding to data type in array.
 *   // For instance, one can do this for a CPU array:
 *   FloatType *data = static_cast<FloatType *>(array->data);
@@ -422,7 +486,7 @@ IdArray VecToIdArray(const std::vector<T>& vec,
 /*
 * Dispatch according to data type (int32, int64, float32 or float64):
 *
- * ATEN_ID_TYPE_SWITCH(array->dtype, DType, {
+ * ATEN_DTYPE_SWITCH(array->dtype, DType, {
 *   // Now DType is the type corresponding to data type in array.
 *   // For instance, one can do this for a CPU array:
 *   DType *data = static_cast<DType *>(array->data);

--- a/include/dgl/base_heterograph.h
+++ b/include/dgl/base_heterograph.h
@@ -90,6 +90,11 @@ class BaseHeteroGraph : public runtime::Object {
   */
  virtual void Clear() = 0;
+  /*!
+   * \brief Get the data type of node and edge IDs of this graph.
+   */
+  virtual DLDataType DataType() const = 0;
  /*!
   * \brief Get the device context of this graph.
   */
@@ -98,6 +103,7 @@ class BaseHeteroGraph : public runtime::Object {
  /*!
   * \brief Get the number of integer bits used to store node/edge ids (32 or 64).
   */
+  // TODO(BarclayII) replace NumBits() calls with DataType() calls
  virtual uint8_t NumBits() const = 0;
  /*!
@@ -464,19 +470,44 @@ DGL_DEFINE_OBJECT_REF(HeteroSubgraphRef, HeteroSubgraph);
 // creators
-/*! \brief Create a bipartite graph from COO arrays */
+/*!
-HeteroGraphPtr CreateBipartiteFromCOO(
+ * \brief Sparse graph format.
-    int64_t num_src, int64_t num_dst, IdArray row, IdArray col);
+ */
+enum class SparseFormat {
+  ANY = 0,
+  COO = 1,
+  CSR = 2,
+  CSC = 3
+};
-/*! \brief Create a bipartite graph from (out) CSR arrays */
+inline SparseFormat ParseSparseFormat(const std::string& name) {
-HeteroGraphPtr CreateBipartiteFromCSR(
+  if (name == "coo")
-    int64_t num_src, int64_t num_dst,
+    return SparseFormat::COO;
-    IdArray indptr, IdArray indices, IdArray edge_ids);
+  else if (name == "csr")
+    return SparseFormat::CSR;
+  else if (name == "csc")
+    return SparseFormat::CSC;
+  else
+    return SparseFormat::ANY;
+}
 /*! \brief Create a heterograph from meta graph and a list of bipartite graph */
 HeteroGraphPtr CreateHeteroGraph(
    GraphPtr meta_graph, const std::vector<HeteroGraphPtr>& rel_graphs);
+/*!
+ * \brief Given a list of graphs, remove the common nodes that have neither inbound nor
+ * outbound edges in any of the graphs.
+ *
+ * The graphs should have identical node ID space (i.e. should have the same set of nodes,
+ * including types and IDs) and metagraph.
+ *
+ * \return A pair.  The first element is the list of compacted graphs, and the second
+ * element is the mapping from the compacted graphs to the original graph.
+ */
+std::pair<std::vector<HeteroGraphPtr>, std::vector<IdArray>>
+CompactGraphs(const std::vector<HeteroGraphPtr> &graphs);
 };  // namespace dgl
 #endif  // DGL_BASE_HETEROGRAPH_H_
--- a/python/dgl/convert.py
+++ b/python/dgl/convert.py
 """Module for converting graph from/to other object."""
 from collections import defaultdict
+from collections.abc import Iterable
 import numpy as np
 import scipy as sp
 import networkx as nx
@@ -19,9 +20,11 @@ __all__ = [
    'to_hetero',
    'to_homo',
    'to_networkx',
+    'compact_graphs',
 ]
-def graph(data, ntype='_N', etype='_E', card=None, validate=True, **kwargs):
+def graph(data, ntype='_N', etype='_E', card=None, validate=True, restrict_format='any',
+          **kwargs):
    """Create a graph with one type of nodes and edges.
    In the sparse matrix perspective, :func:`dgl.graph` creates a graph
@@ -49,6 +52,8 @@ def graph(data, ntype='_N', etype='_E', card=None, validate=True, **kwargs):
        If True, check if node ids are within cardinality, the check process may take
        some time. (Default: True)
        If False and card is not None, user would receive a warning.
+    restrict_format : 'any', 'coo', 'csr', 'csc', optional
+        Force the storage format.  Default: 'any' (i.e. let DGL decide what to use).
    kwargs : key-word arguments, optional
        Other key word arguments. Only comes into effect when we are using a NetworkX
        graph. It can consist of:
@@ -122,17 +127,24 @@ def graph(data, ntype='_N', etype='_E', card=None, validate=True, **kwargs):
        urange, vrange = None, None
    if isinstance(data, tuple):
        u, v = data
-        return create_from_edges(u, v, ntype, etype, ntype, urange, vrange, validate)
+        return create_from_edges(
+            u, v, ntype, etype, ntype, urange, vrange, validate,
+            restrict_format=restrict_format)
    elif isinstance(data, list):
-        return create_from_edge_list(data, ntype, etype, ntype, urange, vrange, validate)
+        return create_from_edge_list(
+            data, ntype, etype, ntype, urange, vrange, validate,
+            restrict_format=restrict_format)
    elif isinstance(data, sp.sparse.spmatrix):
-        return create_from_scipy(data, ntype, etype, ntype)
+        return create_from_scipy(
+            data, ntype, etype, ntype, restrict_format=restrict_format)
    elif isinstance(data, nx.Graph):
-        return create_from_networkx(data, ntype, etype, **kwargs)
+        return create_from_networkx(
+            data, ntype, etype, restrict_format=restrict_format, **kwargs)
    else:
        raise DGLError('Unsupported graph data type:', type(data))
-def bipartite(data, utype='_U', etype='_E', vtype='_V', card=None, validate=True, **kwargs):
+def bipartite(data, utype='_U', etype='_E', vtype='_V', card=None, validate=True,
+              restrict_format='any', **kwargs):
    """Create a bipartite graph.
    The result graph is directed and edges must be from ``utype`` nodes
@@ -165,6 +177,8 @@ def bipartite(data, utype='_U', etype='_E', vtype='_V', card=None, validate=True
        If True, check if node ids are within cardinality, the check process may take
        some time. (Default: True)
        If False and card is not None, user would receive a warning.
+    restrict_format : 'any', 'coo', 'csr', 'csc', optional
+        Force the storage format.  Default: 'any' (i.e. let DGL decide what to use).
    kwargs : key-word arguments, optional
        Other key word arguments. Only comes into effect when we are using a NetworkX
        graph. It can consist of:
@@ -253,13 +267,19 @@ def bipartite(data, utype='_U', etype='_E', vtype='_V', card=None, validate=True
        urange, vrange = None, None
    if isinstance(data, tuple):
        u, v = data
-        return create_from_edges(u, v, utype, etype, vtype, urange, vrange, validate)
+        return create_from_edges(
+            u, v, utype, etype, vtype, urange, vrange, validate,
+            restrict_format=restrict_format)
    elif isinstance(data, list):
-        return create_from_edge_list(data, utype, etype, vtype, urange, vrange, validate)
+        return create_from_edge_list(
+            data, utype, etype, vtype, urange, vrange, validate,
+            restrict_format=restrict_format)
    elif isinstance(data, sp.sparse.spmatrix):
-        return create_from_scipy(data, utype, etype, vtype)
+        return create_from_scipy(
+            data, utype, etype, vtype, restrict_format=restrict_format)
    elif isinstance(data, nx.Graph):
-        return create_from_networkx_bipartite(data, utype, etype, vtype, **kwargs)
+        return create_from_networkx_bipartite(
+            data, utype, etype, vtype, restrict_format=restrict_format, **kwargs)
    else:
        raise DGLError('Unsupported graph data type:', type(data))
@@ -331,24 +351,29 @@ def hetero_from_relations(rel_graphs):
    # TODO(minjie): this API can be generalized as a union operation of the input graphs
    # TODO(minjie): handle node/edge data
    # infer meta graph
-    ntype_dict = {}  # ntype -> ntid
+    ntype_set = set()
    meta_edges = []
    ntypes = []
    etypes = []
+    # TODO(BarclayII): The node type names are kept sorted; otherwise, even if
+    # the metagraph is the same, the same node type name may map to different
+    # node type IDs in different graphs.
+    # In the future, we need to lower the type names into C++.
    for rgrh in rel_graphs:
        assert len(rgrh.etypes) == 1
        stype, etype, dtype = rgrh.canonical_etypes[0]
-        if stype not in ntype_dict:
+        ntype_set.add(stype)
-            ntype_dict[stype] = len(ntypes)
+        ntype_set.add(dtype)
-            ntypes.append(stype)
+    ntypes = list(sorted(ntype_set))
+    ntype_dict = {ntype: i for i, ntype in enumerate(ntypes)}
+    for rgrh in rel_graphs:
+        stype, etype, dtype = rgrh.canonical_etypes[0]
        stid = ntype_dict[stype]
-        if dtype not in ntype_dict:
-            ntype_dict[dtype] = len(ntypes)
-            ntypes.append(dtype)
        dtid = ntype_dict[dtype]
        meta_edges.append((stid, dtid))
        etypes.append(etype)
    metagraph = graph_index.from_edge_list(meta_edges, True, True)
    # create graph index
    hgidx = heterograph_index.create_heterograph_from_relations(
        metagraph, [rgrh._graph for rgrh in rel_graphs])
@@ -699,11 +724,104 @@ def to_homo(G):
    return retg
+def compact_graphs(graphs):
+    """Given a list of graphs with the same set of nodes, find and eliminate the common
+    isolated nodes across all graphs.
+    This function requires the graphs to have the same set of nodes (i.e. the node types
+    must be the same, and the number of nodes of each node type must be the same).  The
+    metagraph does not have to be the same.
+    It finds all the nodes that have zero in-degree and zero out-degree in all the given
+    graphs, and eliminates them from all the graphs.
+    Useful for graph sampling where we have a giant graph but we only wish to perform
+    message passing on a smaller graph with a (tiny) subset of nodes.
+    The node and edge features are not preserved.
+    Parameters
+    ----------
+    graphs : DGLHeteroGraph or list[DGLHeteroGraph]
+        The graph, or list of graphs
+    Returns
+    -------
+    DGLHeteroGraph or list[DGLHeteroGraph]
+        The compacted graph or list of compacted graphs.
+        Each returned graph would have a feature ``dgl.NID`` containing the mapping
+        of node IDs for each type from the compacted graph(s) to the original graph(s).
+        Note that the mapping is the same for all the compacted graphs.
+    Examples
+    --------
+    The following code constructs a bipartite graph with 20 users and 10 games, but
+    only user #1 and #3, as well as game #3 and #5, have connections:
+    >>> g = dgl.bipartite([(1, 3), (3, 5)], 'user', 'plays', 'game', card=(20, 10))
+    The following would compact the graph above to another bipartite graph with only
+    two users and two games.
+    >>> new_g = dgl.compact_graphs(g)
+    >>> new_g.nodes['user'].data[dgl.NID]
+    tensor([1, 3])
+    >>> new_g.nodes['game'].data[dgl.NID]
+    tensor([3, 5])
+    The mapping tells us that only user #1 and #3 as well as game #3 and #5 are kept.
+    Furthermore, the first user and second user in the compacted graph maps to
+    user #1 and #3 in the original graph.  Games are similar.
+    One can verify that the edge connections are kept the same in the compacted graph.
+    >>> new_g.edges(form='all', order='eid', etype='plays')
+    (tensor([0, 1]), tensor([0, 1]), tensor([0, 1]))
+    When compacting multiple graphs, nodes that do not have any connections in any
+    of the given graphs are removed.  So if we compact ``g`` and the following ``g2``
+    graphs together:
+    >>> g2 = dgl.bipartite([(1, 6), (6, 8)], 'user', 'plays', 'game', card=(20, 10))
+    >>> new_g, new_g2 = dgl.compact_graphs([g, g2])
+    >>> new_g.nodes['user'].data[dgl.NID]
+    tensor([1, 3, 6])
+    >>> new_g.nodes['game'].data[dgl.NID]
+    tensor([3, 5, 6, 8])
+    Then one can see that user #1 from both graphs, users #3 from the first graph, as
+    well as user #6 from the second graph, are kept.  Games are similar.
+    Similarly, one can also verify the connections:
+    >>> new_g.edges(form='all', order='eid', etype='plays')
+    (tensor([0, 1]), tensor([0, 1]), tensor([0, 1]))
+    >>> new_g2.edges(form='all', order='eid', etype='plays')
+    (tensor([0, 2]), tensor([2, 3]), tensor([0, 1]))
+    """
+    return_single = False
+    if not isinstance(graphs, Iterable):
+        graphs = [graphs]
+        return_single = True
+    new_graph_indexes, induced_nodes = heterograph_index.compact_graph_indexes(
+        [g._graph for g in graphs])
+    new_graphs = [
+        DGLHeteroGraph(new_graph_index, graph.ntypes, graph.etypes)
+        for new_graph_index, graph in zip(new_graph_indexes, graphs)]
+    for g in new_graphs:
+        for i, ntype in enumerate(graphs[0].ntypes):
+            g.nodes[ntype].data[NID] = induced_nodes[i]
+    if return_single:
+        new_graphs = new_graphs[0]
+    return new_graphs
 ############################################################
 # Internal APIs
 ############################################################
-def create_from_edges(u, v, utype, etype, vtype, urange=None, vrange=None, validate=True):
+def create_from_edges(u, v, utype, etype, vtype, urange=None, vrange=None, validate=True,
+                      restrict_format="any"):
    """Internal function to create a graph from incident nodes with types.
    utype could be equal to vtype
@@ -728,6 +846,8 @@ def create_from_edges(u, v, utype, etype, vtype, urange=None, vrange=None, valid
        maximum of the destination node IDs in the edge list plus 1. (Default: None)
    validate : bool, optional
        If True, checks if node IDs are within range.
+    restrict_format : 'any', 'coo', 'csr', 'csc', optional
+        Force the storage format.  Default: 'any' (i.e. let DGL decide what to use).
    Returns
    -------
@@ -755,13 +875,16 @@ def create_from_edges(u, v, utype, etype, vtype, urange=None, vrange=None, valid
        num_ntypes = 1
    else:
        num_ntypes = 2
-    hgidx = heterograph_index.create_unitgraph_from_coo(num_ntypes, urange, vrange, u, v)
+    hgidx = heterograph_index.create_unitgraph_from_coo(
+        num_ntypes, urange, vrange, u, v, restrict_format)
    if utype == vtype:
        return DGLHeteroGraph(hgidx, [utype], [etype])
    else:
        return DGLHeteroGraph(hgidx, [utype, vtype], [etype])
-def create_from_edge_list(elist, utype, etype, vtype, urange=None, vrange=None, validate=True):
+def create_from_edge_list(elist, utype, etype, vtype, urange=None, vrange=None,
+                          validate=True, restrict_format='any'):
    """Internal function to create a heterograph from a list of edge tuples with types.
    utype could be equal to vtype
@@ -784,7 +907,8 @@ def create_from_edge_list(elist, utype, etype, vtype, urange=None, vrange=None,
        maximum of the destination node IDs in the edge list plus 1. (Default: None)
    validate : bool, optional
        If True, checks if node IDs are within range.
+    restrict_format : 'any', 'coo', 'csr', 'csc', optional
+        Force the storage format.  Default: 'any' (i.e. let DGL decide what to use).
    Returns
    -------
@@ -796,9 +920,11 @@ def create_from_edge_list(elist, utype, etype, vtype, urange=None, vrange=None,
        u, v = zip(*elist)
        u = list(u)
        v = list(v)
-    return create_from_edges(u, v, utype, etype, vtype, urange, vrange, validate)
+    return create_from_edges(
+        u, v, utype, etype, vtype, urange, vrange, validate, restrict_format)
-def create_from_scipy(spmat, utype, etype, vtype, with_edge_id=False):
+def create_from_scipy(spmat, utype, etype, vtype, with_edge_id=False,
+                      restrict_format='any'):
    """Internal function to create a heterograph from a scipy sparse matrix with types.
    Parameters
@@ -818,7 +944,8 @@ def create_from_scipy(spmat, utype, etype, vtype, with_edge_id=False):
        (source, destination) order.
    validate : bool, optional
        If True, checks if node IDs are within range.
+    restrict_format : 'any', 'coo', 'csr', 'csc', optional
+        Force the storage format.  Default: 'any' (i.e. let DGL decide what to use).
    Returns
    -------
@@ -830,7 +957,7 @@ def create_from_scipy(spmat, utype, etype, vtype, with_edge_id=False):
        row = utils.toindex(spmat.row)
        col = utils.toindex(spmat.col)
        hgidx = heterograph_index.create_unitgraph_from_coo(
-            num_ntypes, num_src, num_dst, row, col)
+            num_ntypes, num_src, num_dst, row, col, restrict_format)
    else:
        spmat = spmat.tocsr()
        indptr = utils.toindex(spmat.indptr)
@@ -838,7 +965,7 @@ def create_from_scipy(spmat, utype, etype, vtype, with_edge_id=False):
        # TODO(minjie): with_edge_id is only reasonable for csr matrix. How to fix?
        data = utils.toindex(spmat.data if with_edge_id else list(range(len(indices))))
        hgidx = heterograph_index.create_unitgraph_from_csr(
-            num_ntypes, num_src, num_dst, indptr, indices, data)
+            num_ntypes, num_src, num_dst, indptr, indices, data, restrict_format)
    if num_ntypes == 1:
        return DGLHeteroGraph(hgidx, [utype], [etype])
    else:
@@ -848,7 +975,8 @@ def create_from_networkx(nx_graph,
                         ntype, etype,
                         edge_id_attr_name='id',
                         node_attrs=None,
-                         edge_attrs=None):
+                         edge_attrs=None,
+                         restrict_format='any'):
    """Create a heterograph that has only one set of nodes and edges.
    Parameters
@@ -865,6 +993,8 @@ def create_from_networkx(nx_graph,
        Names for node features to retrieve from the NetworkX graph (Default: None)
    edge_attrs : list of str
        Names for edge features to retrieve from the NetworkX graph (Default: None)
+    restrict_format : 'any', 'coo', 'csr', 'csc', optional
+        Force the storage format.  Default: 'any' (i.e. let DGL decide what to use).
    Returns
    -------
@@ -899,7 +1029,8 @@ def create_from_networkx(nx_graph,
    src = utils.toindex(src)
    dst = utils.toindex(dst)
    num_nodes = nx_graph.number_of_nodes()
-    g = create_from_edges(src, dst, ntype, etype, ntype, num_nodes, num_nodes, validate=False)
+    g = create_from_edges(src, dst, ntype, etype, ntype, num_nodes, num_nodes,
+                          validate=False, restrict_format=restrict_format)
    # handle features
    # copy attributes
@@ -950,7 +1081,8 @@ def create_from_networkx_bipartite(nx_graph,
                                   utype, etype, vtype,
                                   edge_id_attr_name='id',
                                   node_attrs=None,
-                                   edge_attrs=None):
+                                   edge_attrs=None,
+                                   restrict_format='any'):
    """Create a heterograph that has one set of source nodes, one set of
    destination nodes and one set of edges.
@@ -974,6 +1106,8 @@ def create_from_networkx_bipartite(nx_graph,
        Names for node features to retrieve from the NetworkX graph (Default: None)
    edge_attrs : list of str
        Names for edge features to retrieve from the NetworkX graph (Default: None)
+    restrict_format : 'any', 'coo', 'csr', 'csc', optional
+        Force the storage format.  Default: 'any' (i.e. let DGL decide what to use).
    Returns
    -------
@@ -1013,7 +1147,7 @@ def create_from_networkx_bipartite(nx_graph,
    dst = utils.toindex(dst)
    g = create_from_edges(
        src, dst, utype, etype, vtype,
-        len(top_nodes), len(bottom_nodes), validate=False)
+        len(top_nodes), len(bottom_nodes), validate=False, restrict_format=restrict_format)
    # TODO attributes
    assert node_attrs is None, 'Retrieval of node attributes are not supported yet.'

--- a/python/dgl/heterograph_index.py
+++ b/python/dgl/heterograph_index.py
@@ -43,7 +43,7 @@ class HeteroGraphIndex(ObjectBase):
            num_dst = number_of_nodes[dst_ntype]
            src_id, dst_id, _ = edges_per_type
            rel_graphs.append(create_unitgraph_from_coo(
-                1 if src_ntype == dst_ntype else 2, num_src, num_dst, src_id, dst_id))
+                1 if src_ntype == dst_ntype else 2, num_src, num_dst, src_id, dst_id, 'any'))
        self.__init_handle_by_constructor__(
            _CAPI_DGLHeteroCreateHeteroGraph, metagraph, rel_graphs)
@@ -957,7 +957,8 @@ class HeteroSubgraphIndex(ObjectBase):
 # Creators
 #################################################################
-def create_unitgraph_from_coo(num_ntypes, num_src, num_dst, row, col):
+def create_unitgraph_from_coo(num_ntypes, num_src, num_dst, row, col,
+                              restrict_format):
    """Create a unitgraph graph index from COO format
    Parameters
@@ -972,15 +973,19 @@ def create_unitgraph_from_coo(num_ntypes, num_src, num_dst, row, col):
        Row index.
    col : utils.Index
        Col index.
+    restrict_format : "any", "coo", "csr" or "csc"
+        Restrict the storage format of the unit graph.
    Returns
    -------
    HeteroGraphIndex
    """
    return _CAPI_DGLHeteroCreateUnitGraphFromCOO(
-        int(num_ntypes), int(num_src), int(num_dst), row.todgltensor(), col.todgltensor())
+        int(num_ntypes), int(num_src), int(num_dst), row.todgltensor(), col.todgltensor(),
+        restrict_format)
-def create_unitgraph_from_csr(num_ntypes, num_src, num_dst, indptr, indices, edge_ids):
+def create_unitgraph_from_csr(num_ntypes, num_src, num_dst, indptr, indices, edge_ids,
+                              restrict_format):
    """Create a unitgraph graph index from CSR format
    Parameters
@@ -997,6 +1002,8 @@ def create_unitgraph_from_csr(num_ntypes, num_src, num_dst, indptr, indices, edg
        CSR indices.
    edge_ids : utils.Index
        Edge shuffle id.
+    restrict_format : "any", "coo", "csr" or "csc"
+        Restrict the storage format of the unit graph.
    Returns
    -------
@@ -1004,7 +1011,8 @@ def create_unitgraph_from_csr(num_ntypes, num_src, num_dst, indptr, indices, edg
    """
    return _CAPI_DGLHeteroCreateUnitGraphFromCSR(
        int(num_ntypes), int(num_src), int(num_dst),
-        indptr.todgltensor(), indices.todgltensor(), edge_ids.todgltensor())
+        indptr.todgltensor(), indices.todgltensor(), edge_ids.todgltensor(),
+        restrict_format)
 def create_heterograph_from_relations(metagraph, rel_graphs):
    """Create a heterograph from metagraph and graphs of every relation.
@@ -1061,6 +1069,31 @@ def disjoint_partition(graph, bnn_all_types, bne_all_types):
    return _CAPI_DGLHeteroDisjointPartitionBySizes(
        graph, bnn_all_types.todgltensor(), bne_all_types.todgltensor())
+def compact_graph_indexes(graphs):
+    """Given a list of graphs, remove the common nodes that have neither inbound nor
+    outbound edges in any of the graphs.
+    The graphs should have identical node space (i.e. should have the same set of
+    nodes, including types and IDs) and metagraph.
+    Parameters
+    ----------
+    graphs : list[HeteroGraphIndex]
+        List of heterographs.
+    Returns
+    -------
+    list[HeteroGraphIndex]
+        A list of compacted heterographs.
+        The returned heterographs also have the same metagraph, which is identical
+        to the original heterographs.
+        The returned heterographs also have identical node space.
+    list[Tensor]
+        The induced node IDs of each node type.
+    """
+    new_graphs, induced_nodes = _CAPI_DGLCompactGraphs(graphs)
+    return new_graphs, [F.zerocopy_from_dgl_ndarray(nodes.data) for nodes in induced_nodes]
 @register_object("graph.FlattenedHeteroGraph")
 class FlattenedHeteroGraph(ObjectBase):
    """FlattenedHeteroGraph object class in C++ backend."""

--- a/python/dgl/runtime/spmv.py
+++ b/python/dgl/runtime/spmv.py
@@ -168,7 +168,7 @@ def build_gidx_and_mapping_uv(edge_tuples, num_src, num_dst):
        Number of ints needed to represent the graph
    """
    u, v, eid = edge_tuples
-    gidx = create_unitgraph_from_coo(2, num_src, num_dst, u, v)
+    gidx = create_unitgraph_from_coo(2, num_src, num_dst, u, v, 'any')
    forward, backward = gidx.get_csr_shuffle_order(0)
    eid = eid.tousertensor()
    nbits = gidx.bits_needed(0)

--- a/src/array/array.cc
+++ b/src/array/array.cc
@@ -411,6 +411,22 @@ void CSRSort(CSRMatrix csr) {
 ///////////////////////// COO routines //////////////////////////
+bool COOIsNonZero(COOMatrix coo, int64_t row, int64_t col) {
+  bool ret = false;
+  ATEN_COO_IDX_SWITCH(coo, XPU, IdType, {
+    ret = impl::COOIsNonZero<XPU, IdType>(coo, row, col);
+  });
+  return ret;
+}
+NDArray COOIsNonZero(COOMatrix coo, NDArray row, NDArray col) {
+  NDArray ret;
+  ATEN_COO_IDX_SWITCH(coo, XPU, IdType, {
+    ret = impl::COOIsNonZero<XPU, IdType>(coo, row, col);
+  });
+  return ret;
+}
 bool COOHasDuplicate(COOMatrix coo) {
  bool ret = false;
  ATEN_COO_IDX_SWITCH(coo, XPU, IdType, {
@@ -419,6 +435,55 @@ bool COOHasDuplicate(COOMatrix coo) {
  return ret;
 }
+int64_t COOGetRowNNZ(COOMatrix coo, int64_t row) {
+  int64_t ret = 0;
+  ATEN_COO_IDX_SWITCH(coo, XPU, IdType, {
+    ret = impl::COOGetRowNNZ<XPU, IdType>(coo, row);
+  });
+  return ret;
+}
+NDArray COOGetRowNNZ(COOMatrix coo, NDArray row) {
+  NDArray ret;
+  ATEN_COO_IDX_SWITCH(coo, XPU, IdType, {
+    ret = impl::COOGetRowNNZ<XPU, IdType>(coo, row);
+  });
+  return ret;
+}
+std::pair<NDArray, NDArray> COOGetRowDataAndIndices(COOMatrix coo, int64_t row) {
+  std::pair<NDArray, NDArray> ret;
+  ATEN_COO_SWITCH(coo, XPU, IdType, DType, {
+    ret = impl::COOGetRowDataAndIndices<XPU, IdType, DType>(coo, row);
+  });
+  return ret;
+}
+NDArray COOGetData(COOMatrix coo, int64_t row, int64_t col) {
+  NDArray ret;
+  ATEN_COO_SWITCH(coo, XPU, IdType, DType, {
+    ret = impl::COOGetData<XPU, IdType, DType>(coo, row, col);
+  });
+  return ret;
+}
+std::vector<NDArray> COOGetDataAndIndices(
+    COOMatrix coo, NDArray rows, NDArray cols) {
+  std::vector<NDArray> ret;
+  ATEN_COO_SWITCH(coo, XPU, IdType, DType, {
+    ret = impl::COOGetDataAndIndices<XPU, IdType, DType>(coo, rows, cols);
+  });
+  return ret;
+}
+COOMatrix COOTranspose(COOMatrix coo) {
+  COOMatrix ret;
+  ATEN_COO_SWITCH(coo, XPU, IdType, DType, {
+    ret = impl::COOTranspose<XPU, IdType, DType>(coo);
+  });
+  return ret;
+}
 CSRMatrix COOToCSR(COOMatrix coo) {
  CSRMatrix ret;
  ATEN_COO_SWITCH(coo, XPU, IdType, DType, {
@@ -427,5 +492,29 @@ CSRMatrix COOToCSR(COOMatrix coo) {
  return ret;
 }
+// Range-slice wrapper: ATEN_COO_SWITCH binds XPU, IdType and DType for `coo`,
+// and (start, end) are forwarded to impl::COOSliceRows<XPU, IdType, DType>.
+COOMatrix COOSliceRows(COOMatrix coo, int64_t start, int64_t end) {
+  // Default-constructed; only assigned inside the switch body.
+  COOMatrix ret;
+  ATEN_COO_SWITCH(coo, XPU, IdType, DType, {
+    ret = impl::COOSliceRows<XPU, IdType, DType>(coo, start, end);
+  });
+  return ret;
+}
+// Index-array slice wrapper: ATEN_COO_SWITCH binds XPU, IdType and DType for
+// `coo`, and `rows` is forwarded to impl::COOSliceRows<XPU, IdType, DType>.
+COOMatrix COOSliceRows(COOMatrix coo, NDArray rows) {
+  // Default-constructed; only assigned inside the switch body.
+  COOMatrix ret;
+  ATEN_COO_SWITCH(coo, XPU, IdType, DType, {
+    ret = impl::COOSliceRows<XPU, IdType, DType>(coo, rows);
+  });
+  return ret;
+}
+// Wrapper: ATEN_COO_SWITCH binds XPU, IdType and DType for `coo`, and the
+// row/col id arrays are forwarded to impl::COOSliceMatrix<XPU, IdType, DType>.
+COOMatrix COOSliceMatrix(COOMatrix coo, NDArray rows, NDArray cols) {
+  // Default-constructed; only assigned inside the switch body.
+  COOMatrix ret;
+  ATEN_COO_SWITCH(coo, XPU, IdType, DType, {
+    ret = impl::COOSliceMatrix<XPU, IdType, DType>(coo, rows, cols);
+  });
+  return ret;
+}
 }  // namespace aten
 }  // namespace dgl
--- a/src/array/array_op.h
+++ b/src/array/array_op.h
@@ -107,12 +107,48 @@ CSRMatrix CSRSliceMatrix(CSRMatrix csr, runtime::NDArray rows, runtime::NDArray
 template <DLDeviceType XPU, typename IdType, typename DType>
 void CSRSort(CSRMatrix csr);
+// Scalar form: takes one (row, col) pair and returns a bool.
+template <DLDeviceType XPU, typename IdType>
+bool COOIsNonZero(COOMatrix coo, int64_t row, int64_t col);
+// Batched form: takes row and col id arrays and returns an NDArray.
+template <DLDeviceType XPU, typename IdType>
+runtime::NDArray COOIsNonZero(COOMatrix coo, runtime::NDArray row, runtime::NDArray col);
 template <DLDeviceType XPU, typename IdType>
 bool COOHasDuplicate(COOMatrix coo);
+// Scalar form: takes one row id and returns an int64_t.
+template <DLDeviceType XPU, typename IdType>
+int64_t COOGetRowNNZ(COOMatrix coo, int64_t row);
+// Batched form: takes a row id array and returns an NDArray.
+template <DLDeviceType XPU, typename IdType>
+runtime::NDArray COOGetRowNNZ(COOMatrix coo, runtime::NDArray row);
+// Takes one row id and returns a pair of NDArrays.
+template <DLDeviceType XPU, typename IdType, typename DType>
+std::pair<runtime::NDArray, runtime::NDArray>
+COOGetRowDataAndIndices(COOMatrix coo, int64_t row);
+// Takes one (row, col) pair and returns an NDArray.
+template <DLDeviceType XPU, typename IdType, typename DType>
+runtime::NDArray COOGetData(COOMatrix coo, int64_t row, int64_t col);
+// Takes row and col id arrays and returns a vector of NDArrays.
+template <DLDeviceType XPU, typename IdType, typename DType>
+std::vector<runtime::NDArray> COOGetDataAndIndices(
+    COOMatrix coo, runtime::NDArray rows, runtime::NDArray cols);
+// Takes a COO matrix and returns a COO matrix.
+template <DLDeviceType XPU, typename IdType, typename DType>
+COOMatrix COOTranspose(COOMatrix coo);
 template <DLDeviceType XPU, typename IdType, typename DType>
 CSRMatrix COOToCSR(COOMatrix coo);
+// Row slice given as a (start, end) pair of row indices.
+template <DLDeviceType XPU, typename IdType, typename DType>
+COOMatrix COOSliceRows(COOMatrix coo, int64_t start, int64_t end);
+// Row slice given as an array of row ids.
+template <DLDeviceType XPU, typename IdType, typename DType>
+COOMatrix COOSliceRows(COOMatrix coo, runtime::NDArray rows);
+// Sub-matrix slice given as arrays of row ids and col ids.
+template <DLDeviceType XPU, typename IdType, typename DType>
+COOMatrix COOSliceMatrix(COOMatrix coo, runtime::NDArray rows, runtime::NDArray cols);
 }  // namespace impl
 }  // namespace aten
 }  // namespace dgl

--- a/src/array/cpu/array_utils.h
+++ b/src/array/cpu/array_utils.h
+/*!
+ *  Copyright (c) 2019 by Contributors
+ * \file array/cpu/array_utils.h
+ * \brief Utility classes and functions for DGL arrays.
+ */
+#ifndef DGL_ARRAY_CPU_ARRAY_UTILS_H_
+#define DGL_ARRAY_CPU_ARRAY_UTILS_H_
+#include <dgl/array.h>
+#include <vector>
+#include <unordered_map>
+#include <utility>
+namespace dgl {
+namespace aten {
+/*!
+ * \brief A hashmap that maps each id in the given array to a new id starting from zero.
+ *
+ * New ids are handed out consecutively, in first-seen order, across all
+ * Update() calls on the same object.
+ *
+ * Useful for relabeling integers and finding unique integers.
+ *
+ * Usually faster than std::unordered_map in existence checking: a fixed-size
+ * bitmap indexed by the low bits of the id is consulted first, so an id whose
+ * bit is unset is rejected without touching the hash table.
+ */
+template <typename IdType>
+class IdHashMap {
+ public:
+  // default ctor
+  IdHashMap(): filter_(kFilterSize, false) {}
+  // Construct the hashmap using the given id array.
+  // The id array could contain duplicates.
+  explicit IdHashMap(IdArray ids): filter_(kFilterSize, false) {
+    Update(ids);
+  }
+  // Update the hashmap with given id array.
+  // The id array could contain duplicates.
+  // Ids already present keep their previously assigned new id.
+  void Update(IdArray ids) {
+    const IdType* ids_data = static_cast<IdType*>(ids->data);
+    const int64_t len = ids->shape[0];
+    // Continue numbering right after the ids that are already stored.
+    IdType newid = static_cast<IdType>(oldv2newv_.size());
+    for (int64_t i = 0; i < len; ++i) {
+      const IdType id = ids_data[i];
+      if (!Contains(id)) {
+        oldv2newv_[id] = newid++;
+        filter_[id & kFilterMask] = true;
+      }
+    }
+  }
+  // Return true if the given id is contained in this hashmap.
+  bool Contains(IdType id) const {
+    // The filter bit can be set by a different id sharing the same low bits,
+    // so a set bit still has to be confirmed by the hash table.
+    return filter_[id & kFilterMask] && oldv2newv_.count(id);
+  }
+  // Return the new id of the given id. If the given id is not contained
+  // in the hash map, returns the default_val instead.
+  IdType Map(IdType id, IdType default_val) const {
+    if (filter_[id & kFilterMask]) {
+      auto it = oldv2newv_.find(id);
+      return (it == oldv2newv_.end()) ? default_val : it->second;
+    } else {
+      return default_val;
+    }
+  }
+  // Return the new id of each id in the given array.
+  // The result has the same length, context and bit width as the input.
+  IdArray Map(IdArray ids, IdType default_val) const {
+    const IdType* ids_data = static_cast<IdType*>(ids->data);
+    const int64_t len = ids->shape[0];
+    IdArray values = NewIdArray(len, ids->ctx, ids->dtype.bits);
+    IdType* values_data = static_cast<IdType*>(values->data);
+    for (int64_t i = 0; i < len; ++i)
+      values_data[i] = Map(ids_data[i], default_val);
+    return values;
+  }
+  // Return all the old ids collected so far, ordered by new id.
+  // The returned array is always allocated on CPU.
+  IdArray Values() const {
+    IdArray values = NewIdArray(oldv2newv_.size(), DLContext{kDLCPU, 0}, sizeof(IdType) * 8);
+    IdType* values_data = static_cast<IdType*>(values->data);
+    // Bind by reference: iterating by value would copy every key/value pair.
+    for (const auto& pair : oldv2newv_)
+      values_data[pair.second] = pair.first;
+    return values;
+  }
+ private:
+  static constexpr int32_t kFilterMask = 0xFFFFFF;
+  static constexpr int32_t kFilterSize = kFilterMask + 1;
+  // This bitmap is used as a bloom filter to remove some lookups.
+  // Hashtable is very slow. Using bloom filter can significantly speed up lookups.
+  std::vector<bool> filter_;
+  // The hashmap from old vid to new vid
+  std::unordered_map<IdType, IdType> oldv2newv_;
+};
+/*!
+ * \brief Hash type for building maps/sets with pairs as keys.
+ *
+ * The two member hashes are mixed asymmetrically.  A plain XOR would make
+ * (a, b) collide with (b, a) and would send every (a, a) to the same bucket,
+ * which degrades hash containers keyed by (src, dst) edges.
+ */
+struct PairHash {
+  template <class T1, class T2>
+  std::size_t operator() (const std::pair<T1, T2>& pair) const {
+    std::size_t seed = std::hash<T1>()(pair.first);
+    // Fold the second hash into the first, boost::hash_combine style.
+    seed ^= std::hash<T2>()(pair.second) + 0x9e3779b9 + (seed << 6) + (seed >> 2);
+    return seed;
+  }
+};
+};  // namespace aten
+};  // namespace dgl
+#endif  // DGL_ARRAY_CPU_ARRAY_UTILS_H_
--- a/src/array/cpu/spmat_op_impl.cc
+++ b/src/array/cpu/spmat_op_impl.cc
@@ -6,6 +6,7 @@
 #include <dgl/array.h>
 #include <vector>
 #include <unordered_set>
+#include "array_utils.h"
 namespace dgl {
@@ -13,69 +14,6 @@ using runtime::NDArray;
 namespace aten {
 namespace impl {
-namespace {
-/*!
- * \brief A hashmap that maps each ids in the given array to new ids starting from zero.
- */
-template <typename IdType>
-class IdHashMap {
- public:
-  // Construct the hashmap using the given id arrays.
-  // The id array could contain duplicates.
-  explicit IdHashMap(IdArray ids): filter_(kFilterSize, false) {
-    const IdType* ids_data = static_cast<IdType*>(ids->data);
-    const int64_t len = ids->shape[0];
-    IdType newid = 0;
-    for (int64_t i = 0; i < len; ++i) {
-      const IdType id = ids_data[i];
-      if (!Contains(id)) {
-        oldv2newv_[id] = newid++;
-        filter_[id & kFilterMask] = true;
-      }
-    }
-  }
-  // Return true if the given id is contained in this hashmap.
-  bool Contains(IdType id) const {
-    return filter_[id & kFilterMask] && oldv2newv_.count(id);
-  }
-  // Return the new id of the given id. If the given id is not contained
-  // in the hash map, returns the default_val instead.
-  IdType Map(IdType id, IdType default_val) const {
-    if (filter_[id & kFilterMask]) {
-      auto it = oldv2newv_.find(id);
-      return (it == oldv2newv_.end()) ? default_val : it->second;
-    } else {
-      return default_val;
-    }
-  }
- private:
-  static constexpr int32_t kFilterMask = 0xFFFFFF;
-  static constexpr int32_t kFilterSize = kFilterMask + 1;
-  // This bitmap is used as a bloom filter to remove some lookups.
-  // Hashtable is very slow. Using bloom filter can significantly speed up lookups.
-  std::vector<bool> filter_;
-  // The hashmap from old vid to new vid
-  std::unordered_map<IdType, IdType> oldv2newv_;
-};
-struct PairHash {
-  template <class T1, class T2>
-  std::size_t operator() (const std::pair<T1, T2>& pair) const {
-    return std::hash<T1>()(pair.first) ^ std::hash<T2>()(pair.second);
-  }
-};
-inline bool CSRHasData(CSRMatrix csr) {
-  return csr.data.defined();
-}
-inline bool COOHasData(COOMatrix csr) {
-  return csr.data.defined();
-}
-}  // namespace
 ///////////////////////////// CSRIsNonZero /////////////////////////////
@@ -649,91 +587,6 @@ void CSRSort(CSRMatrix csr) {
 template void CSRSort<kDLCPU, int64_t, int64_t>(CSRMatrix csr);
 template void CSRSort<kDLCPU, int32_t, int32_t>(CSRMatrix csr);
-///////////////////////////// COOHasDuplicate /////////////////////////////
-template <DLDeviceType XPU, typename IdType>
-bool COOHasDuplicate(COOMatrix coo) {
-  std::unordered_set<std::pair<IdType, IdType>, PairHash> hashmap;
-  const IdType* src_data = static_cast<IdType*>(coo.row->data);
-  const IdType* dst_data = static_cast<IdType*>(coo.col->data);
-  const auto nnz = coo.row->shape[0];
-  for (IdType eid = 0; eid < nnz; ++eid) {
-    const auto& p = std::make_pair(src_data[eid], dst_data[eid]);
-    if (hashmap.count(p)) {
-      return true;
-    } else {
-      hashmap.insert(p);
-    }
-  }
-  return false;
-}
-template bool COOHasDuplicate<kDLCPU, int32_t>(COOMatrix coo);
-template bool COOHasDuplicate<kDLCPU, int64_t>(COOMatrix coo);
-///////////////////////////// COOToCSR /////////////////////////////
-// complexity: time O(NNZ), space O(1)
-template <DLDeviceType XPU, typename IdType, typename DType>
-CSRMatrix COOToCSR(COOMatrix coo) {
-  const int64_t N = coo.num_rows;
-  const int64_t NNZ = coo.row->shape[0];
-  const IdType* row_data = static_cast<IdType*>(coo.row->data);
-  const IdType* col_data = static_cast<IdType*>(coo.col->data);
-  NDArray ret_indptr = NDArray::Empty({N + 1}, coo.row->dtype, coo.row->ctx);
-  NDArray ret_indices = NDArray::Empty({NNZ}, coo.row->dtype, coo.row->ctx);
-  NDArray ret_data;
-  if (COOHasData(coo)) {
-    ret_data = NDArray::Empty({NNZ}, coo.data->dtype, coo.data->ctx);
-  } else {
-    // if no data array in the input coo, the return data array is a shuffle index.
-    ret_data = NDArray::Empty({NNZ}, coo.row->dtype, coo.row->ctx);
-  }
-  IdType* Bp = static_cast<IdType*>(ret_indptr->data);
-  IdType* Bi = static_cast<IdType*>(ret_indices->data);
-  std::fill(Bp, Bp + N, 0);
-  for (int64_t i = 0; i < NNZ; ++i) {
-    Bp[row_data[i]]++;
-  }
-  // cumsum
-  for (int64_t i = 0, cumsum = 0; i < N; ++i) {
-    const IdType temp = Bp[i];
-    Bp[i] = cumsum;
-    cumsum += temp;
-  }
-  Bp[N] = NNZ;
-  for (int64_t i = 0; i < NNZ; ++i) {
-    const IdType r = row_data[i];
-    Bi[Bp[r]] = col_data[i];
-    if (COOHasData(coo)) {
-      const DType* data = static_cast<DType*>(coo.data->data);
-      DType* Bx = static_cast<DType*>(ret_data->data);
-      Bx[Bp[r]] = data[i];
-    } else {
-      IdType* Bx = static_cast<IdType*>(ret_data->data);
-      Bx[Bp[r]] = i;
-    }
-    Bp[r]++;
-  }
-  // correct the indptr
-  for (int64_t i = 0, last = 0; i <= N; ++i) {
-    IdType temp = Bp[i];
-    Bp[i] = last;
-    last = temp;
-  }
-  return CSRMatrix{coo.num_rows, coo.num_cols, ret_indptr, ret_indices, ret_data};
-}
-template CSRMatrix COOToCSR<kDLCPU, int32_t, int32_t>(COOMatrix coo);
-template CSRMatrix COOToCSR<kDLCPU, int64_t, int64_t>(COOMatrix coo);
 }  // namespace impl
 }  // namespace aten
 }  // namespace dgl
--- a/src/array/cpu/spmat_op_impl_coo.cc
+++ b/src/array/cpu/spmat_op_impl_coo.cc
+/*!
+ *  Copyright (c) 2019 by Contributors
+ * \file array/cpu/spmat_op_impl_coo.cc
+ * \brief CPU implementation of COO sparse matrix operators
+ */
+#include <dgl/array.h>
+#include <vector>
+#include <unordered_set>
+#include <unordered_map>
+#include "array_utils.h"
+namespace dgl {
+using runtime::NDArray;
+namespace aten {
+namespace impl {
+/*
+ * TODO(BarclayII):
+ * For row-major sorted COOs, we have faster implementation with binary search,
+ * sorted search, etc.  Later we should benchmark how much we can gain with
+ * sorted COOs on hypersparse graphs.
+ */
+///////////////////////////// COOIsNonZero /////////////////////////////
+/*!
+ * \brief Tell whether the COO matrix stores at least one entry at (row, col).
+ *
+ * Both indices are checked against the matrix shape first; the row and column
+ * index arrays are then scanned linearly from the front.
+ */
+template <DLDeviceType XPU, typename IdType>
+bool COOIsNonZero(COOMatrix coo, int64_t row, int64_t col) {
+  CHECK(row >= 0 && row < coo.num_rows) << "Invalid row index: " << row;
+  CHECK(col >= 0 && col < coo.num_cols) << "Invalid col index: " << col;
+  const IdType* const src = static_cast<IdType*>(coo.row->data);
+  const IdType* const dst = static_cast<IdType*>(coo.col->data);
+  const int64_t nnz = coo.row->shape[0];
+  int64_t pos = 0;
+  // Advance to the first matching entry, or to the end if there is none.
+  while (pos < nnz && !(src[pos] == row && dst[pos] == col))
+    ++pos;
+  return pos < nnz;
+}
+template bool COOIsNonZero<kDLCPU, int32_t>(COOMatrix, int64_t, int64_t);
+template bool COOIsNonZero<kDLCPU, int64_t>(COOMatrix, int64_t, int64_t);
+/*!
+ * \brief Batched version of COOIsNonZero.
+ *
+ * `row` and `col` must have the same length, or one of them must have length
+ * one, in which case its single id is paired with every id of the other array.
+ *
+ * \return An array with the dtype and context of `row`, holding 1 where the
+ *         queried (row, col) pair is stored in the matrix and 0 elsewhere.
+ */
+template <DLDeviceType XPU, typename IdType>
+NDArray COOIsNonZero(COOMatrix coo, NDArray row, NDArray col) {
+  const int64_t rowlen = row->shape[0];
+  const int64_t collen = col->shape[0];
+  // Without this check a length mismatch would read past the shorter array.
+  CHECK((rowlen == collen) || (rowlen == 1) || (collen == 1))
+    << "Invalid row and col id array.";
+  // Broadcasting a single id against an empty array yields an empty result.
+  const int64_t rstlen = (rowlen == 0 || collen == 0) ? 0 : std::max(rowlen, collen);
+  NDArray rst = NDArray::Empty({rstlen}, row->dtype, row->ctx);
+  IdType* rst_data = static_cast<IdType*>(rst->data);
+  const IdType* row_data = static_cast<IdType*>(row->data);
+  const IdType* col_data = static_cast<IdType*>(col->data);
+  // A stride of zero keeps re-reading the single id of the broadcast array.
+  const int64_t row_stride = (rowlen == 1 && collen != 1) ? 0 : 1;
+  const int64_t col_stride = (collen == 1 && rowlen != 1) ? 0 : 1;
+#pragma omp parallel for
+  for (int64_t k = 0; k < rstlen; ++k) {
+    const int64_t i = row_stride * k;
+    const int64_t j = col_stride * k;
+    rst_data[k] = COOIsNonZero<XPU, IdType>(coo, row_data[i], col_data[j])? 1 : 0;
+  }
+  return rst;
+}
+template NDArray COOIsNonZero<kDLCPU, int32_t>(COOMatrix, NDArray, NDArray);
+template NDArray COOIsNonZero<kDLCPU, int64_t>(COOMatrix, NDArray, NDArray);
+///////////////////////////// COOHasDuplicate /////////////////////////////
+/*!
+ * \brief Tell whether any (row, col) pair is stored more than once.
+ *
+ * Scans the entries in order and stops at the first repeated pair.
+ */
+template <DLDeviceType XPU, typename IdType>
+bool COOHasDuplicate(COOMatrix coo) {
+  std::unordered_set<std::pair<IdType, IdType>, PairHash> seen;
+  const IdType* src_data = static_cast<IdType*>(coo.row->data);
+  const IdType* dst_data = static_cast<IdType*>(coo.col->data);
+  const int64_t nnz = coo.row->shape[0];
+  // The counter is int64_t so it cannot wrap when IdType is narrower than nnz.
+  for (int64_t eid = 0; eid < nnz; ++eid) {
+    // insert() reports whether the pair was new, so one lookup is enough.
+    if (!seen.insert(std::make_pair(src_data[eid], dst_data[eid])).second)
+      return true;
+  }
+  return false;
+}
+template bool COOHasDuplicate<kDLCPU, int32_t>(COOMatrix coo);
+template bool COOHasDuplicate<kDLCPU, int64_t>(COOMatrix coo);
+///////////////////////////// COOGetRowNNZ /////////////////////////////
+/*!
+ * \brief Count the stored entries whose row index equals `row`.
+ *
+ * The row index is checked against the matrix shape, then the whole row index
+ * array is scanned.
+ */
+template <DLDeviceType XPU, typename IdType>
+int64_t COOGetRowNNZ(COOMatrix coo, int64_t row) {
+  CHECK(row >= 0 && row < coo.num_rows) << "Invalid row index: " << row;
+  const IdType* const src = static_cast<IdType*>(coo.row->data);
+  const int64_t nnz = coo.row->shape[0];
+  int64_t count = 0;
+  for (int64_t pos = 0; pos < nnz; ++pos) {
+    count += (src[pos] == row) ? 1 : 0;
+  }
+  return count;
+}
+template int64_t COOGetRowNNZ<kDLCPU, int32_t>(COOMatrix, int64_t);
+template int64_t COOGetRowNNZ<kDLCPU, int64_t>(COOMatrix, int64_t);
+/*!
+ * \brief Batched version of COOGetRowNNZ.
+ *
+ * \return An array with the dtype and context of `rows`; element i holds the
+ *         entry count of row rows[i].
+ */
+template <DLDeviceType XPU, typename IdType>
+NDArray COOGetRowNNZ(COOMatrix coo, NDArray rows) {
+  const int64_t num_queries = rows->shape[0];
+  NDArray counts = NDArray::Empty({num_queries}, rows->dtype, rows->ctx);
+  const IdType* const query = static_cast<IdType*>(rows->data);
+  IdType* const out = static_cast<IdType*>(counts->data);
+#pragma omp parallel for
+  for (int64_t q = 0; q < num_queries; ++q) {
+    out[q] = COOGetRowNNZ<XPU, IdType>(coo, query[q]);
+  }
+  return counts;
+}
+template NDArray COOGetRowNNZ<kDLCPU, int32_t>(COOMatrix, NDArray);
+template NDArray COOGetRowNNZ<kDLCPU, int64_t>(COOMatrix, NDArray);
+///////////////////////////// COOGetRowDataAndIndices /////////////////////////////
+/*!
+ * \brief Collect the data and column indices of every entry in the given row.
+ *
+ * When the matrix has no data array, the position of the entry in the COO
+ * arrays is used as its data.
+ *
+ * \return The pair (data, column indices), in the order the entries are stored.
+ */
+template <DLDeviceType XPU, typename IdType, typename DType>
+std::pair<NDArray, NDArray> COOGetRowDataAndIndices(
+    COOMatrix coo, int64_t row) {
+  CHECK(row >= 0 && row < coo.num_rows) << "Invalid row index: " << row;
+  const IdType* const src = static_cast<IdType*>(coo.row->data);
+  const IdType* const dst = static_cast<IdType*>(coo.col->data);
+  const DType* payload = nullptr;
+  if (COOHasData(coo))
+    payload = static_cast<DType*>(coo.data->data);
+  std::vector<DType> row_data;
+  std::vector<IdType> row_cols;
+  const int64_t nnz = coo.row->shape[0];
+  for (int64_t pos = 0; pos < nnz; ++pos) {
+    if (src[pos] != row)
+      continue;
+    row_cols.push_back(dst[pos]);
+    row_data.push_back(payload ? payload[pos] : pos);
+  }
+  return std::make_pair(NDArray::FromVector(row_data), NDArray::FromVector(row_cols));
+}
+template std::pair<NDArray, NDArray>
+COOGetRowDataAndIndices<kDLCPU, int32_t, int32_t>(COOMatrix, int64_t);
+template std::pair<NDArray, NDArray>
+COOGetRowDataAndIndices<kDLCPU, int64_t, int64_t>(COOMatrix, int64_t);
+///////////////////////////// COOGetData /////////////////////////////
+/*!
+ * \brief Collect the data of every entry stored at (row, col).
+ *
+ * More than one value is returned when the pair is stored several times.
+ * When the matrix has no data array, the position of the entry in the COO
+ * arrays is used as its data.
+ */
+template <DLDeviceType XPU, typename IdType, typename DType>
+NDArray COOGetData(COOMatrix coo, int64_t row, int64_t col) {
+  CHECK(row >= 0 && row < coo.num_rows) << "Invalid row index: " << row;
+  CHECK(col >= 0 && col < coo.num_cols) << "Invalid col index: " << col;
+  std::vector<DType> ret_vec;
+  const IdType* coo_row_data = static_cast<IdType*>(coo.row->data);
+  const IdType* coo_col_data = static_cast<IdType*>(coo.col->data);
+  const DType* data = COOHasData(coo) ? static_cast<DType*>(coo.data->data) : nullptr;
+  // The counter is int64_t, matching the type of shape[0], so it cannot wrap
+  // when IdType is 32-bit.
+  for (int64_t i = 0; i < coo.row->shape[0]; ++i) {
+    if (coo_row_data[i] == row && coo_col_data[i] == col)
+      ret_vec.push_back(data ? data[i] : i);
+  }
+  return NDArray::FromVector(ret_vec);
+}
+template NDArray COOGetData<kDLCPU, int32_t, int32_t>(COOMatrix, int64_t, int64_t);
+template NDArray COOGetData<kDLCPU, int64_t, int64_t>(COOMatrix, int64_t, int64_t);
+///////////////////////////// COOGetDataAndIndices /////////////////////////////
+/*!
+ * \brief For each queried (row, col) pair, collect every matching stored entry.
+ *
+ * `rows` and `cols` must have the same length, or one of them must have length
+ * one, in which case its single id is paired with every id of the other array.
+ * When the matrix has no data array, the position of the entry in the COO
+ * arrays is used as its data.
+ *
+ * \return Three arrays of equal length: row ids, col ids and data.
+ */
+template <DLDeviceType XPU, typename IdType, typename DType>
+std::vector<NDArray> COOGetDataAndIndices(
+    COOMatrix coo, NDArray rows, NDArray cols) {
+  const int64_t n_rows = rows->shape[0];
+  const int64_t n_cols = cols->shape[0];
+  CHECK((n_rows == n_cols) || (n_rows == 1) || (n_cols == 1))
+    << "Invalid row and col id array.";
+  // A step of zero keeps re-reading the single id of the broadcast array.
+  const bool broadcast_row = (n_rows == 1 && n_cols != 1);
+  const bool broadcast_col = (n_cols == 1 && n_rows != 1);
+  const int64_t row_step = broadcast_row ? 0 : 1;
+  const int64_t col_step = broadcast_col ? 0 : 1;
+  const IdType* const query_rows = static_cast<IdType*>(rows->data);
+  const IdType* const query_cols = static_cast<IdType*>(cols->data);
+  const IdType* const src = static_cast<IdType*>(coo.row->data);
+  const IdType* const dst = static_cast<IdType*>(coo.col->data);
+  const DType* payload = nullptr;
+  if (COOHasData(coo))
+    payload = static_cast<DType*>(coo.data->data);
+  std::vector<IdType> out_rows, out_cols;
+  std::vector<DType> out_data;
+  int64_t ri = 0, ci = 0;
+  while (ri < n_rows && ci < n_cols) {
+    const IdType row_id = query_rows[ri];
+    const IdType col_id = query_cols[ci];
+    CHECK(row_id >= 0 && row_id < coo.num_rows) << "Invalid row index: " << row_id;
+    CHECK(col_id >= 0 && col_id < coo.num_cols) << "Invalid col index: " << col_id;
+    for (int64_t pos = 0; pos < coo.row->shape[0]; ++pos) {
+      if (!(src[pos] == row_id && dst[pos] == col_id))
+        continue;
+      out_rows.push_back(row_id);
+      out_cols.push_back(col_id);
+      out_data.push_back(payload ? payload[pos] : pos);
+    }
+    ri += row_step;
+    ci += col_step;
+  }
+  return {NDArray::FromVector(out_rows),
+          NDArray::FromVector(out_cols),
+          NDArray::FromVector(out_data)};
+}
+template std::vector<NDArray> COOGetDataAndIndices<kDLCPU, int32_t, int32_t>(
+    COOMatrix coo, NDArray rows, NDArray cols);
+template std::vector<NDArray> COOGetDataAndIndices<kDLCPU, int64_t, int64_t>(
+    COOMatrix coo, NDArray rows, NDArray cols);
+///////////////////////////// COOTranspose /////////////////////////////
+/*!
+ * \brief Transpose a COO matrix by swapping its shape and its index arrays.
+ *
+ * The row, col and data arrays are handed over as-is; only their roles change.
+ */
+template <DLDeviceType XPU, typename IdType, typename DType>
+COOMatrix COOTranspose(COOMatrix coo) {
+  COOMatrix transposed{coo.num_cols, coo.num_rows, coo.col, coo.row, coo.data};
+  return transposed;
+}
+template COOMatrix COOTranspose<kDLCPU, int32_t, int32_t>(COOMatrix coo);
+template COOMatrix COOTranspose<kDLCPU, int64_t, int64_t>(COOMatrix coo);
+///////////////////////////// COOToCSR /////////////////////////////
+// Convert a COO matrix to CSR with a counting sort on the row index.
+// Entries of one row keep the relative order they had in the COO arrays.
+// When the COO matrix has no data array, the CSR data array holds, for each
+// entry, its position in the COO arrays.
+// complexity: time O(NNZ), space O(1)
+template <DLDeviceType XPU, typename IdType, typename DType>
+CSRMatrix COOToCSR(COOMatrix coo) {
+  const int64_t N = coo.num_rows;
+  const int64_t NNZ = coo.row->shape[0];
+  const IdType* row_data = static_cast<IdType*>(coo.row->data);
+  const IdType* col_data = static_cast<IdType*>(coo.col->data);
+  NDArray ret_indptr = NDArray::Empty({N + 1}, coo.row->dtype, coo.row->ctx);
+  NDArray ret_indices = NDArray::Empty({NNZ}, coo.row->dtype, coo.row->ctx);
+  NDArray ret_data;
+  if (COOHasData(coo)) {
+    ret_data = NDArray::Empty({NNZ}, coo.data->dtype, coo.data->ctx);
+  } else {
+    // if no data array in the input coo, the return data array is a shuffle index.
+    ret_data = NDArray::Empty({NNZ}, coo.row->dtype, coo.row->ctx);
+  }
+  IdType* Bp = static_cast<IdType*>(ret_indptr->data);
+  IdType* Bi = static_cast<IdType*>(ret_indices->data);
+  // Pass 1: count the entries of each row into Bp[0..N).
+  std::fill(Bp, Bp + N, 0);
+  for (int64_t i = 0; i < NNZ; ++i) {
+    Bp[row_data[i]]++;
+  }
+  // cumsum
+  // Turns the counts into exclusive prefix sums: Bp[r] becomes the first
+  // output slot of row r.
+  for (int64_t i = 0, cumsum = 0; i < N; ++i) {
+    const IdType temp = Bp[i];
+    Bp[i] = cumsum;
+    cumsum += temp;
+  }
+  Bp[N] = NNZ;
+  // Pass 2: scatter every entry into the next free slot of its row.  Bp[r] is
+  // advanced as a write cursor, so afterwards it points at the end of row r.
+  for (int64_t i = 0; i < NNZ; ++i) {
+    const IdType r = row_data[i];
+    Bi[Bp[r]] = col_data[i];
+    if (COOHasData(coo)) {
+      const DType* data = static_cast<DType*>(coo.data->data);
+      DType* Bx = static_cast<DType*>(ret_data->data);
+      Bx[Bp[r]] = data[i];
+    } else {
+      IdType* Bx = static_cast<IdType*>(ret_data->data);
+      Bx[Bp[r]] = i;
+    }
+    Bp[r]++;
+  }
+  // correct the indptr
+  // Each Bp[r] now holds the end of row r; shifting the array right by one
+  // slot restores the row start offsets.
+  for (int64_t i = 0, last = 0; i <= N; ++i) {
+    IdType temp = Bp[i];
+    Bp[i] = last;
+    last = temp;
+  }
+  return CSRMatrix{coo.num_rows, coo.num_cols, ret_indptr, ret_indices, ret_data};
+}
+template CSRMatrix COOToCSR<kDLCPU, int32_t, int32_t>(COOMatrix coo);
+template CSRMatrix COOToCSR<kDLCPU, int64_t, int64_t>(COOMatrix coo);
+///////////////////////////// COOSliceRows /////////////////////////////
+/*!
+ * \brief Keep only the entries whose row index lies in [start, end).
+ *
+ * Row indices of the result are shifted so that `start` becomes row 0; column
+ * indices are unchanged.  When the matrix has no data array, the position of
+ * the entry in the input COO arrays is used as its data.
+ *
+ * \param start First row to keep; must be in [0, num_rows).
+ * \param end One past the last row to keep; must be in (0, num_rows] and not
+ *            smaller than `start`.
+ * \return A COO matrix with (end - start) rows and the original column count.
+ */
+template <DLDeviceType XPU, typename IdType, typename DType>
+COOMatrix COOSliceRows(COOMatrix coo, int64_t start, int64_t end) {
+  CHECK(start >= 0 && start < coo.num_rows) << "Invalid start row " << start;
+  CHECK(end > 0 && end <= coo.num_rows) << "Invalid end row " << end;
+  // A reversed range would otherwise produce a matrix with a negative row count.
+  CHECK(start <= end) << "Invalid row range: start " << start << " > end " << end;
+  const IdType* coo_row_data = static_cast<IdType*>(coo.row->data);
+  const IdType* coo_col_data = static_cast<IdType*>(coo.col->data);
+  const DType* coo_data = COOHasData(coo) ? static_cast<DType*>(coo.data->data) : nullptr;
+  std::vector<IdType> ret_row, ret_col;
+  std::vector<DType> ret_data;
+  for (int64_t i = 0; i < coo.row->shape[0]; ++i) {
+    const IdType row_id = coo_row_data[i];
+    const IdType col_id = coo_col_data[i];
+    if (row_id < end && row_id >= start) {
+      ret_row.push_back(row_id - start);
+      ret_col.push_back(col_id);
+      ret_data.push_back(coo_data ? coo_data[i] : i);
+    }
+  }
+  return COOMatrix{
+    end - start,
+    coo.num_cols,
+    NDArray::FromVector(ret_row),
+    NDArray::FromVector(ret_col),
+    NDArray::FromVector(ret_data)};
+}
+template COOMatrix COOSliceRows<kDLCPU, int32_t, int32_t>(COOMatrix, int64_t, int64_t);
+template COOMatrix COOSliceRows<kDLCPU, int64_t, int64_t>(COOMatrix, int64_t, int64_t);
+/*!
+ * \brief Keep only the entries whose row index appears in `rows`.
+ *
+ * Kept rows are renumbered by an IdHashMap built from `rows`; column indices
+ * are unchanged.  When the matrix has no data array, the position of the entry
+ * in the input COO arrays is used as its data.
+ *
+ * \return A COO matrix with rows->shape[0] rows and the original column count.
+ */
+template <DLDeviceType XPU, typename IdType, typename DType>
+COOMatrix COOSliceRows(COOMatrix coo, NDArray rows) {
+  const IdType* const src = static_cast<IdType*>(coo.row->data);
+  const IdType* const dst = static_cast<IdType*>(coo.col->data);
+  const DType* payload = nullptr;
+  if (COOHasData(coo))
+    payload = static_cast<DType*>(coo.data->data);
+  std::vector<IdType> out_row, out_col;
+  std::vector<DType> out_data;
+  IdHashMap<IdType> selected(rows);
+  const int64_t nnz = coo.row->shape[0];
+  for (int64_t pos = 0; pos < nnz; ++pos) {
+    // -1 marks a row that was not requested.
+    const IdType new_row = selected.Map(src[pos], -1);
+    if (new_row == -1)
+      continue;
+    out_row.push_back(new_row);
+    out_col.push_back(dst[pos]);
+    out_data.push_back(payload ? payload[pos] : pos);
+  }
+  return COOMatrix{
+    rows->shape[0],
+    coo.num_cols,
+    NDArray::FromVector(out_row),
+    NDArray::FromVector(out_col),
+    NDArray::FromVector(out_data)};
+}
+template COOMatrix COOSliceRows<kDLCPU, int32_t, int32_t>(COOMatrix , NDArray);
+template COOMatrix COOSliceRows<kDLCPU, int64_t, int64_t>(COOMatrix , NDArray);
+///////////////////////////// COOSliceMatrix /////////////////////////////
+/*!
+ * \brief Keep only the entries whose row is in `rows` and whose column is in `cols`.
+ *
+ * Kept rows and columns are renumbered by two IdHashMaps built from `rows` and
+ * `cols`.  When the matrix has no data array, the position of the entry in the
+ * input COO arrays is used as its data.
+ *
+ * \return A COO matrix of shape (rows->shape[0], cols->shape[0]).
+ */
+template <DLDeviceType XPU, typename IdType, typename DType>
+COOMatrix COOSliceMatrix(COOMatrix coo, runtime::NDArray rows, runtime::NDArray cols) {
+  const IdType* const src = static_cast<IdType*>(coo.row->data);
+  const IdType* const dst = static_cast<IdType*>(coo.col->data);
+  const DType* payload = nullptr;
+  if (COOHasData(coo))
+    payload = static_cast<DType*>(coo.data->data);
+  IdHashMap<IdType> row_map(rows), col_map(cols);
+  std::vector<IdType> out_row, out_col;
+  std::vector<DType> out_data;
+  const int64_t nnz = coo.row->shape[0];
+  for (int64_t pos = 0; pos < nnz; ++pos) {
+    // -1 marks an id that was not requested; the column is only looked up
+    // once the row is known to be kept.
+    const IdType new_row = row_map.Map(src[pos], -1);
+    if (new_row == -1)
+      continue;
+    const IdType new_col = col_map.Map(dst[pos], -1);
+    if (new_col == -1)
+      continue;
+    out_row.push_back(new_row);
+    out_col.push_back(new_col);
+    out_data.push_back(payload ? payload[pos] : pos);
+  }
+  return COOMatrix{
+    rows->shape[0],
+    cols->shape[0],
+    NDArray::FromVector(out_row),
+    NDArray::FromVector(out_col),
+    NDArray::FromVector(out_data)};
+}
+template COOMatrix COOSliceMatrix<kDLCPU, int32_t, int32_t>(
+    COOMatrix coo, runtime::NDArray rows, runtime::NDArray cols);
+template COOMatrix COOSliceMatrix<kDLCPU, int64_t, int64_t>(
+    COOMatrix coo, runtime::NDArray rows, runtime::NDArray cols);
+}  // namespace impl
+}  // namespace aten
+}  // namespace dgl
--- a/src/graph/heterograph.cc
+++ b/src/graph/heterograph.cc
@@ -7,8 +7,14 @@
 #include <dgl/array.h>
 #include <dgl/packed_func_ext.h>
 #include <dgl/runtime/container.h>
+#include <vector>
+#include <tuple>
+#include <utility>
 #include "../c_api_common.h"
 #include "./unit_graph.h"
+// TODO(BarclayII): currently CompactGraphs depends on the IdHashMap implementation, which
+// only works on CPU.  Should fix later to make it device agnostic.
+#include "../array/cpu/array_utils.h"
 using namespace dgl::runtime;
@@ -103,6 +109,66 @@ HeteroSubgraph EdgeSubgraphNoPreserveNodes(
  return ret;
 }
+/*!
+ * \brief Relabel the nodes of several graphs into one shared, compact ID space.
+ *
+ * For every node type, only the nodes that appear as an endpoint of at least
+ * one edge in any of the given graphs are kept.  The same relabeling is applied
+ * to all graphs.  The number of node types is taken from the first graph.
+ *
+ * \param graphs The graphs to compact; must not be empty.
+ * \return The compacted graphs and, for each node type, the array of original
+ *         node IDs indexed by new node ID.
+ */
+template<typename IdType>
+std::pair<std::vector<HeteroGraphPtr>, std::vector<IdArray>>
+CompactGraphs(const std::vector<HeteroGraphPtr> &graphs) {
+  // graphs[0] is dereferenced below, so an empty list must be rejected first.
+  CHECK(!graphs.empty()) << "Expected at least one graph to compact.";
+  // TODO(BarclayII): check whether the node space and metagraph of each graph is the same.
+  // Step 1: Collect the nodes that have connections for each type.
+  std::vector<aten::IdHashMap<IdType>> hashmaps(graphs[0]->NumVertexTypes());
+  std::vector<std::vector<EdgeArray>> all_edges(graphs.size());   // all_edges[i][etype]
+  for (size_t i = 0; i < graphs.size(); ++i) {
+    // Bind by reference to avoid a shared_ptr refcount round trip per graph.
+    const HeteroGraphPtr &curr_graph = graphs[i];
+    const int64_t num_etypes = curr_graph->NumEdgeTypes();
+    all_edges[i].reserve(num_etypes);
+    for (IdType etype = 0; etype < num_etypes; ++etype) {
+      IdType srctype, dsttype;
+      std::tie(srctype, dsttype) = curr_graph->GetEndpointTypes(etype);
+      const EdgeArray edges = curr_graph->Edges(etype, "eid");
+      hashmaps[srctype].Update(edges.src);
+      hashmaps[dsttype].Update(edges.dst);
+      all_edges[i].push_back(edges);
+    }
+  }
+  // Step 2: Relabel the nodes for each type to a smaller ID space and save the mapping.
+  std::vector<IdArray> induced_nodes;
+  induced_nodes.reserve(hashmaps.size());
+  for (const auto &hashmap : hashmaps)
+    induced_nodes.push_back(hashmap.Values());
+  // Step 3: Remap the edges of each graph.
+  std::vector<HeteroGraphPtr> new_graphs;
+  new_graphs.reserve(graphs.size());
+  for (size_t i = 0; i < graphs.size(); ++i) {
+    std::vector<HeteroGraphPtr> rel_graphs;
+    const HeteroGraphPtr &curr_graph = graphs[i];
+    const auto meta_graph = curr_graph->meta_graph();
+    const int64_t num_etypes = curr_graph->NumEdgeTypes();
+    rel_graphs.reserve(num_etypes);
+    for (IdType etype = 0; etype < num_etypes; ++etype) {
+      IdType srctype, dsttype;
+      std::tie(srctype, dsttype) = curr_graph->GetEndpointTypes(etype);
+      const EdgeArray &edges = all_edges[i][etype];
+      // Every endpoint was registered in step 1, so the -1 default is never hit.
+      const IdArray mapped_rows = hashmaps[srctype].Map(edges.src, -1);
+      const IdArray mapped_cols = hashmaps[dsttype].Map(edges.dst, -1);
+      rel_graphs.push_back(UnitGraph::CreateFromCOO(
+          srctype == dsttype ? 1 : 2,
+          induced_nodes[srctype]->shape[0],
+          induced_nodes[dsttype]->shape[0],
+          mapped_rows,
+          mapped_cols));
+    }
+    new_graphs.push_back(CreateHeteroGraph(meta_graph, rel_graphs));
+  }
+  return std::make_pair(new_graphs, induced_nodes);
+}
 }  // namespace
 HeteroGraph::HeteroGraph(GraphPtr meta_graph, const std::vector<HeteroGraphPtr>& rel_graphs)
@@ -419,6 +485,15 @@ HeteroGraphPtr CreateHeteroGraph(
  return HeteroGraphPtr(new HeteroGraph(meta_graph, rel_graphs));
 }
+/*!
+ * \brief Compact the given graphs, dispatching on the ID data type of the first graph.
+ *
+ * \param graphs The graphs to compact; must not be empty.
+ * \return The pair produced by CompactGraphs<IdType>(graphs).
+ */
+std::pair<std::vector<HeteroGraphPtr>, std::vector<IdArray>>
+CompactGraphs(const std::vector<HeteroGraphPtr> &graphs) {
+  // graphs[0] is dereferenced below, so an empty list must be rejected first.
+  CHECK(!graphs.empty()) << "Expected at least one graph to compact.";
+  std::pair<std::vector<HeteroGraphPtr>, std::vector<IdArray>> result;
+  ATEN_ID_TYPE_SWITCH(graphs[0]->DataType(), IdType, {
+    result = CompactGraphs<IdType>(graphs);
+  });
+  return result;
+}
 ///////////////////////// C APIs /////////////////////////
 DGL_REGISTER_GLOBAL("heterograph_index._CAPI_DGLHeteroCreateUnitGraphFromCOO")
@@ -428,7 +503,9 @@ DGL_REGISTER_GLOBAL("heterograph_index._CAPI_DGLHeteroCreateUnitGraphFromCOO")
    int64_t num_dst = args[2];
    IdArray row = args[3];
    IdArray col = args[4];
-    auto hgptr = UnitGraph::CreateFromCOO(nvtypes, num_src, num_dst, row, col);
+    SparseFormat restrict_format = ParseSparseFormat(args[5]);
+    auto hgptr = UnitGraph::CreateFromCOO(
+        nvtypes, num_src, num_dst, row, col, restrict_format);
    *rv = HeteroGraphRef(hgptr);
  });
@@ -440,8 +517,9 @@ DGL_REGISTER_GLOBAL("heterograph_index._CAPI_DGLHeteroCreateUnitGraphFromCSR")
    IdArray indptr = args[3];
    IdArray indices = args[4];
    IdArray edge_ids = args[5];
+    SparseFormat restrict_format = ParseSparseFormat(args[6]);
    auto hgptr = UnitGraph::CreateFromCSR(
-        nvtypes, num_src, num_dst, indptr, indices, edge_ids);
+        nvtypes, num_src, num_dst, indptr, indices, edge_ids, restrict_format);
    *rv = HeteroGraphRef(hgptr);
  });
@@ -782,6 +860,31 @@ DGL_REGISTER_GLOBAL("heterograph_index._CAPI_DGLHeteroEdgeSubgraph")
    *rv = HeteroSubgraphRef(subg);
  });
+// C API entry for CompactGraphs.
+// args[0] is a list of graph references.  The return value is a two-element
+// list: the compacted graph references, followed by the induced node arrays.
+DGL_REGISTER_GLOBAL("heterograph_index._CAPI_DGLCompactGraphs")
+.set_body([] (DGLArgs args, DGLRetValue* rv) {
+    List<HeteroGraphRef> graph_refs = args[0];
+    std::vector<HeteroGraphPtr> graphs;
+    for (HeteroGraphRef gref : graph_refs)
+      graphs.push_back(gref.sptr());
+    const auto &result_pair = CompactGraphs(graphs);
+    List<HeteroGraphRef> compacted_graph_refs;
+    List<Value> induced_nodes;
+    // Bind by reference to avoid copying a shared_ptr per element.
+    for (const HeteroGraphPtr &g : result_pair.first)
+      compacted_graph_refs.push_back(HeteroGraphRef(g));
+    for (const IdArray &ids : result_pair.second)
+      induced_nodes.push_back(Value(MakeValue(ids)));
+    List<ObjectRef> result;
+    result.push_back(compacted_graph_refs);
+    result.push_back(induced_nodes);
+    *rv = result;
+  });
 // HeteroSubgraph C APIs
 DGL_REGISTER_GLOBAL("heterograph_index._CAPI_DGLHeteroSubgraphGetGraph")

--- a/src/graph/heterograph.h
+++ b/src/graph/heterograph.h
@@ -41,6 +41,10 @@ class HeteroGraph : public BaseHeteroGraph {
    LOG(FATAL) << "Bipartite graph is not mutable.";
  }
+  /*! \return the data type reported by the first relation graph */
+  DLDataType DataType() const override {
+    const auto &first_relation = relation_graphs_[0];
+    return first_relation->DataType();
+  }
  DLContext Context() const override {
    return relation_graphs_[0]->Context();
  }

--- a/src/graph/sampling/get_node_types_cpu.cc
+++ b/src/graph/sampling/get_node_types_cpu.cc
--- a/src/graph/sampling/metapath_randomwalk.h
+++ b/src/graph/sampling/metapath_randomwalk.h
@@ -4,8 +4,8 @@
 * \brief DGL sampler - templated implementation definition of random walks on CPU
 */
-#ifndef DGL_GRAPH_SAMPLING_METAPATH_RANDOMWALK_H_
+#ifndef DGL_GRAPH_SAMPLING_RANDOMWALKS_METAPATH_RANDOMWALK_H_
-#define DGL_GRAPH_SAMPLING_METAPATH_RANDOMWALK_H_
+#define DGL_GRAPH_SAMPLING_RANDOMWALKS_METAPATH_RANDOMWALK_H_
 #include <dgl/array.h>
 #include <dgl/base_heterograph.h>
@@ -145,4 +145,4 @@ IdArray MetapathBasedRandomWalk(
 };  // namespace dgl
-#endif  // DGL_GRAPH_SAMPLING_METAPATH_RANDOMWALK_H_
+#endif  // DGL_GRAPH_SAMPLING_RANDOMWALKS_METAPATH_RANDOMWALK_H_
--- a/src/graph/sampling/randomwalk_cpu.cc
+++ b/src/graph/sampling/randomwalk_cpu.cc
--- a/src/graph/sampling/randomwalk_with_restart_cpu.cc
+++ b/src/graph/sampling/randomwalk_with_restart_cpu.cc
--- a/src/graph/sampling/randomwalks.cc
+++ b/src/graph/sampling/randomwalks.cc
@@ -11,7 +11,7 @@
 #include <utility>
 #include <tuple>
 #include <vector>
-#include "../../c_api_common.h"
+#include "../../../c_api_common.h"
 #include "randomwalks_impl.h"
 using namespace dgl::runtime;

--- a/src/graph/sampling/randomwalks_cpu.h
+++ b/src/graph/sampling/randomwalks_cpu.h
@@ -4,8 +4,8 @@
 * \brief DGL sampler - templated implementation definition of random walks on CPU
 */
-#ifndef DGL_GRAPH_SAMPLING_RANDOMWALKS_CPU_H_
+#ifndef DGL_GRAPH_SAMPLING_RANDOMWALKS_RANDOMWALKS_CPU_H_
-#define DGL_GRAPH_SAMPLING_RANDOMWALKS_CPU_H_
+#define DGL_GRAPH_SAMPLING_RANDOMWALKS_RANDOMWALKS_CPU_H_
 #include <dgl/base_heterograph.h>
 #include <dgl/array.h>
@@ -71,4 +71,4 @@ IdArray GenericRandomWalk(
 };  // namespace dgl
-#endif  // DGL_GRAPH_SAMPLING_RANDOMWALKS_CPU_H_
+#endif  // DGL_GRAPH_SAMPLING_RANDOMWALKS_RANDOMWALKS_CPU_H_
--- a/src/graph/sampling/randomwalks_impl.h
+++ b/src/graph/sampling/randomwalks_impl.h
@@ -4,8 +4,8 @@
 * \brief DGL sampler - templated implementation definition of random walks
 */
-#ifndef DGL_GRAPH_SAMPLING_RANDOMWALKS_IMPL_H_
+#ifndef DGL_GRAPH_SAMPLING_RANDOMWALKS_RANDOMWALKS_IMPL_H_
-#define DGL_GRAPH_SAMPLING_RANDOMWALKS_IMPL_H_
+#define DGL_GRAPH_SAMPLING_RANDOMWALKS_RANDOMWALKS_IMPL_H_
 #include <dgl/base_heterograph.h>
 #include <dgl/array.h>
@@ -114,4 +114,4 @@ IdArray RandomWalkWithStepwiseRestart(
 };  // namespace dgl
-#endif  // DGL_GRAPH_SAMPLING_RANDOMWALKS_IMPL_H_
+#endif  // DGL_GRAPH_SAMPLING_RANDOMWALKS_RANDOMWALKS_IMPL_H_
--- a/src/graph/unit_graph.cc
+++ b/src/graph/unit_graph.cc