"src/diffusers/models/controlnet.py" did not exist on "1a6fa69ab610586dad912c2b8d72bef9e3f209ee"
Unverified Commit c3a33407 authored by Quan (Andy) Gan, committed by GitHub

[Feature] PinSAGE sampler (#1249)



* [WIP] PinSAGE operators

* moved the edge remapping mess into C

* some docstrings

* lint

* lint x2

* lint x3

* skip gpu test on topk

* extend pinsage to any metapath

* lint x4

* addresses #1265

* add always_preserve (fixes #1266) and fix a silly bug

* disable gpu test on compaction

* lint

* fix a horrible bug and add more tests

* lint

* addresses comments

* lint

* bugfix

* addresses comments
Co-authored-by: Minjie Wang <minjie.wang@nyu.edu>
parent 87bca129
@@ -115,6 +115,26 @@ template<typename ValueType>
ValueType IndexSelect(NDArray array, uint64_t index);
NDArray IndexSelect(NDArray array, IdArray index);
/*!
* \brief Permute the elements of an array according to given indices.
*
* Equivalent to:
*
* <code>
* result = np.zeros_like(array)
* result[indices] = array
* </code>
*/
NDArray Scatter(NDArray array, IdArray indices);
/*!
* \brief Repeat each element a number of times. Equivalent to np.repeat(array, repeats)
* \param array A 1D vector
* \param repeats A 1D integer vector for number of times to repeat for each element in
* \c array. Must have the same shape as \c array.
*/
NDArray Repeat(NDArray array, IdArray repeats);
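For intuition, here is a minimal NumPy sketch of the semantics of the two kernels (the helper names are illustrative, not part of the DGL API):

import numpy as np

def scatter(array, indices):
    # result[indices[i]] = array[i]; assumes `indices` is a permutation.
    result = np.zeros_like(array)
    result[indices] = array
    return result

def repeat(array, repeats):
    # Repeat element i of `array` repeats[i] times, like np.repeat.
    return np.repeat(array, repeats)

assert np.array_equal(scatter(np.array([10, 20, 30]), np.array([2, 0, 1])),
                      np.array([20, 30, 10]))
assert np.array_equal(repeat(np.array([1, 2]), np.array([3, 1])),
                      np.array([1, 1, 1, 2]))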
/*!
* \brief Relabel the given ids to consecutive ids.
*
@@ -234,9 +254,9 @@ struct CSRMatrix {
* Note that we do allow duplicate non-zero entries -- multiple non-zero entries
* that have the same row, col indices. It corresponds to multigraph in
* graph terminology.
*
* We call a COO matrix *coalesced* if its row indices are sorted.
*/
// TODO(BarclayII): Graph queries on COO formats should support the case where
// data ordered by rows/columns instead of EID.
struct COOMatrix {
/*! \brief the dense shape of the matrix */
int64_t num_rows = 0, num_cols = 0;
@@ -523,6 +543,12 @@ COOMatrix COOSliceMatrix(COOMatrix coo, runtime::NDArray rows, runtime::NDArray
/*! \return True if the matrix has duplicate entries */
bool COOHasDuplicate(COOMatrix coo);
/*!
* \brief Deduplicate the entries of a sorted COO matrix, replacing the data with the
* number of occurrences of the row-col coordinates.
*/
std::pair<COOMatrix, IdArray> COOCoalesce(COOMatrix coo);
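As a rough NumPy illustration of the coalescing semantics (not the DGL implementation; assumes the (row, col) pairs are already sorted):

import numpy as np

def coo_coalesce(row, col):
    # Collapse duplicate (row, col) pairs and count their occurrences.
    pairs = np.stack([row, col], axis=1)
    uniq, count = np.unique(pairs, axis=0, return_counts=True)
    return uniq[:, 0], uniq[:, 1], count

row = np.array([0, 1, 1, 1, 1, 2])
col = np.array([1, 3, 3, 4, 4, 2])
new_row, new_col, count = coo_coalesce(row, col)
# new_row = [0 1 1 2], new_col = [1 3 4 2], count = [1 2 2 1]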
/*!
* \brief Sort the indices of a COO matrix.
*
@@ -619,7 +645,7 @@ COOMatrix COORowWiseTopk(
COOMatrix mat,
IdArray rows,
int64_t k,
-    FloatArray weight,
+    NDArray weight,
bool ascending = false);
// inline implementations
...
@@ -594,19 +594,6 @@ HeteroGraphPtr CreateFromCSR(
IdArray indptr, IdArray indices, IdArray edge_ids,
SparseFormat restrict_format = SparseFormat::ANY);
/*!
* \brief Given a list of graphs, remove the common nodes that do not have inbound and
* outbound edges.
*
* The graphs should have identical node ID space (i.e. should have the same set of nodes,
* including types and IDs) and metagraph.
*
* \return A pair. The first element is the list of compacted graphs, and the second
* element is the mapping from the compacted graphs and the original graph.
*/
std::pair<std::vector<HeteroGraphPtr>, std::vector<IdArray>>
CompactGraphs(const std::vector<HeteroGraphPtr> &graphs);
/*!
* \brief Extract the subgraph of the in edges of the given nodes.
* \param graph Graph
...
/*!
* Copyright (c) 2019 by Contributors
* \file dgl/transform.h
* \brief DGL graph transformations
*/
#ifndef DGL_TRANSFORM_H_
#define DGL_TRANSFORM_H_
#include <vector>
#include <tuple>
#include <utility>
#include "base_heterograph.h"
#include "array.h"
namespace dgl {
namespace transform {
/*!
* \brief Given a list of graphs, remove the common nodes that do not have inbound and
* outbound edges.
*
* The graphs should have identical node ID space (i.e. should have the same set of nodes,
* including types and IDs) and metagraph.
*
* \param graphs The list of graphs.
* \param always_preserve The list of nodes to preserve regardless of whether the inbound
* or outbound edges exist.
*
* \return A pair. The first element is the list of compacted graphs, and the second
* element is the mapping from node IDs in the compacted graphs to node IDs in the
* original graphs.
*/
std::pair<std::vector<HeteroGraphPtr>, std::vector<IdArray>>
CompactGraphs(
const std::vector<HeteroGraphPtr> &graphs,
const std::vector<IdArray> &always_preserve);
/*!
* \brief Convert a multigraph to a simple graph.
*
* \return A triplet of
* * \c hg : The resulting simple graph.
* * \c count : The array of edge occurrences per edge type.
* * \c edge_map : The mapping from original edge IDs to new edge IDs per edge type.
*
* \note Example: consider the following graph:
*
* g = dgl.graph([(0, 1), (1, 3), (2, 2), (1, 3), (1, 4), (1, 4)])
*
* Then ToSimpleGraph(g) would yield the following elements:
*
* * The first element would be the simple graph itself:
*
* simple_g = dgl.graph([(0, 1), (1, 3), (1, 4), (2, 2)])
*
* * The second element is an array \c count. \c count[i] stands for the number of edges
* connecting simple_g.src[i] and simple_g.dst[i] in the original graph.
*
* count[0] = [1, 2, 2, 1]
*
* * The third element \c edge_map maps each edge in the original graph to the
* corresponding edge in the new simple graph.
*
* edge_map[0] = [0, 1, 3, 1, 2, 2]
*/
std::tuple<HeteroGraphPtr, std::vector<IdArray>, std::vector<IdArray>>
ToSimpleGraph(const HeteroGraphPtr graph);
}; // namespace transform
}; // namespace dgl
#endif // DGL_TRANSFORM_H_
"""Module for converting graph from/to other object.""" """Module for converting graph from/to other object."""
from collections import defaultdict from collections import defaultdict
from collections.abc import Iterable
import numpy as np import numpy as np
import scipy as sp import scipy as sp
import networkx as nx import networkx as nx
...@@ -20,7 +19,6 @@ __all__ = [ ...@@ -20,7 +19,6 @@ __all__ = [
'to_hetero', 'to_hetero',
'to_homo', 'to_homo',
'to_networkx', 'to_networkx',
'compact_graphs',
] ]
def graph(data, ntype='_N', etype='_E', card=None, validate=True, restrict_format='any', def graph(data, ntype='_N', etype='_E', card=None, validate=True, restrict_format='any',
...@@ -724,98 +722,6 @@ def to_homo(G): ...@@ -724,98 +722,6 @@ def to_homo(G):
return retg return retg
def compact_graphs(graphs):
"""Given a list of graphs with the same set of nodes, find and eliminate the common
isolated nodes across all graphs.
This function requires the graphs to have the same set of nodes (i.e. the node types
must be the same, and the number of nodes of each node type must be the same). The
metagraph does not have to be the same.
It finds all the nodes that have zero in-degree and zero out-degree in all the given
graphs, and eliminates them from all the graphs.
Useful for graph sampling where we have a giant graph but we only wish to perform
message passing on a smaller graph with a (tiny) subset of nodes.
The node and edge features are not preserved.
Parameters
----------
graphs : DGLHeteroGraph or list[DGLHeteroGraph]
The graph, or list of graphs
Returns
-------
DGLHeteroGraph or list[DGLHeteroGraph]
The compacted graph or list of compacted graphs.
Each returned graph would have a feature ``dgl.NID`` containing the mapping
of node IDs for each type from the compacted graph(s) to the original graph(s).
Note that the mapping is the same for all the compacted graphs.
Examples
--------
The following code constructs a bipartite graph with 20 users and 10 games, but
only user #1 and #3, as well as game #3 and #5, have connections:
>>> g = dgl.bipartite([(1, 3), (3, 5)], 'user', 'plays', 'game', card=(20, 10))
The following would compact the graph above to another bipartite graph with only
two users and two games.
>>> new_g, induced_nodes = dgl.compact_graphs(g)
>>> induced_nodes
{'user': tensor([1, 3]), 'game': tensor([3, 5])}
The mapping tells us that only user #1 and #3 as well as game #3 and #5 are kept.
Furthermore, the first user and second user in the compacted graph maps to
user #1 and #3 in the original graph. Games are similar.
One can verify that the edge connections are kept the same in the compacted graph.
>>> new_g.edges(form='all', order='eid', etype='plays')
(tensor([0, 1]), tensor([0, 1]), tensor([0, 1]))
When compacting multiple graphs, nodes that do not have any connections in any
of the given graphs are removed. So if we compact ``g`` and the following ``g2``
graphs together:
>>> g2 = dgl.bipartite([(1, 6), (6, 8)], 'user', 'plays', 'game', card=(20, 10))
>>> (new_g, new_g2), induced_nodes = dgl.compact_graphs([g, g2])
>>> induced_nodes
{'user': tensor([1, 3, 6]), 'game': tensor([3, 5, 6, 8])}
Then one can see that user #1 from both graphs, users #3 from the first graph, as
well as user #6 from the second graph, are kept. Games are similar.
Similarly, one can also verify the connections:
>>> new_g.edges(form='all', order='eid', etype='plays')
(tensor([0, 1]), tensor([0, 1]), tensor([0, 1]))
>>> new_g2.edges(form='all', order='eid', etype='plays')
(tensor([0, 2]), tensor([2, 3]), tensor([0, 1]))
"""
return_single = False
if not isinstance(graphs, Iterable):
graphs = [graphs]
return_single = True
new_graph_indexes, induced_nodes = heterograph_index.compact_graph_indexes(
[g._graph for g in graphs])
new_graphs = [
DGLHeteroGraph(new_graph_index, graph.ntypes, graph.etypes)
for new_graph_index, graph in zip(new_graph_indexes, graphs)]
for g in new_graphs:
for i, ntype in enumerate(graphs[0].ntypes):
g.nodes[ntype].data[NID] = induced_nodes[i]
if return_single:
new_graphs = new_graphs[0]
return new_graphs
############################################################
# Internal APIs
############################################################
...
@@ -145,6 +145,16 @@ class HeteroGraphIndex(ObjectBase):
_CAPI_DGLHeteroClear(self)
self._cache.clear()
def dtype(self):
"""Return the data type of this graph index.
Returns
-------
DGLDataType
The data type of the graph.
"""
return _CAPI_DGLHeteroDataType(self)
def ctx(self):
"""Return the context of this graph index.
@@ -1069,31 +1079,6 @@ def disjoint_partition(graph, bnn_all_types, bne_all_types):
return _CAPI_DGLHeteroDisjointPartitionBySizes(
graph, bnn_all_types.todgltensor(), bne_all_types.todgltensor())
def compact_graph_indexes(graphs):
"""Given a list of graphs, remove the common nodes that do not have inbound and
outbound edges.
The graphs should have identical node space (i.e. should have the same set of
nodes, including types and IDs) and metagraph.
Parameters
----------
graph : list[HeteroGraphIndex]
List of heterographs.
Returns
-------
list[HeteroGraphIndex]
A list of compacted heterographs.
The returned heterographs also have the same metagraph, which is identical
to the original heterographs.
The returned heterographs also have identical node space.
list[Tensor]
The induced node IDs of each node type.
"""
new_graphs, induced_nodes = _CAPI_DGLCompactGraphs(graphs)
return new_graphs, [F.zerocopy_from_dgl_ndarray(nodes.data) for nodes in induced_nodes]
@register_object("graph.FlattenedHeteroGraph")
class FlattenedHeteroGraph(ObjectBase):
"""FlattenedHeteroGraph object class in C++ backend."""
...
"""Sampler modules."""
from .randomwalks import *
from .pinsage import *
from .neighbor import *
@@ -7,7 +7,9 @@ from ..heterograph import DGLHeteroGraph
from .. import ndarray as nd
from .. import utils
-__all__ = ['sample_neighbors', 'sample_neighbors_topk']
+__all__ = [
+    'sample_neighbors',
+    'select_topk']
def sample_neighbors(g, nodes, fanout, edge_dir='in', prob=None, replace=True):
"""Sample from the neighbors of the given nodes and return the induced subgraph.
@@ -82,7 +84,7 @@ def sample_neighbors(g, nodes, fanout, edge_dir='in', prob=None, replace=True):
ret.edges[etype].data[EID] = induced_edges[i].tousertensor()
return ret
-def sample_neighbors_topk(g, nodes, k, weight, edge_dir='in', ascending=False):
+def select_topk(g, k, weight, nodes=None, edge_dir='in', ascending=False):
"""Select the neighbors with k-largest weights on the connecting edges for each given node. """Select the neighbors with k-largest weights on the connecting edges for each given node.
If k > the number of neighbors, all the neighbors are sampled. If k > the number of neighbors, all the neighbors are sampled.
...@@ -94,15 +96,15 @@ def sample_neighbors_topk(g, nodes, k, weight, edge_dir='in', ascending=False): ...@@ -94,15 +96,15 @@ def sample_neighbors_topk(g, nodes, k, weight, edge_dir='in', ascending=False):
---------- ----------
g : DGLHeteroGraph g : DGLHeteroGraph
Full graph structure. Full graph structure.
nodes : tensor or dict
Node ids to sample neighbors from. The allowed types
are dictionary of node types to node id tensors, or simply node id
tensor if the given graph g has only one type of nodes.
k : int
The K value.
weight : str
Feature name of the weights associated with each edge. Its shape should be
compatible with a scalar edge feature tensor.
nodes : tensor or dict, optional
Node ids to sample neighbors from. The allowed types
are dictionary of node types to node id tensors, or simply node id
tensor if the given graph g has only one type of nodes.
edge_dir : str, optional
Edge direction ('in' or 'out'). If 'in', sample from in edges.
Otherwise, sample from out edges.
@@ -112,14 +114,19 @@ def sample_neighbors_topk(g, nodes, k, weight, edge_dir='in', ascending=False):
Returns
-------
-    DGLGraph
+    DGLHeteroGraph
A sampled subgraph by top k criterion. The sampled subgraph has the same
metagraph as the original one.
"""
-    if not isinstance(nodes, dict):
+    # Rectify nodes to a dictionary
if nodes is None:
nodes = {ntype: F.arange(0, g.number_of_nodes(ntype)) for ntype in g.ntypes}
elif not isinstance(nodes, dict):
if len(g.ntypes) > 1:
raise DGLError("Must specify node type when the graph is not homogeneous.")
nodes = {g.ntypes[0] : nodes}
# Parse nodes into a list of NDArrays.
nodes_all_types = []
for ntype in g.ntypes:
if ntype in nodes:
...
"""PinSAGE sampler & related functions and classes"""
import numpy as np
from .. import backend as F
from .. import convert
from .. import transform
from .randomwalks import random_walk
from .neighbor import select_topk
from ..base import EID
class RandomWalkNeighborSampler(object):
"""PinSAGE-like sampler extended to any heterographs, given a metapath.
Given a heterogeneous graph, this neighbor sampler would generate a homogeneous
graph where the neighbors of each node are the most commonly visited nodes of the
same type by random walk with restarts. The random walks with restart are based
on a given metapath, which should have the same beginning and ending node type.
The homogeneous graph also has a feature that stores the number of visits to
the corresponding neighbors from the seed nodes.
This is a generalization of the PinSAGE sampler, which works only on bidirectional
bipartite graphs.
Parameters
----------
G : DGLHeteroGraph
The heterogeneous graph.
random_walk_length : int
The maximum number of steps of random walk with restarts.
Note that here we consider a full traversal of the given metapath as a single
random walk "step" (i.e. a single step may consist of multiple hops).
Usually considered a hyperparameter.
random_walk_restart_prob : float
Restart probability of random walk with restarts.
Note that a random walk would only halt after a full traversal of the metapath;
it will never halt in the middle of a metapath.
Usually considered a hyperparameter.
num_random_walks : int
Number of random walks to try for each seed node.
Usually considered a hyperparameter.
num_neighbors : int
Number of neighbors to select for each seed.
metapath : list[str] or list[tuple[str, str, str]], optional
The metapath.
If not given, assumes that the graph is homogeneous.
weight_column : str, default "weights"
The weight of each neighbor, stored as an edge feature.
Inputs
------
seed_nodes : Tensor
A tensor of seed node IDs of node type ``ntype``.
Outputs
-------
g : DGLHeteroGraph
A homogeneous graph constructed by selecting neighbors for each seed node according
to PinSAGE algorithm.
Examples
--------
See examples in :any:`PinSAGESampler`.
"""
def __init__(self, G, random_walk_length, random_walk_restart_prob,
num_random_walks, num_neighbors, metapath=None, weight_column='weights'):
self.G = G
self.weight_column = weight_column
self.num_random_walks = num_random_walks
self.num_neighbors = num_neighbors
self.random_walk_length = random_walk_length
if metapath is None:
if len(G.ntypes) > 1 or len(G.etypes) > 1:
raise ValueError('Metapath must be specified if the graph has more than one node or edge type.')
metapath = [G.canonical_etypes[0]]
start_ntype = G.to_canonical_etype(metapath[0])[0]
end_ntype = G.to_canonical_etype(metapath[-1])[-1]
if start_ntype != end_ntype:
raise ValueError('The metapath must start and end at the same node type.')
self.ntype = start_ntype
self.metapath_hops = len(metapath)
self.metapath = metapath
self.full_metapath = metapath * random_walk_length
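# Restart can only happen at metapath boundaries: the per-hop restart
# probability is zero within a traversal and random_walk_restart_prob right
# after each full traversal of the metapath.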
restart_prob = np.zeros(self.metapath_hops * random_walk_length)
restart_prob[self.metapath_hops::self.metapath_hops] = random_walk_restart_prob
self.restart_prob = F.zerocopy_from_numpy(restart_prob)
# pylint: disable=no-member
def __call__(self, seed_nodes):
seed_nodes = F.repeat(seed_nodes, self.num_random_walks, 0)
paths, _ = random_walk(
self.G, seed_nodes, metapath=self.full_metapath, restart_prob=self.restart_prob)
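# Positions at multiples of metapath_hops are the nodes of type self.ntype
# reached after each full metapath traversal; these are the neighbor
# candidates. Column 0 of `paths` holds the seed node itself.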
src = F.reshape(paths[:, self.metapath_hops::self.metapath_hops], (-1,))
dst = F.repeat(paths[:, 0], self.random_walk_length, 0)
src_mask = (src != -1)
src = F.boolean_mask(src, src_mask)
dst = F.boolean_mask(dst, src_mask)
# count the number of visits and pick the K-most frequent neighbors for each node
neighbor_graph = convert.graph(
(src, dst), card=self.G.number_of_nodes(self.ntype), ntype=self.ntype)
neighbor_graph = transform.to_simple(neighbor_graph, return_counts=self.weight_column)
counts = neighbor_graph.edata[self.weight_column]
neighbor_graph = select_topk(neighbor_graph, self.num_neighbors, self.weight_column)
selected_counts = F.gather_row(counts, neighbor_graph.edata[EID])
neighbor_graph.edata[self.weight_column] = selected_counts
return neighbor_graph
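As a usage sketch (assuming ``RandomWalkNeighborSampler`` is exported under ``dgl.sampling`` like ``PinSAGESampler`` below; the graph is the same random bipartite graph used in the ``PinSAGESampler`` example):

import torch
import dgl
import scipy.sparse as ssp

spmat = ssp.random(3000, 5000, 0.003)
G = dgl.heterograph({
    ('A', 'AB', 'B'): spmat,
    ('B', 'BA', 'A'): spmat.T})

# Equivalent to PinSAGESampler(G, 'A', 'B', 3, 0.5, 200, 10), but with the
# metapath spelled out explicitly.
sampler = dgl.sampling.RandomWalkNeighborSampler(
    G, random_walk_length=3, random_walk_restart_prob=0.5,
    num_random_walks=200, num_neighbors=10, metapath=['AB', 'BA'])
frontier = sampler(torch.LongTensor([0, 1, 2]))
# `frontier` is a homogeneous graph over the 3000 'A' nodes; its edge feature
# 'weights' counts how often each selected neighbor was visited.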
class PinSAGESampler(RandomWalkNeighborSampler):
"""PinSAGE neighbor sampler.
Given a bidirectional bipartite graph, PinSAGE neighbor sampler would generate
a homogeneous graph where the neighbors of each node are the most commonly visited
nodes of the same type by random walk with restarts.
Parameters
----------
G : DGLHeteroGraph
The bidirectional bipartite graph.
The graph should only have two node types: ``ntype`` and ``other_type``.
The graph should only have two edge types, one connecting from ``ntype`` to
``other_type``, and another connecting from ``other_type`` to ``ntype``.
PinSAGE works on a bidirectional bipartite graph where for each edge
going from node u to node v, there exists an edge going from node v to node u.
ntype : str
The node type for which the graph would be constructed on.
other_type : str
The other node type.
random_walk_length : int
The maximum number of steps of random walk with restarts.
Note that here we consider traversing from ``ntype`` to ``other_type`` then back
to ``ntype`` as a single step (i.e. a single step consists of two hops).
Usually considered a hyperparameter.
random_walk_restart_prob : float
Restart probability of random walk with restarts.
Note that the random walks would only halt on node type ``ntype``, and would
never halt on ``other_type``.
Usually considered a hyperparameter.
num_random_walks : int
Number of random walks to try for each seed node.
Usually considered a hyperparameter.
num_neighbors : int
Number of neighbors to select for each seed.
weight_column : str, default "weights"
The weight of each neighbor, stored as an edge feature.
Inputs
------
seed_nodes : Tensor
A tensor of seed node IDs of node type ``ntype``.
Outputs
-------
g : DGLHeteroGraph
A homogeneous graph constructed by selecting neighbors for each seed node according
to PinSAGE algorithm.
Examples
--------
Generate a random bidirectional bipartite graph with 3000 "A" nodes and 5000 "B" nodes.
>>> g = scipy.sparse.random(3000, 5000, 0.003)
>>> G = dgl.heterograph({
... ('A', 'AB', 'B'): g,
... ('B', 'BA', 'A'): g.T})
Then we create a PinSAGE neighbor sampler that samples a graph of node type "A". Each
node would have (a maximum of) 10 neighbors.
>>> sampler = dgl.sampling.PinSAGESampler(G, 'A', 'B', 3, 0.5, 200, 10)
This is how we select the neighbors for node #0, #1 and #2 of type "A" according to
PinSAGE algorithm:
>>> seeds = torch.LongTensor([0, 1, 2])
>>> frontier = sampler(seeds)
>>> frontier.all_edges(form='uv')
(tensor([ 230, 0, 802, 47, 50, 1639, 1533, 406, 2110, 2687, 2408, 2823,
0, 972, 1230, 1658, 2373, 1289, 1745, 2918, 1818, 1951, 1191, 1089,
1282, 566, 2541, 1505, 1022, 812]),
tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2]))
For an end-to-end example of PinSAGE model, including sampling on multiple layers
and computing with the sampled graphs, please refer to [TODO]
References
----------
Graph Convolutional Neural Networks for Web-Scale Recommender Systems
Ying et al., 2018, https://arxiv.org/abs/1806.01973
"""
def __init__(self, G, ntype, other_type, random_walk_length, random_walk_restart_prob,
num_random_walks, num_neighbors, weight_column='weights'):
metagraph = G.metagraph
fw_etype = list(metagraph[ntype][other_type])[0]
bw_etype = list(metagraph[other_type][ntype])[0]
super().__init__(G, random_walk_length,
random_walk_restart_prob, num_random_walks, num_neighbors,
metapath=[fw_etype, bw_etype], weight_column=weight_column)
@@ -46,7 +46,7 @@ def random_walk(g, nodes, *, metapath=None, length=None, prob=None, restart_prob
The feature tensor must be non-negative.
If omitted, we assume the neighbors are picked uniformly.
restart_prob : float or Tensor, optional
-        Probability to stop after each step.
+        Probability to stop at each step.
If a tensor is given, ``restart_prob`` should have the same length as ``metapath``.
Returns
...
"""Module for graph transformation utilities.""" """Module for graph transformation utilities."""
from collections.abc import Iterable, Mapping
import numpy as np import numpy as np
from scipy import sparse from scipy import sparse
from ._ffi.function import _init_api from ._ffi.function import _init_api
from .base import EID
from .graph import DGLGraph from .graph import DGLGraph
from .heterograph import DGLHeteroGraph from .heterograph import DGLHeteroGraph
from . import ndarray as nd from . import ndarray as nd
...@@ -15,11 +15,27 @@ from .graph_index import _get_halo_subgraph_inner_edge ...@@ -15,11 +15,27 @@ from .graph_index import _get_halo_subgraph_inner_edge
from .batched_graph import BatchedDGLGraph, unbatch from .batched_graph import BatchedDGLGraph, unbatch
from .convert import graph, bipartite from .convert import graph, bipartite
from . import utils from . import utils
from .base import EID, NID
from . import ndarray as nd
-__all__ = ['line_graph', 'khop_adj', 'khop_graph', 'reverse', 'to_simple_graph', 'to_bidirected',
-           'laplacian_lambda_max', 'knn_graph', 'segmented_knn_graph', 'add_self_loop',
-           'remove_self_loop', 'metapath_reachable_graph', 'in_subgraph', 'out_subgraph']
+__all__ = [
+    'line_graph',
+    'khop_adj',
+    'khop_graph',
+    'reverse',
+    'to_simple_graph',
+    'to_bidirected',
+    'laplacian_lambda_max',
+    'knn_graph',
+    'segmented_knn_graph',
+    'add_self_loop',
+    'remove_self_loop',
+    'metapath_reachable_graph',
+    'compact_graphs',
+    'to_simple',
+    'in_subgraph',
+    'out_subgraph']
def pairwise_squared_distance(x):
@@ -565,6 +581,139 @@ def partition_graph_with_halo(g, node_part, num_hops):
subg_dict[i] = subg
return subg_dict
def compact_graphs(graphs, always_preserve=None):
"""Given a list of graphs with the same set of nodes, find and eliminate the common
isolated nodes across all graphs.
This function requires the graphs to have the same set of nodes (i.e. the node types
must be the same, and the number of nodes of each node type must be the same). The
metagraph does not have to be the same.
It finds all the nodes that have zero in-degree and zero out-degree in all the given
graphs, and eliminates them from all the graphs.
Useful for graph sampling where we have a giant graph but we only wish to perform
message passing on a smaller graph with a (tiny) subset of nodes.
The node and edge features are not preserved.
Parameters
----------
graphs : DGLHeteroGraph or list[DGLHeteroGraph]
The graph, or list of graphs
always_preserve : Tensor or dict[str, Tensor], optional
If a dict of node types and node ID tensors is given, the nodes of given
node types would not be removed, regardless of whether they are isolated.
If a Tensor is given, assume that all the graphs have one (same) node type.
Returns
-------
DGLHeteroGraph or list[DGLHeteroGraph]
The compacted graph or list of compacted graphs.
Each returned graph would have a feature ``dgl.NID`` containing the mapping
of node IDs for each type from the compacted graph(s) to the original graph(s).
Note that the mapping is the same for all the compacted graphs.
Bugs
----
This function currently requires that all the graphs assign the same node type IDs
to the same node types, i.e. the node types are *ordered* the same.
Examples
--------
The following code constructs a bipartite graph with 20 users and 10 games, but
only user #1 and #3, as well as game #3 and #5, have connections:
>>> g = dgl.bipartite([(1, 3), (3, 5)], 'user', 'plays', 'game', card=(20, 10))
The following would compact the graph above to another bipartite graph with only
two users and two games.
>>> new_g, induced_nodes = dgl.compact_graphs(g)
>>> induced_nodes
{'user': tensor([1, 3]), 'game': tensor([3, 5])}
The mapping tells us that only user #1 and #3 as well as game #3 and #5 are kept.
Furthermore, the first user and second user in the compacted graph maps to
user #1 and #3 in the original graph. Games are similar.
One can verify that the edge connections are kept the same in the compacted graph.
>>> new_g.edges(form='all', order='eid', etype='plays')
(tensor([0, 1]), tensor([0, 1]), tensor([0, 1]))
When compacting multiple graphs, nodes that do not have any connections in any
of the given graphs are removed. So if we compact ``g`` and the following ``g2``
graphs together:
>>> g2 = dgl.bipartite([(1, 6), (6, 8)], 'user', 'plays', 'game', card=(20, 10))
>>> (new_g, new_g2), induced_nodes = dgl.compact_graphs([g, g2])
>>> induced_nodes
{'user': tensor([1, 3, 6]), 'game': tensor([3, 5, 6, 8])}
Then one can see that user #1 from both graphs, users #3 from the first graph, as
well as user #6 from the second graph, are kept. Games are similar.
Similarly, one can also verify the connections:
>>> new_g.edges(form='all', order='eid', etype='plays')
(tensor([0, 1]), tensor([0, 1]), tensor([0, 1]))
>>> new_g2.edges(form='all', order='eid', etype='plays')
(tensor([0, 2]), tensor([2, 3]), tensor([0, 1]))
"""
return_single = False
if not isinstance(graphs, Iterable):
graphs = [graphs]
return_single = True
if len(graphs) == 0:
return []
# Ensure the node types are ordered the same.
# TODO(BarclayII): we ideally need to remove this constraint.
ntypes = graphs[0].ntypes
graph_dtype = graphs[0]._graph.dtype()
graph_ctx = graphs[0]._graph.ctx()
for g in graphs:
assert ntypes == g.ntypes, \
("All graphs should have the same node types in the same order, got %s and %s" %
(ntypes, g.ntypes))
assert graph_dtype == g._graph.dtype(), "Graph data type mismatch"
assert graph_ctx == g._graph.ctx(), "Graph device mismatch"
# Process the dictionary or tensor of "always preserve" nodes
if always_preserve is None:
always_preserve = {}
elif not isinstance(always_preserve, Mapping):
if len(ntypes) > 1:
raise ValueError("Node type must be given if multiple node types exist.")
always_preserve = {ntypes[0]: always_preserve}
always_preserve_nd = []
for ntype in ntypes:
nodes = always_preserve.get(ntype, None)
if nodes is None:
nodes = nd.empty([0], graph_dtype, graph_ctx)
else:
nodes = F.zerocopy_to_dgl_ndarray(nodes)
always_preserve_nd.append(nodes)
# Compact and construct heterographs
new_graph_indexes, induced_nodes = _CAPI_DGLCompactGraphs(
[g._graph for g in graphs], always_preserve_nd)
induced_nodes = [F.zerocopy_from_dgl_ndarray(nodes.data) for nodes in induced_nodes]
new_graphs = [
DGLHeteroGraph(new_graph_index, graph.ntypes, graph.etypes)
for new_graph_index, graph in zip(new_graph_indexes, graphs)]
for g in new_graphs:
for i, ntype in enumerate(graphs[0].ntypes):
g.nodes[ntype].data[NID] = induced_nodes[i]
if return_single:
new_graphs = new_graphs[0]
return new_graphs
def in_subgraph(g, nodes):
"""Extract the subgraph containing only the in edges of the given nodes.
@@ -572,7 +721,6 @@ def in_subgraph(g, nodes):
Node/edge features are not preserved. The original IDs of
the extracted edges are stored as the `dgl.EID` feature in the returned graph.
Parameters
----------
g : DGLHeteroGraph
@@ -612,7 +760,6 @@ def out_subgraph(g, nodes):
Node/edge features are not preserved. The original IDs of
the extracted edges are stored as the `dgl.EID` feature in the returned graph.
Parameters
----------
g : DGLHeteroGraph
@@ -645,4 +792,68 @@ def out_subgraph(g, nodes):
ret.edges[etype].data[EID] = induced_edges[i].tousertensor()
return ret
def to_simple(g, return_counts='count', writeback_mapping=None):
"""Convert a heterogeneous multigraph to a heterogeneous simple graph, coalescing
duplicate edges into one.
This function does not preserve node and edge features.
Parameters
----------
g : DGLHeteroGraph
The heterogeneous graph
return_counts : str, optional
If given, the returned graph would have an edge feature with this name that stores
the number of duplicate edges from the original graph.
writeback_mapping : str, optional
If given, the mapping from the edge IDs of the original graph to those of the returned
graph would be written into an edge feature with this name in the original graph for
each edge type.
Returns
-------
DGLHeteroGraph
The new heterogeneous simple graph.
Examples
--------
Consider the following graph
>>> g = dgl.graph([(0, 1), (1, 3), (2, 2), (1, 3), (1, 4), (1, 4)])
>>> sg = dgl.to_simple(g, return_counts='weights', writeback_mapping='new_eid')
The returned graph would have duplicate edges connecting (1, 3) and (1, 4) removed:
>>> sg.all_edges(form='uv', order='eid')
(tensor([0, 1, 1, 2]), tensor([1, 3, 4, 2]))
If ``return_counts`` is set, the returned graph will also return how many edges
in the original graph are connecting the endpoints of the edges in the new graph:
>>> sg.edata['weights']
tensor([1, 2, 2, 1])
This essentially reads that one edge is connecting (0, 1) in ``g``, whereas 2 edges
are connecting (1, 3) in ``g``, etc.
One can also retrieve the mapping from the edges in the original graph to edges in
the new graph by setting ``writeback_mapping`` and running
>>> g.edata['new_eid']
tensor([0, 1, 3, 1, 2, 2])
This tells us that the first edge in ``g`` is mapped to the first edge in ``sg``, and
the second and the fourth edge are mapped to the second edge in ``sg``, etc.
"""
simple_graph_index, counts, edge_maps = _CAPI_DGLToSimpleHetero(g._graph)
simple_graph = DGLHeteroGraph(simple_graph_index, g.ntypes, g.etypes)
counts = [F.zerocopy_from_dgl_ndarray(count.data) for count in counts]
edge_maps = [F.zerocopy_from_dgl_ndarray(edge_map.data) for edge_map in edge_maps]
if return_counts is not None:
for count, canonical_etype in zip(counts, g.canonical_etypes):
simple_graph.edges[canonical_etype].data[return_counts] = count
if writeback_mapping is not None:
for edge_map, canonical_etype in zip(edge_maps, g.canonical_etypes):
g.edges[canonical_etype].data[writeback_mapping] = edge_map
return simple_graph
_init_api("dgl.transform") _init_api("dgl.transform")
...@@ -230,6 +230,30 @@ template uint64_t IndexSelect<uint64_t>(NDArray array, uint64_t index); ...@@ -230,6 +230,30 @@ template uint64_t IndexSelect<uint64_t>(NDArray array, uint64_t index);
template float IndexSelect<float>(NDArray array, uint64_t index); template float IndexSelect<float>(NDArray array, uint64_t index);
template double IndexSelect<double>(NDArray array, uint64_t index); template double IndexSelect<double>(NDArray array, uint64_t index);
NDArray Scatter(NDArray array, IdArray indices) {
NDArray ret;
ATEN_XPU_SWITCH(array->ctx.device_type, XPU, {
ATEN_DTYPE_SWITCH(array->dtype, DType, "values", {
ATEN_ID_TYPE_SWITCH(indices->dtype, IdType, {
ret = impl::Scatter<XPU, DType, IdType>(array, indices);
});
});
});
return ret;
}
NDArray Repeat(NDArray array, IdArray repeats) {
NDArray ret;
ATEN_XPU_SWITCH(array->ctx.device_type, XPU, {
ATEN_DTYPE_SWITCH(array->dtype, DType, "values", {
ATEN_ID_TYPE_SWITCH(repeats->dtype, IdType, {
ret = impl::Repeat<XPU, DType, IdType>(array, repeats);
});
});
});
return ret;
}
IdArray Relabel_(const std::vector<IdArray>& arrays) {
IdArray ret;
ATEN_XPU_SWITCH(arrays[0]->ctx.device_type, XPU, {
@@ -426,11 +450,11 @@ COOMatrix CSRRowWiseSampling(
}
COOMatrix CSRRowWiseTopk(
-    CSRMatrix mat, IdArray rows, int64_t k, FloatArray weight, bool ascending) {
+    CSRMatrix mat, IdArray rows, int64_t k, NDArray weight, bool ascending) {
COOMatrix ret;
ATEN_CSR_SWITCH(mat, XPU, IdType, {
-    ATEN_FLOAT_TYPE_SWITCH(weight->dtype, FloatType, "weight", {
+    ATEN_DTYPE_SWITCH(weight->dtype, DType, "weight", {
-      ret = impl::CSRRowWiseTopk<XPU, IdType, FloatType>(
+      ret = impl::CSRRowWiseTopk<XPU, IdType, DType>(
mat, rows, k, weight, ascending);
});
});
@@ -580,5 +604,13 @@ COOMatrix COORowWiseTopk(
return ret;
}
std::pair<COOMatrix, IdArray> COOCoalesce(COOMatrix coo) {
std::pair<COOMatrix, IdArray> ret;
ATEN_COO_SWITCH(coo, XPU, IdType, {
ret = impl::COOCoalesce<XPU, IdType>(coo);
});
return ret;
}
} // namespace aten
} // namespace dgl
@@ -42,6 +42,12 @@ NDArray IndexSelect(NDArray array, IdArray index);
template <DLDeviceType XPU, typename DType>
DType IndexSelect(NDArray array, uint64_t index);
template <DLDeviceType XPU, typename DType, typename IdType>
NDArray Scatter(NDArray array, IdArray indices);
template <DLDeviceType XPU, typename DType, typename IdType>
NDArray Repeat(NDArray array, IdArray repeats);
template <DLDeviceType XPU, typename IdType>
IdArray Relabel_(const std::vector<IdArray>& arrays);
@@ -117,9 +123,9 @@ COOMatrix CSRRowWiseSamplingUniform(
CSRMatrix mat, IdArray rows, int64_t num_samples, bool replace);
// DType is the type of the weight data.
-template <DLDeviceType XPU, typename IdType, typename FloatType>
+template <DLDeviceType XPU, typename IdType, typename DType>
COOMatrix CSRRowWiseTopk(
-    CSRMatrix mat, IdArray rows, int64_t k, FloatArray weight, bool ascending);
+    CSRMatrix mat, IdArray rows, int64_t k, NDArray weight, bool ascending);
///////////////////////////////////////////////////////////////////////////////////////////
@@ -164,6 +170,9 @@ COOMatrix COOSliceRows(COOMatrix coo, runtime::NDArray rows);
template <DLDeviceType XPU, typename IdType>
COOMatrix COOSliceMatrix(COOMatrix coo, runtime::NDArray rows, runtime::NDArray cols);
template <DLDeviceType XPU, typename IdType>
std::pair<COOMatrix, IdArray> COOCoalesce(COOMatrix coo);
template <DLDeviceType XPU, typename IdType>
COOMatrix COOSort(COOMatrix mat, bool sort_column);
...
/*!
* Copyright (c) 2020 by Contributors
* \file array/cpu/array_repeat.cc
* \brief Array repeat CPU implementation
*/
#include <dgl/array.h>
#include <algorithm>
namespace dgl {
using runtime::NDArray;
namespace aten {
namespace impl {
template <DLDeviceType XPU, typename DType, typename IdType>
NDArray Repeat(NDArray array, IdArray repeats) {
CHECK(array->shape[0] == repeats->shape[0]) << "shape of array and repeats mismatch";
const int64_t len = array->shape[0];
const DType *array_data = static_cast<DType *>(array->data);
const IdType *repeats_data = static_cast<IdType *>(repeats->data);
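// First pass: the output length is the sum of all repeat counts.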
IdType num_elements = 0;
for (int64_t i = 0; i < len; ++i)
num_elements += repeats_data[i];
NDArray result = NDArray::Empty({num_elements}, array->dtype, array->ctx);
DType *result_data = static_cast<DType *>(result->data);
IdType curr = 0;
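// Second pass: fill each output segment with the corresponding input element.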
for (int64_t i = 0; i < len; ++i) {
std::fill(result_data + curr, result_data + curr + repeats_data[i], array_data[i]);
curr += repeats_data[i];
}
return result;
}
template NDArray Repeat<kDLCPU, int32_t, int32_t>(NDArray, IdArray);
template NDArray Repeat<kDLCPU, int64_t, int32_t>(NDArray, IdArray);
template NDArray Repeat<kDLCPU, float, int32_t>(NDArray, IdArray);
template NDArray Repeat<kDLCPU, double, int32_t>(NDArray, IdArray);
template NDArray Repeat<kDLCPU, int32_t, int64_t>(NDArray, IdArray);
template NDArray Repeat<kDLCPU, int64_t, int64_t>(NDArray, IdArray);
template NDArray Repeat<kDLCPU, float, int64_t>(NDArray, IdArray);
template NDArray Repeat<kDLCPU, double, int64_t>(NDArray, IdArray);
}; // namespace impl
}; // namespace aten
}; // namespace dgl
/*!
* Copyright (c) 2019 by Contributors
* \file array/cpu/array_scatter.cc
* \brief Array scatter CPU implementation
*/
#include <dgl/array.h>
namespace dgl {
using runtime::NDArray;
namespace aten {
namespace impl {
template <DLDeviceType XPU, typename DType, typename IdType>
NDArray Scatter(NDArray array, IdArray indices) {
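// Writes array[i] to position indices[i]; `indices` is expected to be a
// permutation of [0, len), per the doc comment in dgl/array.h.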
NDArray result = NDArray::Empty({indices->shape[0]}, array->dtype, array->ctx);
const DType *array_data = static_cast<DType *>(array->data);
const IdType *indices_data = static_cast<IdType *>(indices->data);
DType *result_data = static_cast<DType *>(result->data);
for (int64_t i = 0; i < indices->shape[0]; ++i)
result_data[indices_data[i]] = array_data[i];
return result;
}
template NDArray Scatter<kDLCPU, int32_t, int32_t>(NDArray, IdArray);
template NDArray Scatter<kDLCPU, int64_t, int32_t>(NDArray, IdArray);
template NDArray Scatter<kDLCPU, float, int32_t>(NDArray, IdArray);
template NDArray Scatter<kDLCPU, double, int32_t>(NDArray, IdArray);
template NDArray Scatter<kDLCPU, int32_t, int64_t>(NDArray, IdArray);
template NDArray Scatter<kDLCPU, int64_t, int64_t>(NDArray, IdArray);
template NDArray Scatter<kDLCPU, float, int64_t>(NDArray, IdArray);
template NDArray Scatter<kDLCPU, double, int64_t>(NDArray, IdArray);
}; // namespace impl
}; // namespace aten
}; // namespace dgl
/*!
* Copyright (c) 2019 by Contributors
* \file array/cpu/coo_coalesce.cc
* \brief COO coalescing
*/
#include <dgl/array.h>
#include <vector>
namespace dgl {
namespace aten {
namespace impl {
template <DLDeviceType XPU, typename IdType>
std::pair<COOMatrix, IdArray> COOCoalesce(COOMatrix coo) {
  // Sort first so that duplicate entries become adjacent; the data pointers
  // must be taken from the sorted matrix.
  if (!coo.row_sorted || !coo.col_sorted)
    coo = COOSort(coo, true);
  const int64_t nnz = coo.row->shape[0];
  const IdType* coo_row_data = static_cast<IdType*>(coo.row->data);
  const IdType* coo_col_data = static_cast<IdType*>(coo.col->data);
std::vector<IdType> new_row, new_col, count;
IdType prev_row = -1, prev_col = -1;
for (int64_t i = 0; i < nnz; ++i) {
const IdType curr_row = coo_row_data[i];
const IdType curr_col = coo_col_data[i];
if (curr_row == prev_row && curr_col == prev_col) {
++count[count.size() - 1];
} else {
new_row.push_back(curr_row);
new_col.push_back(curr_col);
count.push_back(1);
prev_row = curr_row;
prev_col = curr_col;
}
}
COOMatrix coo_result = COOMatrix{
coo.num_rows, coo.num_cols, NDArray::FromVector(new_row), NDArray::FromVector(new_col),
NDArray(), true};
return std::make_pair(coo_result, NDArray::FromVector(count));
}
template std::pair<COOMatrix, IdArray> COOCoalesce<kDLCPU, int32_t>(COOMatrix);
template std::pair<COOMatrix, IdArray> COOCoalesce<kDLCPU, int64_t>(COOMatrix);
}; // namespace impl
}; // namespace aten
}; // namespace dgl
@@ -12,9 +12,9 @@ namespace aten {
namespace impl {
namespace {
-template <typename IdxType, typename FloatType>
+template <typename IdxType, typename DType>
-inline PickFn<IdxType> GetTopkPickFn(int64_t k, FloatArray weight, bool ascending) {
+inline PickFn<IdxType> GetTopkPickFn(int64_t k, NDArray weight, bool ascending) {
-  const FloatType* wdata = static_cast<FloatType*>(weight->data);
+  const DType* wdata = static_cast<DType*>(weight->data);
PickFn<IdxType> pick_fn = [k, ascending, wdata]
(IdxType rowid, IdxType off, IdxType len,
const IdxType* col, const IdxType* data,
@@ -55,37 +55,53 @@ inline PickFn<IdxType> GetTopkPickFn(int64_t k, FloatArray weight, bool ascending) {
}  // namespace
-template <DLDeviceType XPU, typename IdxType, typename FloatType>
+template <DLDeviceType XPU, typename IdxType, typename DType>
COOMatrix CSRRowWiseTopk(
-    CSRMatrix mat, IdArray rows, int64_t k, FloatArray weight, bool ascending) {
+    CSRMatrix mat, IdArray rows, int64_t k, NDArray weight, bool ascending) {
-  auto pick_fn = GetTopkPickFn<IdxType, FloatType>(k, weight, ascending);
+  auto pick_fn = GetTopkPickFn<IdxType, DType>(k, weight, ascending);
return CSRRowWisePick(mat, rows, k, false, pick_fn);
}
template COOMatrix CSRRowWiseTopk<kDLCPU, int32_t, int32_t>(
CSRMatrix, IdArray, int64_t, NDArray, bool);
template COOMatrix CSRRowWiseTopk<kDLCPU, int64_t, int32_t>(
CSRMatrix, IdArray, int64_t, NDArray, bool);
template COOMatrix CSRRowWiseTopk<kDLCPU, int32_t, int64_t>(
CSRMatrix, IdArray, int64_t, NDArray, bool);
template COOMatrix CSRRowWiseTopk<kDLCPU, int64_t, int64_t>(
CSRMatrix, IdArray, int64_t, NDArray, bool);
template COOMatrix CSRRowWiseTopk<kDLCPU, int32_t, float>(
-    CSRMatrix, IdArray, int64_t, FloatArray, bool);
+    CSRMatrix, IdArray, int64_t, NDArray, bool);
template COOMatrix CSRRowWiseTopk<kDLCPU, int64_t, float>(
-    CSRMatrix, IdArray, int64_t, FloatArray, bool);
+    CSRMatrix, IdArray, int64_t, NDArray, bool);
template COOMatrix CSRRowWiseTopk<kDLCPU, int32_t, double>(
-    CSRMatrix, IdArray, int64_t, FloatArray, bool);
+    CSRMatrix, IdArray, int64_t, NDArray, bool);
template COOMatrix CSRRowWiseTopk<kDLCPU, int64_t, double>(
-    CSRMatrix, IdArray, int64_t, FloatArray, bool);
+    CSRMatrix, IdArray, int64_t, NDArray, bool);
-template <DLDeviceType XPU, typename IdxType, typename FloatType>
+template <DLDeviceType XPU, typename IdxType, typename DType>
COOMatrix COORowWiseTopk(
-    COOMatrix mat, IdArray rows, int64_t k, FloatArray weight, bool ascending) {
+    COOMatrix mat, IdArray rows, int64_t k, NDArray weight, bool ascending) {
-  auto pick_fn = GetTopkPickFn<IdxType, FloatType>(k, weight, ascending);
+  auto pick_fn = GetTopkPickFn<IdxType, DType>(k, weight, ascending);
return COORowWisePick(mat, rows, k, false, pick_fn);
}
template COOMatrix COORowWiseTopk<kDLCPU, int32_t, int32_t>(
COOMatrix, IdArray, int64_t, NDArray, bool);
template COOMatrix COORowWiseTopk<kDLCPU, int64_t, int32_t>(
COOMatrix, IdArray, int64_t, NDArray, bool);
template COOMatrix COORowWiseTopk<kDLCPU, int32_t, int64_t>(
COOMatrix, IdArray, int64_t, NDArray, bool);
template COOMatrix COORowWiseTopk<kDLCPU, int64_t, int64_t>(
COOMatrix, IdArray, int64_t, NDArray, bool);
template COOMatrix COORowWiseTopk<kDLCPU, int32_t, float>(
-    COOMatrix, IdArray, int64_t, FloatArray, bool);
+    COOMatrix, IdArray, int64_t, NDArray, bool);
template COOMatrix COORowWiseTopk<kDLCPU, int64_t, float>(
-    COOMatrix, IdArray, int64_t, FloatArray, bool);
+    COOMatrix, IdArray, int64_t, NDArray, bool);
template COOMatrix COORowWiseTopk<kDLCPU, int32_t, double>(
-    COOMatrix, IdArray, int64_t, FloatArray, bool);
+    COOMatrix, IdArray, int64_t, NDArray, bool);
template COOMatrix COORowWiseTopk<kDLCPU, int64_t, double>(
-    COOMatrix, IdArray, int64_t, FloatArray, bool);
+    COOMatrix, IdArray, int64_t, NDArray, bool);
} // namespace impl
} // namespace aten
...
@@ -21,7 +21,10 @@ inline bool operator == (const DLDataType& ty1, const DLDataType& ty2) {
/*! \brief Output the string representation of the data type.*/
inline std::ostream& operator << (std::ostream& os, const DLDataType& ty) {
-  return os << "code=" << ty.code << ",bits=" << ty.bits << "lanes=" << ty.lanes;
+  return os <<
+    "code=" << static_cast<int>(ty.code) <<
+    ",bits=" << static_cast<int>(ty.bits) <<
+    ",lanes=" << static_cast<int>(ty.lanes);
}
/*! \brief Check whether two device contexts are the same.*/
...
@@ -16,9 +16,6 @@
#include "../c_api_common.h"
#include "./unit_graph.h"
#include "graph_serializer.h"
// TODO(BarclayII): currently CompactGraphs depend on IdHashMap implementation which
// only works on CPU. Should fix later to make it device agnostic.
#include "../array/cpu/array_utils.h"
using namespace dgl::runtime;
@@ -115,66 +112,6 @@ HeteroSubgraph EdgeSubgraphNoPreserveNodes(
return ret;
}
template<typename IdType>
std::pair<std::vector<HeteroGraphPtr>, std::vector<IdArray>>
CompactGraphs(const std::vector<HeteroGraphPtr> &graphs) {
// TODO(BarclayII): check whether the node space and metagraph of each graph is the same.
// Step 1: Collect the nodes that have connections for each type.
std::vector<aten::IdHashMap<IdType>> hashmaps(graphs[0]->NumVertexTypes());
std::vector<std::vector<EdgeArray>> all_edges(graphs.size()); // all_edges[i][etype]
for (size_t i = 0; i < graphs.size(); ++i) {
const HeteroGraphPtr curr_graph = graphs[i];
const int64_t num_etypes = curr_graph->NumEdgeTypes();
for (IdType etype = 0; etype < num_etypes; ++etype) {
IdType srctype, dsttype;
std::tie(srctype, dsttype) = curr_graph->GetEndpointTypes(etype);
const EdgeArray edges = curr_graph->Edges(etype, "eid");
hashmaps[srctype].Update(edges.src);
hashmaps[dsttype].Update(edges.dst);
all_edges[i].push_back(edges);
}
}
// Step 2: Relabel the nodes for each type to a smaller ID space and save the mapping.
std::vector<IdArray> induced_nodes;
for (auto &hashmap : hashmaps)
induced_nodes.push_back(hashmap.Values());
// Step 3: Remap the edges of each graph.
std::vector<HeteroGraphPtr> new_graphs;
for (size_t i = 0; i < graphs.size(); ++i) {
std::vector<HeteroGraphPtr> rel_graphs;
const HeteroGraphPtr curr_graph = graphs[i];
const auto meta_graph = curr_graph->meta_graph();
const int64_t num_etypes = curr_graph->NumEdgeTypes();
for (IdType etype = 0; etype < num_etypes; ++etype) {
IdType srctype, dsttype;
std::tie(srctype, dsttype) = curr_graph->GetEndpointTypes(etype);
const EdgeArray &edges = all_edges[i][etype];
const IdArray mapped_rows = hashmaps[srctype].Map(edges.src, -1);
const IdArray mapped_cols = hashmaps[dsttype].Map(edges.dst, -1);
rel_graphs.push_back(UnitGraph::CreateFromCOO(
srctype == dsttype ? 1 : 2,
induced_nodes[srctype]->shape[0],
induced_nodes[dsttype]->shape[0],
mapped_rows,
mapped_cols));
}
new_graphs.push_back(CreateHeteroGraph(meta_graph, rel_graphs));
}
return std::make_pair(new_graphs, induced_nodes);
}
} // namespace
HeteroGraph::HeteroGraph(GraphPtr meta_graph, const std::vector<HeteroGraphPtr>& rel_graphs)
@@ -589,15 +526,6 @@ HeteroGraphPtr CreateFromCSR(
return HeteroGraphPtr(new HeteroGraph(unit_g->meta_graph(), {unit_g}));
}
std::pair<std::vector<HeteroGraphPtr>, std::vector<IdArray>>
CompactGraphs(const std::vector<HeteroGraphPtr> &graphs) {
std::pair<std::vector<HeteroGraphPtr>, std::vector<IdArray>> result;
ATEN_ID_TYPE_SWITCH(graphs[0]->DataType(), IdType, {
result = CompactGraphs<IdType>(graphs);
});
return result;
}
constexpr uint64_t kDGLSerialize_HeteroGraph = 0xDD589FBE35224ABF;
bool HeteroGraph::Load(dmlc::Stream* fs) {
@@ -764,6 +692,12 @@ DGL_REGISTER_GLOBAL("heterograph_index._CAPI_DGLHeteroClear")
hg->Clear();
});
DGL_REGISTER_GLOBAL("heterograph_index._CAPI_DGLHeteroDataType")
.set_body([] (DGLArgs args, DGLRetValue* rv) {
HeteroGraphRef hg = args[0];
*rv = hg->DataType();
});
DGL_REGISTER_GLOBAL("heterograph_index._CAPI_DGLHeteroContext") DGL_REGISTER_GLOBAL("heterograph_index._CAPI_DGLHeteroContext")
.set_body([] (DGLArgs args, DGLRetValue* rv) { .set_body([] (DGLArgs args, DGLRetValue* rv) {
HeteroGraphRef hg = args[0]; HeteroGraphRef hg = args[0];
...@@ -996,31 +930,6 @@ DGL_REGISTER_GLOBAL("heterograph_index._CAPI_DGLHeteroEdgeSubgraph") ...@@ -996,31 +930,6 @@ DGL_REGISTER_GLOBAL("heterograph_index._CAPI_DGLHeteroEdgeSubgraph")
*rv = HeteroSubgraphRef(subg); *rv = HeteroSubgraphRef(subg);
}); });
DGL_REGISTER_GLOBAL("heterograph_index._CAPI_DGLCompactGraphs")
.set_body([] (DGLArgs args, DGLRetValue* rv) {
List<HeteroGraphRef> graph_refs = args[0];
std::vector<HeteroGraphPtr> graphs;
for (HeteroGraphRef gref : graph_refs)
graphs.push_back(gref.sptr());
const auto &result_pair = CompactGraphs(graphs);
List<HeteroGraphRef> compacted_graph_refs;
List<Value> induced_nodes;
for (const HeteroGraphPtr g : result_pair.first)
compacted_graph_refs.push_back(HeteroGraphRef(g));
for (const IdArray &ids : result_pair.second)
induced_nodes.push_back(Value(MakeValue(ids)));
List<ObjectRef> result;
result.push_back(compacted_graph_refs);
result.push_back(induced_nodes);
*rv = result;
});
DGL_REGISTER_GLOBAL("transform._CAPI_DGLInSubgraph") DGL_REGISTER_GLOBAL("transform._CAPI_DGLInSubgraph")
.set_body([] (DGLArgs args, DGLRetValue *rv) { .set_body([] (DGLArgs args, DGLRetValue *rv) {
HeteroGraphRef hg = args[0]; HeteroGraphRef hg = args[0];
......
...@@ -35,7 +35,7 @@ void CheckRandomWalkInputs( ...@@ -35,7 +35,7 @@ void CheckRandomWalkInputs(
for (uint64_t i = 0; i < prob.size(); ++i) {
FloatArray p = prob[i];
CHECK_FLOAT(p, "probability");
-    if (p.GetSize() == 0)
+    if (p.GetSize() != 0)
CHECK_NDIM(p, 1, "probability");
}
}
...
/*!
* Copyright (c) 2019 by Contributors
* \file graph/transform/compact.cc
* \brief Compact graph implementation
*/
#include <dgl/base_heterograph.h>
#include <dgl/transform.h>
#include <dgl/array.h>
#include <dgl/packed_func_ext.h>
#include <vector>
#include <utility>
#include "../../c_api_common.h"
#include "../unit_graph.h"
// TODO(BarclayII): currently CompactGraphs depend on IdHashMap implementation which
// only works on CPU. Should fix later to make it device agnostic.
#include "../../array/cpu/array_utils.h"
namespace dgl {
using namespace dgl::runtime;
using namespace dgl::aten;
namespace transform {
namespace {
template<typename IdType>
std::pair<std::vector<HeteroGraphPtr>, std::vector<IdArray>>
CompactGraphs(
const std::vector<HeteroGraphPtr> &graphs,
const std::vector<IdArray> &always_preserve) {
// TODO(BarclayII): check whether the node space and metagraph of each graph is the same.
// Step 1: Collect the nodes that have connections for each type.
std::vector<aten::IdHashMap<IdType>> hashmaps(graphs[0]->NumVertexTypes());
std::vector<std::vector<EdgeArray>> all_edges(graphs.size()); // all_edges[i][etype]
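// Seed the hashmaps with the always-preserve nodes first, so that they are
// assigned new IDs even if they turn out to be isolated.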
for (size_t i = 0; i < always_preserve.size(); ++i)
hashmaps[i].Update(always_preserve[i]);
for (size_t i = 0; i < graphs.size(); ++i) {
const HeteroGraphPtr curr_graph = graphs[i];
const int64_t num_etypes = curr_graph->NumEdgeTypes();
for (IdType etype = 0; etype < num_etypes; ++etype) {
IdType srctype, dsttype;
std::tie(srctype, dsttype) = curr_graph->GetEndpointTypes(etype);
const EdgeArray edges = curr_graph->Edges(etype, "eid");
hashmaps[srctype].Update(edges.src);
hashmaps[dsttype].Update(edges.dst);
all_edges[i].push_back(edges);
}
}
// Step 2: Relabel the nodes for each type to a smaller ID space and save the mapping.
std::vector<IdArray> induced_nodes;
for (auto &hashmap : hashmaps)
induced_nodes.push_back(hashmap.Values());
// Step 3: Remap the edges of each graph.
std::vector<HeteroGraphPtr> new_graphs;
for (size_t i = 0; i < graphs.size(); ++i) {
std::vector<HeteroGraphPtr> rel_graphs;
const HeteroGraphPtr curr_graph = graphs[i];
const auto meta_graph = curr_graph->meta_graph();
const int64_t num_etypes = curr_graph->NumEdgeTypes();
for (IdType etype = 0; etype < num_etypes; ++etype) {
IdType srctype, dsttype;
std::tie(srctype, dsttype) = curr_graph->GetEndpointTypes(etype);
const EdgeArray &edges = all_edges[i][etype];
const IdArray mapped_rows = hashmaps[srctype].Map(edges.src, -1);
const IdArray mapped_cols = hashmaps[dsttype].Map(edges.dst, -1);
rel_graphs.push_back(UnitGraph::CreateFromCOO(
srctype == dsttype ? 1 : 2,
induced_nodes[srctype]->shape[0],
induced_nodes[dsttype]->shape[0],
mapped_rows,
mapped_cols));
}
new_graphs.push_back(CreateHeteroGraph(meta_graph, rel_graphs));
}
return std::make_pair(new_graphs, induced_nodes);
}
}; // namespace
std::pair<std::vector<HeteroGraphPtr>, std::vector<IdArray>>
CompactGraphs(
const std::vector<HeteroGraphPtr> &graphs,
const std::vector<IdArray> &always_preserve) {
std::pair<std::vector<HeteroGraphPtr>, std::vector<IdArray>> result;
// TODO(BarclayII): check for all IdArrays
CHECK(graphs[0]->DataType() == always_preserve[0]->dtype) << "data type mismatch.";
ATEN_ID_TYPE_SWITCH(graphs[0]->DataType(), IdType, {
result = CompactGraphs<IdType>(graphs, always_preserve);
});
return result;
}
DGL_REGISTER_GLOBAL("transform._CAPI_DGLCompactGraphs")
.set_body([] (DGLArgs args, DGLRetValue* rv) {
List<HeteroGraphRef> graph_refs = args[0];
List<Value> always_preserve_refs = args[1];
std::vector<HeteroGraphPtr> graphs;
std::vector<IdArray> always_preserve;
for (HeteroGraphRef gref : graph_refs)
graphs.push_back(gref.sptr());
for (Value array : always_preserve_refs)
always_preserve.push_back(array->data);
const auto &result_pair = CompactGraphs(graphs, always_preserve);
List<HeteroGraphRef> compacted_graph_refs;
List<Value> induced_nodes;
for (const HeteroGraphPtr g : result_pair.first)
compacted_graph_refs.push_back(HeteroGraphRef(g));
for (const IdArray &ids : result_pair.second)
induced_nodes.push_back(Value(MakeValue(ids)));
List<ObjectRef> result;
result.push_back(compacted_graph_refs);
result.push_back(induced_nodes);
*rv = result;
});
}; // namespace transform
}; // namespace dgl