Unverified Commit cd484352 authored by Quan (Andy) Gan, committed by GitHub

[Transform] Docstring and subframes (#1962)



* update knn graph docs

* more docs

* [Doc] transform module docstrings

* remove copy_ndata and copy_edata

* fix

* lint

* fix

* fix

* fix

* clean up docstrings

* fix docstring

* dtype specifications

* addresses comments

* fix
Co-authored-by: Mufei Li <mufeili1996@gmail.com>
Co-authored-by: xiang song(charlie.song) <classicxsong@gmail.com>
parent 6294677f
.. _api-dataloading:

dgl.dataloading
=================================

.. automodule:: dgl.dataloading

DataLoaders
-----------

PyTorch node/edge DataLoaders
`````````````````````````````

.. currentmodule:: dgl.dataloading.pytorch

.. autoclass:: NodeDataLoader

.. autoclass:: EdgeDataLoader

General collating functions
```````````````````````````

.. currentmodule:: dgl.dataloading

.. autoclass:: Collator
    :members: dataset, collate

.. autoclass:: NodeCollator
    :members: dataset, collate

.. autoclass:: EdgeCollator
    :members: dataset, collate

Neighborhood Sampling Classes
-----------------------------

Base Multi-layer Neighborhood Sampling Class
````````````````````````````````````````````

.. autoclass:: BlockSampler
    :members: sample_frontier, sample_blocks

Uniform Node-wise Neighbor Sampling (GraphSAGE style)
`````````````````````````````````````````````````````

.. autoclass:: MultiLayerNeighborSampler
    :members: sample_frontier
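The layer-by-layer procedure that `MultiLayerNeighborSampler` documents can be sketched without DGL. The helper names below are hypothetical and the graph is just a dict of in-neighbor lists; this is a minimal illustration of the idea (sample at most `fanout` in-neighbors per seed, then grow the seed set for the next layer), not the library's implementation:

```python
import random

def sample_frontier(adj, seeds, fanout, rng):
    """Pick at most `fanout` in-neighbors of each seed, uniformly without replacement."""
    edges = []
    for v in seeds:
        neighbors = adj.get(v, [])
        chosen = neighbors if len(neighbors) <= fanout else rng.sample(neighbors, fanout)
        edges.extend((u, v) for u in chosen)
    return edges

def sample_blocks(adj, seeds, fanouts, seed=0):
    """Build per-layer edge lists from the last GNN layer back to the first."""
    rng = random.Random(seed)
    blocks = []
    for fanout in reversed(fanouts):
        frontier = sample_frontier(adj, seeds, fanout, rng)
        blocks.insert(0, frontier)  # prepend, like BlockSampler does
        # The input nodes of this layer become the seeds of the previous one.
        seeds = sorted({u for u, _ in frontier} | set(seeds))
    return blocks
```

Running it with two layers of fanout 2 starting from a single seed produces one edge list per layer, with the last list containing only edges into the original seed.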
.. _negative-sampling:

Negative Samplers for Link Prediction
-------------------------------------

.. currentmodule:: dgl.dataloading.negative_sampler

.. autoclass:: Uniform
    :members: __call__
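The uniform negative sampler's behaviour amounts to corrupting each positive edge's destination with a randomly chosen node. A framework-free sketch of that idea (the function name is made up for illustration):

```python
import random

def uniform_negative_sample(num_nodes, pos_edges, k, seed=0):
    """For each positive (src, dst) edge, draw k negatives by replacing
    dst with a node chosen uniformly at random."""
    rng = random.Random(seed)
    neg_src, neg_dst = [], []
    for src, _ in pos_edges:
        for _ in range(k):
            neg_src.append(src)
            neg_dst.append(rng.randrange(num_nodes))
    return neg_src, neg_dst
```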
...@@ -11,4 +11,4 @@ API Reference
dgl.ops
dgl.function
sampling
dgl.dataloading
...@@ -5,8 +5,6 @@ dgl.sampling
.. automodule:: dgl.sampling

Random walk sampling functions
------------------------------
......
.. _api-transform:
dgl.transform
=================================
.. automodule:: dgl.transform
Common algorithms on graphs.
.. autosummary::
    :toctree: ../../generated/

    line_graph
    khop_adj
    khop_graph
    reverse
    to_simple_graph
    to_bidirected
    laplacian_lambda_max
    knn_graph
    segmented_knn_graph
    add_self_loop
    remove_self_loop
    metapath_reachable_graph
    compact_graphs
    to_block
    to_simple
    in_subgraph
    out_subgraph
    remove_edges
    as_immutable_graph
    as_heterograph
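Two of the simplest transforms in the list, `add_self_loop` and `remove_self_loop`, can be sketched on a plain edge list (this is a framework-free illustration of what they do, not DGL's implementation, which operates on graph objects):

```python
def add_self_loop(num_nodes, edges):
    """Append a (v, v) edge for every node in the graph."""
    return edges + [(v, v) for v in range(num_nodes)]

def remove_self_loop(edges):
    """Drop every edge whose endpoints coincide."""
    return [(u, v) for u, v in edges if u != v]
```

Note that, as with the DGL functions, calling `add_self_loop` on a graph that already has self-loops duplicates them; removing self-loops first is the usual pattern.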
...@@ -108,6 +108,7 @@ Getting Started
api/python/dgl.ops
api/python/dgl.function
api/python/sampling
api/python/dgl.dataloading

.. toctree::
   :maxdepth: 3
......
...@@ -3,10 +3,10 @@ computation dependency of necessary nodes with neighborhood sampling methods.
This includes

* :py:class:`~dgl.dataloading.pytorch.NodeDataLoader` for iterating over the nodes in
  a graph in minibatches.
* :py:class:`~dgl.dataloading.pytorch.EdgeDataLoader` for iterating over the edges in
  a graph in minibatches.
* Various sampler classes that perform neighborhood sampling for multi-layer GNNs.
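Iterating over nodes in minibatches, the core of what `NodeDataLoader` provides, reduces to chunking (optionally shuffled) node IDs. A minimal sketch, with a hypothetical function name:

```python
import random

def node_minibatches(nids, batch_size, shuffle=False, seed=0):
    """Yield successive batches of node IDs, as a NodeDataLoader-style iterator would."""
    ids = list(nids)
    if shuffle:
        random.Random(seed).shuffle(ids)
    for i in range(0, len(ids), batch_size):
        yield ids[i:i + batch_size]
```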
......
...@@ -61,17 +61,18 @@ def _find_exclude_eids_with_reverse_types(g, eids, reverse_etype_map):
return exclude_eids
def _find_exclude_eids(g, exclude_mode, eids, **kwargs):
"""Find all edge IDs to exclude according to :attr:`exclude_mode`.
Parameters
----------
g : DGLGraph
The graph.
exclude_mode : str, optional
Can be either of the following,
None (default)
Does not exclude any edge.
'reverse_id'
Exclude all edges specified in ``eids``, as well as their reverse edges
of the same edge type.
...@@ -81,6 +82,7 @@ def _find_exclude_eids(g, exclude_mode, eids, **kwargs):
This mode assumes that the reverse of an edge with ID ``e`` and type
``etype`` will have ID ``reverse_eid_map[e]`` and type ``etype``.
'reverse_types'
Exclude all edges specified in ``eids``, as well as their reverse
edges of the corresponding edge types.
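The `'reverse_id'` mode described above can be sketched in a few lines: collect the minibatch edge IDs together with their images under the same-type reverse mapping. The helper name is hypothetical:

```python
def find_exclude_eids_reverse_id(eids, reverse_eid_map):
    """Exclude the minibatch edges plus their reverses, where edge e's
    reverse (of the same edge type) has ID reverse_eid_map[e]."""
    exclude = set(eids)
    exclude.update(reverse_eid_map[e] for e in eids)
    return exclude
```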
...@@ -110,32 +112,32 @@ def _find_exclude_eids(g, exclude_mode, eids, **kwargs):
class BlockSampler(object):
"""Abstract class specifying the neighborhood sampling strategy for DGL data loaders.
The main method for BlockSampler is :meth:`sample_blocks`,
which generates a list of blocks for a multi-layer GNN given a set of seed nodes to
have their outputs computed.
The default implementation of :meth:`sample_blocks` is
to repeat :attr:`num_layers` times the following procedure from the last layer to the first
layer:
* Obtain a frontier. The frontier is defined as a graph with the same nodes as the
original graph but only the edges involved in message passing on the current layer.
Customizable via :meth:`sample_frontier`.
* Optionally, if the task is link prediction or edge classification, remove edges
connecting training node pairs. If the graph is undirected, also remove the
reverse edges. This is controlled by the argument :attr:`exclude_eids` in
the :meth:`sample_blocks` method.
* Convert the frontier into a block.
* Optionally assign the IDs of the edges in the original graph selected in the first step
to the block, controlled by the argument ``return_eids`` in
the :meth:`sample_blocks` method.
* Prepend the block to the block list to be returned.
All subclasses should override the :meth:`sample_frontier`
method while specifying the number of layers to sample in the :attr:`num_layers` argument.
Parameters
...@@ -148,7 +150,7 @@ class BlockSampler(object):
Notes
-----
For the concept of frontiers and blocks, please refer to User Guide Section 6 [TODO].
"""
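The "convert the frontier into a block" step in the procedure above can be sketched without DGL: the block's destination side is the seed nodes, and its source side is the seeds plus every node that sends a message to them. The function name is hypothetical and the frontier is a plain edge list:

```python
def to_block(frontier_edges, seed_nodes):
    """Sketch of converting a frontier into a bipartite block.

    Destination nodes are the seeds; source nodes are the seeds followed by
    every other node that appears as a message source in the frontier.
    """
    dst = list(seed_nodes)
    seen = set(dst)
    src = dst + sorted({u for u, _ in frontier_edges if u not in seen})
    return src, dst
```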
def __init__(self, num_layers, return_eids):
self.num_layers = num_layers
...@@ -157,11 +159,13 @@ class BlockSampler(object):
def sample_frontier(self, block_id, g, seed_nodes):
"""Generate the frontier given the output nodes.
The subclasses should override this function.
Parameters
----------
block_id : int
Represents which GNN layer the frontier is generated for.
g : DGLGraph
The original graph.
seed_nodes : Tensor or dict[ntype, Tensor]
The output nodes by node type.
...@@ -171,12 +175,12 @@ class BlockSampler(object):
Returns
-------
DGLGraph
The frontier generated for the current layer.
Notes
-----
For the concept of frontiers and blocks, please refer to User Guide Section 6 [TODO].
"""
raise NotImplementedError
...@@ -185,7 +189,7 @@ class BlockSampler(object):
Parameters
----------
g : DGLGraph
The original graph.
seed_nodes : Tensor or dict[ntype, Tensor]
The output nodes by node type.
...@@ -197,12 +201,12 @@ class BlockSampler(object):
Returns
-------
list[DGLGraph]
The blocks generated for computing the multi-layer GNN output.
Notes
-----
For the concept of frontiers and blocks, please refer to User Guide Section 6 [TODO].
"""
blocks = []
exclude_eids = (
...@@ -248,13 +252,13 @@ class BlockSampler(object):
class Collator(ABC):
"""Abstract DGL collator for training GNNs on downstream tasks stochastically.
Provides a :attr:`dataset` object containing the collection of all nodes or edges,
as well as a :attr:`collate` method that combines a set of items from
:attr:`dataset` and obtains the blocks.
Notes
-----
For the concept of blocks, please refer to User Guide Section 6 [TODO].
"""
@abstractproperty
def dataset(self):
...@@ -268,11 +272,11 @@ class Collator(ABC):
Parameters
----------
items : list[str, int]
The list of node or edge IDs or type-ID pairs.
Notes
-----
For the concept of blocks, please refer to User Guide Section 6 [TODO].
"""
raise NotImplementedError
...@@ -282,7 +286,7 @@ class NodeCollator(Collator):
Parameters
----------
g : DGLGraph
The graph.
nids : Tensor or dict[ntype, Tensor]
The node set to compute outputs.
...@@ -324,6 +328,12 @@ class NodeCollator(Collator):
"""Find the list of blocks necessary for computing the representation of given
nodes for a node classification/regression task.
Parameters
----------
items : list[int] or list[tuple[str, int]]
Either a list of node IDs (for homogeneous graphs), or a list of node type-ID
pairs (for heterogeneous graphs).
Returns
-------
input_nodes : Tensor or dict[ntype, Tensor]
...@@ -336,7 +346,7 @@ class NodeCollator(Collator):
If the original graph has multiple node types, return a dictionary of
node type names and node ID tensors. Otherwise, return a single tensor.
blocks : list[DGLGraph]
The list of blocks necessary for computing the representation.
"""
if isinstance(items[0], tuple):
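The `isinstance(items[0], tuple)` check above distinguishes the two item shapes the docstring describes: plain IDs for a homogeneous graph versus (type, ID) pairs for a heterogeneous one. A sketch of that normalization step, with a hypothetical helper name:

```python
from collections import defaultdict

def group_items(items):
    """Normalize collator items: return a plain list for homogeneous IDs,
    or a dict mapping each type name to its list of IDs for (type, ID) pairs."""
    if items and isinstance(items[0], tuple):
        groups = defaultdict(list)
        for ntype, nid in items:
            groups[ntype].append(nid)
        return dict(groups)
    return list(items)
```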
...@@ -369,14 +379,14 @@ class EdgeCollator(Collator):
Parameters
----------
g : DGLGraph
The graph from which the edges are iterated in minibatches and the subgraphs
are generated.
eids : Tensor or dict[etype, Tensor]
The edge set in graph :attr:`g` to compute outputs.
block_sampler : dgl.dataloading.BlockSampler
The neighborhood sampler.
g_sampling : DGLGraph, optional
The graph where neighborhood sampling and message passing is performed.
Note that this is not necessarily the same as :attr:`g`.
...@@ -425,7 +435,7 @@ class EdgeCollator(Collator):
or a dictionary of edge types and such pairs if the graph is heterogeneous.
A set of built-in negative samplers is provided in
:ref:`the negative sampling module <negative-sampling>`.
Examples
--------
...@@ -613,6 +623,12 @@ class EdgeCollator(Collator):
"""Combines the sampled edges into a minibatch for edge classification, edge
regression, and link prediction tasks.
Parameters
----------
items : list[int] or list[tuple[str, int]]
Either a list of edge IDs (for homogeneous graphs), or a list of edge type-ID
pairs (for heterogeneous graphs).
Returns
-------
Either ``(input_nodes, pair_graph, blocks)``, or
...@@ -624,19 +640,19 @@ class EdgeCollator(Collator):
If the original graph has multiple node types, return a dictionary of
node type names and node ID tensors. Otherwise, return a single tensor.
pair_graph : DGLGraph
The graph that contains only the edges in the minibatch as well as their incident
nodes.
Note that the metagraph of this graph will be identical to that of the original
graph.
negative_pair_graph : DGLGraph
The graph that contains only the edges connecting the source and destination nodes
yielded from the given negative sampler, if negative sampling is enabled.
Note that the metagraph of this graph will be identical to that of the original
graph.
blocks : list[DGLGraph]
The list of blocks necessary for computing the representation of the edges.
"""
if self.negative_sampler is None:
......
...@@ -11,7 +11,7 @@ class _BaseNegativeSampler(object):
Parameters
----------
g : DGLGraph
The graph.
eids : Tensor or dict[etype, Tensor]
The sampled edges in the minibatch.
......
...@@ -9,14 +9,14 @@ class NodeDataLoader(DataLoader):
Parameters
----------
g : DGLGraph
The graph.
nids : Tensor or dict[ntype, Tensor]
The node set to compute outputs.
block_sampler : dgl.dataloading.BlockSampler
The neighborhood sampler.
kwargs : dict
Arguments being passed to :py:class:`torch.utils.data.DataLoader`.
Examples
--------
...@@ -52,13 +52,13 @@ class EdgeDataLoader(DataLoader):
Parameters
----------
g : DGLGraph
The graph.
nids : Tensor or dict[ntype, Tensor]
The node set to compute outputs.
block_sampler : dgl.dataloading.BlockSampler
The neighborhood sampler.
g_sampling : DGLGraph, optional
The graph where neighborhood sampling is performed.
One may wish to iterate over the edges in one graph while performing sampling in
...@@ -72,20 +72,28 @@ class EdgeDataLoader(DataLoader):
minibatch. Possible values are
* None,
* ``reverse_id``,
* ``reverse_types``
See the description of the argument with the same name in the docstring of
:class:`~dgl.dataloading.EdgeCollator` for more details.
reverse_edge_ids : Tensor or dict[etype, Tensor], optional
The mapping from the original edge IDs to the IDs of their reverse edges.
See the description of the argument with the same name in the docstring of
:class:`~dgl.dataloading.EdgeCollator` for more details.
reverse_etypes : dict[etype, etype], optional
The mapping from the original edge types to their reverse edge types.
See the description of the argument with the same name in the docstring of
:class:`~dgl.dataloading.EdgeCollator` for more details.
negative_sampler : callable, optional
The negative sampler.
See the description of the argument with the same name in the docstring of
:class:`~dgl.dataloading.EdgeCollator` for more details.
kwargs : dict
Arguments being passed to :py:class:`torch.utils.data.DataLoader`.
Examples
--------
...@@ -167,7 +175,7 @@ class EdgeDataLoader(DataLoader):
See also
--------
:class:`~dgl.dataloading.EdgeCollator`
For end-to-end usages, please refer to the following tutorial/examples:
......
...@@ -216,12 +216,12 @@ def metis_partition_assignment(g, k, balance_ntypes=None, balance_edges=False):
vwgt = F.stack(vwgt, 1)
shape = (np.prod(F.shape(vwgt),),)
vwgt = F.reshape(vwgt, shape)
vwgt = F.to_dgl_nd(vwgt)
print(
'Construct multi-constraint weights: {:.3f} seconds'.format(time.time() - start))
else:
vwgt = F.zeros((0,), F.int64, F.cpu())
vwgt = F.to_dgl_nd(vwgt)
start = time.time()
node_part = _CAPI_DGLMetisPartition_Hetero(sym_g._graph, k, vwgt)
......
"""This module contains the implementations of various sampling operators.
"""
from .randomwalks import *
from .pinsage import *
......
...@@ -24,7 +24,7 @@ def sample_neighbors(g, nodes, fanout, edge_dir='in', prob=None, replace=False):
Parameters
----------
g : DGLGraph
The graph. Must be on CPU.
nodes : tensor or dict
Node IDs to sample neighbors from.
...@@ -57,7 +57,7 @@ def sample_neighbors(g, nodes, fanout, edge_dir='in', prob=None, replace=False):
Returns
-------
DGLGraph
A sampled subgraph containing only the sampled neighboring edges. It is on CPU.
Examples
--------
...@@ -95,6 +95,8 @@ def sample_neighbors(g, nodes, fanout, edge_dir='in', prob=None, replace=False):
if len(g.ntypes) > 1:
raise DGLError("Must specify node type when the graph is not homogeneous.")
nodes = {g.ntypes[0] : nodes}
assert g.device == F.cpu(), "Graph must be on CPU."
nodes = utils.prepare_tensor_dict(g, nodes, 'nodes')
nodes_all_types = []
for ntype in g.ntypes:
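The `prob` argument of `sample_neighbors` (weighted sampling of in-edges) can be sketched in plain Python with `random.choices`. The helper name is hypothetical and edges are plain `(src, dst, eid)` triples; DGL's actual implementation samples on the C++ side:

```python
import random

def sample_neighbors_weighted(in_edges, node, fanout, prob, seed=0):
    """Pick `fanout` in-edges of `node` with probability proportional to
    the per-edge weights in `prob` (sampling with replacement)."""
    rng = random.Random(seed)
    candidates = [(u, eid) for u, v, eid in in_edges if v == node]
    weights = [prob[eid] for _, eid in candidates]
    return rng.choices(candidates, weights=weights, k=fanout)
```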
...@@ -147,7 +149,7 @@ def select_topk(g, k, weight, nodes=None, edge_dir='in', ascending=False):
Parameters
----------
g : DGLGraph
The graph. Must be on CPU.
k : int or dict[etype, int]
The number of edges to be selected for each node on each edge type.
...@@ -178,7 +180,7 @@ def select_topk(g, k, weight, nodes=None, edge_dir='in', ascending=False):
Returns
-------
DGLGraph
A sampled subgraph containing only the sampled neighboring edges. It is on CPU.
Examples
--------
...@@ -195,6 +197,7 @@ def select_topk(g, k, weight, nodes=None, edge_dir='in', ascending=False):
if len(g.ntypes) > 1:
raise DGLError("Must specify node type when the graph is not homogeneous.")
nodes = {g.ntypes[0] : nodes}
assert g.device == F.cpu(), "Graph must be on CPU."
# Parse nodes into a list of NDArrays.
nodes = utils.prepare_tensor_dict(g, nodes, 'nodes')
......
...@@ -8,6 +8,7 @@ from .. import transform
from .randomwalks import random_walk
from .neighbor import select_topk
from ..base import EID
from .. import utils
class RandomWalkNeighborSampler(object):
...@@ -29,7 +30,7 @@ class RandomWalkNeighborSampler(object):
Parameters
----------
G : DGLGraph
The graph. It must be on CPU.
num_traversals : int
The maximum number of metapath-based traversals for a single random walk.
...@@ -53,24 +54,13 @@ class RandomWalkNeighborSampler(object):
The name of the edge feature to be stored on the returned graph with the number of
visits.
Examples
--------
See examples in :any:`PinSAGESampler`.
"""
def __init__(self, G, num_traversals, termination_prob,
num_random_walks, num_neighbors, metapath=None, weight_column='weights'):
assert G.device == F.cpu(), "Graph must be on CPU."
self.G = G
self.weight_column = weight_column
self.num_random_walks = num_random_walks
...@@ -96,6 +86,23 @@ class RandomWalkNeighborSampler(object):
# pylint: disable=no-member
def __call__(self, seed_nodes):
"""
Parameters
----------
seed_nodes : Tensor
A tensor of given node IDs of node type ``ntype`` to generate neighbors from. The
node type ``ntype`` is the beginning and ending node type of the given metapath.
It must be on CPU and have the same dtype as the ID type of the graph.
Returns
-------
g : DGLGraph
A homogeneous graph constructed by selecting neighbors for each given node according
to the algorithm above. The returned graph is on CPU.
"""
seed_nodes = utils.prepare_tensor(self.G, seed_nodes, 'seed_nodes')
seed_nodes = F.repeat(seed_nodes, self.num_random_walks, 0)
paths, _ = random_walk(
self.G, seed_nodes, metapath=self.full_metapath, restart_prob=self.restart_prob)
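The neighbor-selection step of `RandomWalkNeighborSampler` boils down to counting how often each node is visited across the walks started from a seed and keeping the most-visited ones as weighted neighbors. A framework-free sketch, assuming the walk traces have already been computed (the helper name is hypothetical):

```python
from collections import Counter

def random_walk_neighbors(walks, seed_node, num_neighbors):
    """Given random-walk traces starting at `seed_node`, return the
    `num_neighbors` most-visited other nodes with their visit counts."""
    visits = Counter()
    for trace in walks:
        for v in trace[1:]:          # skip the seed at position 0
            if v != seed_node:
                visits[v] += 1
    return visits.most_common(num_neighbors)
```

In the real sampler these visit counts become the `weights` edge feature on the returned neighbor graph.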
...@@ -163,17 +170,6 @@ class PinSAGESampler(RandomWalkNeighborSampler):
The name of the edge feature to be stored on the returned graph with the number of
visits.
Examples
--------
Generate a random bidirectional bipartite graph with 3000 "A" nodes and 5000 "B" nodes.
......
...@@ -29,22 +29,25 @@ def random_walk(g, nodes, *, metapath=None, length=None, prob=None, restart_prob
Parameters
----------
g : DGLGraph
The graph. Must be on CPU.
nodes : Tensor
Node ID tensor from which the random walk traces start.
The tensor must be on CPU, and must have the same dtype as the ID type
of the graph.
metapath : list[str or tuple of str], optional
Metapath, specified as a list of edge types.
Mutually exclusive with :attr:`length`.
If omitted, DGL assumes that ``g`` only has one node & edge type. In this
case, the argument ``length`` specifies the length of random walk traces.
length : int, optional
Length of random walks.
Mutually exclusive with :attr:`metapath`.
Only used when :attr:`metapath` is None.
prob : str, optional
The name of the edge feature tensor on the graph storing the (unnormalized)
probabilities associated with each edge for choosing the next node.
...@@ -57,16 +60,23 @@ def random_walk(g, nodes, *, metapath=None, length=None, prob=None, restart_prob ...@@ -57,16 +60,23 @@ def random_walk(g, nodes, *, metapath=None, length=None, prob=None, restart_prob
restart_prob : float or Tensor, optional restart_prob : float or Tensor, optional
Probability to terminate the current trace before each transition. Probability to terminate the current trace before each transition.
If a tensor is given, ``restart_prob`` should have the same length as ``metapath``. If a tensor is given, :attr:`restart_prob` should have the same length as
:attr:`metapath` or :attr:`length`.
Returns
-------
traces : Tensor
A 2-dimensional node ID tensor with shape ``(num_seeds, len(metapath) + 1)`` or
``(num_seeds, length + 1)`` if :attr:`metapath` is None.
types : Tensor
A 1-dimensional node type ID tensor with shape ``(len(metapath) + 1)`` or
``(length + 1)``.
The type IDs match the ones in the original graph ``g``.
Notes
-----
The returned tensors are on CPU.
Examples
--------
The following creates a homogeneous graph:
@@ -126,6 +136,7 @@ def random_walk(g, nodes, *, metapath=None, length=None, prob=None, restart_prob
[ 2, 0, 1, 1, 3, 2, 2],
[ 0, 1, 1, 3, 0, 0, 0]]), tensor([0, 0, 1, 0, 0, 1, 0]))
"""
assert g.device == F.cpu(), "Graph must be on CPU."
n_etypes = len(g.canonical_etypes)
n_ntypes = len(g.ntypes)
@@ -139,8 +150,8 @@ def random_walk(g, nodes, *, metapath=None, length=None, prob=None, restart_prob
metapath = [g.get_etype_id(etype) for etype in metapath]
gidx = g._graph
nodes = F.to_dgl_nd(utils.prepare_tensor(g, nodes, 'nodes'))
metapath = F.to_dgl_nd(utils.prepare_tensor(g, metapath, 'metapath'))
# Load the probability tensor from the edge frames
if prob is None:
@@ -149,7 +160,7 @@ def random_walk(g, nodes, *, metapath=None, length=None, prob=None, restart_prob
p_nd = []
for etype in g.canonical_etypes:
if prob in g.edges[etype].data:
prob_nd = F.to_dgl_nd(g.edges[etype].data[prob])
if prob_nd.ctx != nodes.ctx:
raise ValueError(
'context of seed node array and edges[%s].data[%s] are different' %
@@ -162,15 +173,15 @@ def random_walk(g, nodes, *, metapath=None, length=None, prob=None, restart_prob
if restart_prob is None:
traces, types = _CAPI_DGLSamplingRandomWalk(gidx, nodes, metapath, p_nd)
elif F.is_tensor(restart_prob):
restart_prob = F.to_dgl_nd(restart_prob)
traces, types = _CAPI_DGLSamplingRandomWalkWithStepwiseRestart(
gidx, nodes, metapath, p_nd, restart_prob)
else:
traces, types = _CAPI_DGLSamplingRandomWalkWithRestart(
gidx, nodes, metapath, p_nd, restart_prob)
traces = F.from_dgl_nd(traces)
types = F.from_dgl_nd(types)
return traces, types
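The metapath walk that the C API calls above implement can be illustrated with a small pure-Python sketch (a toy under simplifying assumptions, not DGL's implementation; ``toy_random_walk`` and its adjacency-dict input format are hypothetical): starting from each seed, hop to a random out-neighbor along the current edge type of the metapath, padding the trace with ``-1`` once a node has no outgoing edge.

```python
import random

def toy_random_walk(adj_per_etype, seeds, metapath, pad=-1):
    """Toy metapath random walk. ``adj_per_etype`` maps each edge type in
    ``metapath`` to a dict of node -> list of out-neighbors."""
    traces = []
    for seed in seeds:
        trace = [seed]
        cur = seed
        for etype in metapath:
            nbrs = adj_per_etype[etype].get(cur, [])
            # Hop to a random neighbor, or pad once the walk is stuck.
            cur = random.choice(nbrs) if nbrs else pad
            trace.append(cur)
        traces.append(trace)
    return traces

adj = {'follows': {0: [1], 1: [2]}, 'plays': {1: [0], 2: [1]}}
walks = toy_random_walk(adj, seeds=[0, 0], metapath=['follows', 'plays'])
```

Each returned trace has ``len(metapath) + 1`` entries, mirroring the shape documented above.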
def pack_traces(traces, types):
@@ -181,9 +192,9 @@ def pack_traces(traces, types):
Parameters
----------
traces : Tensor
A 2-dimensional node ID tensor. Must be on CPU and either ``int32`` or ``int64``.
types : Tensor
A 1-dimensional node type ID tensor. Must be on CPU and either ``int32`` or ``int64``.
Returns
-------
@@ -197,6 +208,10 @@ def pack_traces(traces, types):
offsets : Tensor
Offset of each trace in the original traces tensor in the new concatenated tensor.
Notes
-----
The returned tensors are on CPU.
Examples
--------
>>> g2 = dgl.heterograph({
@@ -233,15 +248,17 @@ def pack_traces(traces, types):
>>> vids[1], vtypes[1]
(tensor([0, 1, 1, 3, 0, 0, 0]), tensor([0, 0, 1, 0, 0, 1, 0]))
"""
assert F.is_tensor(traces) and F.context(traces) == F.cpu(), "traces must be a CPU tensor"
assert F.is_tensor(types) and F.context(types) == F.cpu(), "types must be a CPU tensor"
traces = F.to_dgl_nd(traces)
types = F.to_dgl_nd(types)
concat_vids, concat_types, lengths, offsets = _CAPI_DGLSamplingPackTraces(traces, types)
concat_vids = F.from_dgl_nd(concat_vids)
concat_types = F.from_dgl_nd(concat_types)
lengths = F.from_dgl_nd(lengths)
offsets = F.from_dgl_nd(offsets)
return concat_vids, concat_types, lengths, offsets
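The packing semantics documented above can be sketched in plain Python with numpy (an illustrative toy, not the C API implementation; ``toy_pack_traces`` is a hypothetical name): drop the ``-1`` padding of each trace, concatenate everything, and record each trace's length and starting offset.

```python
import numpy as np

def toy_pack_traces(traces, types, pad=-1):
    """Toy packing: strip the ``pad`` entries of each trace, concatenate all
    traces, and record per-trace lengths and offsets in the packed tensor."""
    vids, tps, lengths, offsets = [], [], [], []
    off = 0
    for trace in traces:
        kept = [v for v in trace if v != pad]
        vids.extend(kept)
        tps.extend(types[:len(kept)])  # per-position node types of the walk
        lengths.append(len(kept))
        offsets.append(off)
        off += len(kept)
    return np.array(vids), np.array(tps), np.array(lengths), np.array(offsets)

traces = [[0, 1, -1, -1], [2, 0, 1, 3]]   # second trace ran its full length
types = [0, 0, 1, 0]                      # node type of each walk position
vids, tps, lengths, offsets = toy_pack_traces(traces, types)
```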
...
@@ -43,7 +43,7 @@ def node_subgraph(graph, nodes):
Returns
-------
G : DGLGraph
The subgraph.
The nodes and edges in the subgraph are relabeled using consecutive
@@ -59,16 +59,18 @@ def node_subgraph(graph, nodes):
Instantiate a heterograph.
>>> g = dgl.heterograph({
... ('user', 'plays', 'game'): ([0, 1, 1, 2], [0, 0, 2, 1]),
... ('user', 'follows', 'user'): ([0, 1, 1], [1, 2, 2])})
>>> # Set node features
>>> g.nodes['user'].data['h'] = torch.tensor([[0.], [1.], [2.]])
Get subgraphs.
>>> g.subgraph({'user': [4, 5]})
Traceback (most recent call last):
...
dgl._ffi.base.DGLError: ...
>>> sub_g = g.subgraph({'user': [1, 2]})
>>> print(sub_g)
Graph(num_nodes={'user': 2, 'game': 0},
@@ -158,7 +160,7 @@ def edge_subgraph(graph, edges, preserve_nodes=False):
Returns
-------
G : DGLGraph
The subgraph.
The nodes and edges are relabeled using consecutive integers from 0.
@@ -173,16 +175,18 @@ def edge_subgraph(graph, edges, preserve_nodes=False):
Instantiate a heterograph.
>>> g = dgl.heterograph({
... ('user', 'plays', 'game'): ([0, 1, 1, 2], [0, 0, 2, 1]),
... ('user', 'follows', 'user'): ([0, 1, 1], [1, 2, 2])})
>>> # Set edge features
>>> g.edges['follows'].data['h'] = torch.tensor([[0.], [1.], [2.]])
Get subgraphs.
>>> g.edge_subgraph({('user', 'follows', 'user'): [5, 6]})
Traceback (most recent call last):
...
dgl._ffi.base.DGLError: ...
>>> sub_g = g.edge_subgraph({('user', 'follows', 'user'): [1, 2],
... ('user', 'plays', 'game'): [2]})
>>> print(sub_g)
@@ -244,15 +248,18 @@ def edge_subgraph(graph, edges, preserve_nodes=False):
DGLHeteroGraph.edge_subgraph = edge_subgraph
def in_subgraph(g, nodes):
"""Return the subgraph induced on the inbound edges of all edge types of the
given nodes.
All the nodes are preserved regardless of whether they have an edge or not.
The metagraph of the returned subgraph is the same as the parent graph.
Features are copied from the original graph.
Parameters
----------
g : DGLGraph
Full graph structure.
nodes : tensor or dict
Node ids to sample neighbors from. The allowed types
@@ -261,8 +268,53 @@ def in_subgraph(g, nodes):
Returns
-------
DGLGraph
The subgraph.
One can retrieve the mapping from subgraph edge ID to parent
edge ID via ``dgl.EID`` edge features of the subgraph.
Examples
--------
The following example uses PyTorch backend.
Instantiate a heterograph.
>>> g = dgl.heterograph({
... ('user', 'plays', 'game'): ([0, 1, 1, 2], [0, 0, 2, 1]),
... ('user', 'follows', 'user'): ([0, 1, 1], [1, 2, 2])})
>>> # Set edge features
>>> g.edges['follows'].data['h'] = torch.tensor([[0.], [1.], [2.]])
Get subgraphs.
>>> sub_g = g.in_subgraph({'user': [2], 'game': [2]})
>>> print(sub_g)
Graph(num_nodes={'game': 3, 'user': 3},
num_edges={('user', 'plays', 'game'): 1, ('user', 'follows', 'user'): 2},
metagraph=[('user', 'game', 'plays'), ('user', 'user', 'follows')])
Get the original node/edge indices.
>>> sub_g.edges['plays'].data[dgl.EID]
tensor([2])
>>> sub_g.edges['follows'].data[dgl.EID]
tensor([1, 2])
Get the copied edge features.
>>> sub_g.edges['follows'].data['h']
tensor([[1.],
[2.]])
>>> sub_g.edges['follows'].data['h'] += 1
>>> g.edges['follows'].data['h'] # Features are not shared.
tensor([[0.],
[1.],
[2.]])
See also
--------
out_subgraph
""" """
if g.is_block: if g.is_block:
raise DGLError('Extracting subgraph of a block graph is not allowed.') raise DGLError('Extracting subgraph of a block graph is not allowed.')
...@@ -285,15 +337,18 @@ def in_subgraph(g, nodes): ...@@ -285,15 +337,18 @@ def in_subgraph(g, nodes):
DGLHeteroGraph.in_subgraph = in_subgraph DGLHeteroGraph.in_subgraph = in_subgraph
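The in-subgraph semantics described above — keep every node, keep only edges whose destination is a seed, and remember original edge IDs (DGL stores them under ``dgl.EID``) — can be sketched per edge type in plain Python (``toy_in_subgraph`` is a hypothetical illustration, not DGL's implementation):

```python
def toy_in_subgraph(edges, seed_dst):
    """Toy in-subgraph for one edge type: keep the edges whose destination
    is a seed node, returning the kept edges and their original edge IDs."""
    seed_dst = set(seed_dst)
    kept, eids = [], []
    for eid, (u, v) in enumerate(edges):
        if v in seed_dst:
            kept.append((u, v))
            eids.append(eid)
    return kept, eids

# The 'follows' relation from the docstring example: 0->1, 1->2, 1->2.
follows = [(0, 1), (1, 2), (1, 2)]
sub_edges, sub_eids = toy_in_subgraph(follows, seed_dst=[2])
```

The recovered edge IDs match the ``dgl.EID`` feature shown in the docstring example above.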
def out_subgraph(g, nodes):
"""Return the subgraph induced on the outbound edges of all edge types of the
given nodes.
All the nodes are preserved regardless of whether they have an edge or not.
The metagraph of the returned subgraph is the same as the parent graph.
Features are copied from the original graph.
Parameters
----------
g : DGLGraph
Full graph structure.
nodes : tensor or dict
Node ids to sample neighbors from. The allowed types
@@ -302,8 +357,53 @@ def out_subgraph(g, nodes):
Returns
-------
DGLGraph
The subgraph.
One can retrieve the mapping from subgraph edge ID to parent
edge ID via ``dgl.EID`` edge features of the subgraph.
Examples
--------
The following example uses PyTorch backend.
Instantiate a heterograph.
>>> g = dgl.heterograph({
... ('user', 'plays', 'game'): ([0, 1, 1, 2], [0, 0, 2, 1]),
... ('user', 'follows', 'user'): ([0, 1, 1], [1, 2, 2])})
>>> # Set edge features
>>> g.edges['follows'].data['h'] = torch.tensor([[0.], [1.], [2.]])
Get subgraphs.
>>> sub_g = g.out_subgraph({'user': [1]})
>>> print(sub_g)
Graph(num_nodes={'game': 3, 'user': 3},
num_edges={('user', 'plays', 'game'): 2, ('user', 'follows', 'user'): 2},
metagraph=[('user', 'game', 'plays'), ('user', 'user', 'follows')])
Get the original node/edge indices.
>>> sub_g.edges['plays'].data[dgl.EID]
tensor([1, 2])
>>> sub_g.edges['follows'].data[dgl.EID]
tensor([1, 2])
Get the copied edge features.
>>> sub_g.edges['follows'].data['h']
tensor([[1.],
[2.]])
>>> sub_g.edges['follows'].data['h'] += 1
>>> g.edges['follows'].data['h'] # Features are not shared.
tensor([[0.],
[1.],
[2.]])
See also
--------
in_subgraph
""" """
if g.is_block: if g.is_block:
raise DGLError('Extracting subgraph of a block graph is not allowed.') raise DGLError('Extracting subgraph of a block graph is not allowed.')
...@@ -342,7 +442,7 @@ def node_type_subgraph(graph, ntypes): ...@@ -342,7 +442,7 @@ def node_type_subgraph(graph, ntypes):
Returns Returns
------- -------
G : DGLHeteroGraph G : DGLGraph
The subgraph. The subgraph.
Examples Examples
...@@ -351,9 +451,9 @@ def node_type_subgraph(graph, ntypes): ...@@ -351,9 +451,9 @@ def node_type_subgraph(graph, ntypes):
Instantiate a heterograph. Instantiate a heterograph.
>>> g = dgl.heterograph({
... ('user', 'plays', 'game'): ([0, 1, 1, 2], [0, 0, 2, 1]),
... ('user', 'follows', 'user'): ([0, 1, 1], [1, 2, 2])})
>>> # Set node features
>>> g.nodes['user'].data['h'] = torch.tensor([[0.], [1.], [2.]])
@@ -409,7 +509,7 @@ def edge_type_subgraph(graph, etypes):
Returns
-------
G : DGLGraph
The subgraph.
Examples
@@ -418,9 +518,9 @@ def edge_type_subgraph(graph, etypes):
Instantiate a heterograph.
>>> g = dgl.heterograph({
... ('user', 'plays', 'game'): ([0, 1, 1, 2], [0, 0, 2, 1]),
... ('user', 'follows', 'user'): ([0, 1, 1], [1, 2, 2])})
>>> # Set edge features
>>> g.edges['follows'].data['h'] = torch.tensor([[0.], [1.], [2.]])
@@ -495,9 +595,10 @@ def _create_hetero_subgraph(parent, sgi, induced_nodes, induced_edges):
DGLGraph
Graph
"""
node_frames = utils.extract_node_subframes(parent, induced_nodes)
edge_frames = utils.extract_edge_subframes(parent, induced_edges)
hsg = DGLHeteroGraph(sgi.graph, parent.ntypes, parent.etypes)
utils.set_new_frames(hsg, node_frames=node_frames, edge_frames=edge_frames)
return hsg
_init_api("dgl.subgraph")
@@ -6,7 +6,7 @@ import numpy as np
from scipy import sparse
from ._ffi.function import _init_api
from .base import dgl_warning, DGLError
from . import convert
from .heterograph import DGLHeteroGraph, DGLBlock
from . import ndarray as nd
@@ -59,29 +59,75 @@ def pairwise_squared_distance(x):
#pylint: disable=invalid-name
def knn_graph(x, k):
"""Transforms the given point set to a directed graph, whose coordinates """Convert a tensor into k-nearest-neighbor (KNN) graph(s) according
are given as a matrix. The predecessors of each point are its k-nearest to Euclidean distance.
neighbors.
If a 3D tensor is given instead, then each row would be transformed into The function transforms the coordinates/features of a point set
a separate graph. The graphs will be unioned. into a directed homogeneous graph. The coordinates of the point
set is specified as a matrix whose rows correspond to points and
columns correspond to coordinate/feature dimensions.
The nodes of the returned graph correspond to the points. An edge
exists if the source node is one of the k-nearest neighbors of the
destination node.
If you give a 3D tensor, then each submatrix will be transformed
into a separate graph. DGL then composes the graphs into a large
graph of multiple connected components.
Parameters
----------
x : 2D or 3D Tensor
The input tensor. It can be either on CPU or GPU.
* If 2D, ``x[i]`` corresponds to the i-th node in the KNN graph.
* If 3D, ``x[i]`` corresponds to the i-th KNN graph and
``x[i][j]`` corresponds to the j-th node in the i-th KNN graph.
k : int
The number of nearest neighbors per node.
Returns
-------
DGLGraph
The graph. The node IDs are in the same order as :attr:`x`.
The returned graph is on CPU, regardless of the context of input :attr:`x`.
Examples
--------
The following examples use PyTorch backend.
>>> import dgl
>>> import torch
When :attr:`x` is a 2D tensor, a single KNN graph is constructed.
>>> x = torch.tensor([[0.0, 0.0, 1.0],
... [1.0, 0.5, 0.5],
... [0.5, 0.2, 0.2],
... [0.3, 0.2, 0.4]])
>>> knn_g = dgl.knn_graph(x, 2) # Each node has two predecessors
>>> knn_g.edges()
(tensor([0, 1, 2, 2, 2, 3, 3, 3]), tensor([0, 1, 1, 2, 3, 0, 2, 3]))
When :attr:`x` is a 3D tensor, DGL constructs multiple KNN graphs
and then composes them into a graph of multiple connected components.
>>> x1 = torch.tensor([[0.0, 0.0, 1.0],
... [1.0, 0.5, 0.5],
... [0.5, 0.2, 0.2],
... [0.3, 0.2, 0.4]])
>>> x2 = torch.tensor([[0.0, 1.0, 1.0],
... [0.3, 0.3, 0.3],
... [0.4, 0.4, 1.0],
... [0.3, 0.8, 0.2]])
>>> x = torch.stack([x1, x2], dim=0)
>>> knn_g = dgl.knn_graph(x, 2) # Each node has two predecessors
>>> knn_g.edges()
(tensor([0, 1, 2, 2, 2, 3, 3, 3, 4, 5, 5, 5, 6, 6, 7, 7]),
tensor([0, 1, 1, 2, 3, 0, 2, 3, 4, 5, 6, 7, 4, 6, 5, 7]))
""" """
if F.ndim(x) == 2: if F.ndim(x) == 2:
x = F.unsqueeze(x, 0) x = F.unsqueeze(x, 0)
...@@ -106,28 +152,58 @@ def knn_graph(x, k): ...@@ -106,28 +152,58 @@ def knn_graph(x, k):
#pylint: disable=invalid-name #pylint: disable=invalid-name
def segmented_knn_graph(x, k, segs): def segmented_knn_graph(x, k, segs):
"""Transforms the given point set to a directed graph, whose coordinates """Convert a tensor into multiple k-nearest-neighbor (KNN) graph(s)
are given as a matrix. The predecessors of each point are its k-nearest with different number of nodes.
neighbors.
The matrices are concatenated along the first axis, and are segmented by Each chunk of :attr:`x` contains coordinates/features of a point set.
``segs``. Each block would be transformed into a separate graph. The :attr:`segs` specifies the number of points in each point set. The
graphs will be unioned. function constructs a KNN graph for each point set, where the predecessors
of each point are its k-nearest neighbors. DGL then composes all KNN graphs
into a graph with multiple connected components.
Parameters
----------
x : 2D Tensor
Coordinates/features of points. It can be either on CPU or GPU.
k : int
The number of nearest neighbors per node.
segs : list of int
Number of points in each point set. The numbers in :attr:`segs`
must sum up to the number of rows in :attr:`x`.
Returns
-------
DGLGraph
The graph. The node IDs are in the same order as :attr:`x`.
The returned graph is on CPU, regardless of the context of input :attr:`x`.
Examples
--------
The following examples use PyTorch backend.
>>> import dgl
>>> import torch
In the example below, the first point set has three points
and the second point set has four points.
>>> # Features/coordinates of the first point set
>>> x1 = torch.tensor([[0.0, 0.5, 0.2],
... [0.1, 0.3, 0.2],
... [0.4, 0.2, 0.2]])
>>> # Features/coordinates of the second point set
>>> x2 = torch.tensor([[0.3, 0.2, 0.1],
... [0.5, 0.2, 0.3],
... [0.1, 0.1, 0.2],
... [0.6, 0.3, 0.3]])
>>> x = torch.cat([x1, x2], dim=0)
>>> segs = [x1.shape[0], x2.shape[0]]
>>> knn_g = dgl.segmented_knn_graph(x, 2, segs)
>>> knn_g.edges()
(tensor([0, 0, 1, 1, 1, 2, 3, 3, 4, 4, 5, 5, 6, 6]),
tensor([0, 1, 0, 1, 2, 2, 3, 5, 4, 6, 3, 5, 4, 6]))
""" """
n_total_points, _ = F.shape(x) n_total_points, _ = F.shape(x)
offset = np.insert(np.cumsum(segs), 0, 0) offset = np.insert(np.cumsum(segs), 0, 0)
...@@ -148,40 +224,49 @@ def segmented_knn_graph(x, k, segs): ...@@ -148,40 +224,49 @@ def segmented_knn_graph(x, k, segs):
return g return g
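The edge rule documented for these KNN builders — a directed edge from each of a point's k nearest points (by Euclidean distance, including itself) to that point — can be sketched with numpy for a single point set (``toy_knn_edges`` is a hypothetical illustration, not DGL's implementation):

```python
import numpy as np

def toy_knn_edges(x, k):
    """Toy KNN edges: for each point (destination), its k nearest points
    (including itself) become the sources of incoming edges."""
    # Pairwise Euclidean distance matrix of shape (n, n).
    d = np.linalg.norm(x[:, None, :] - x[None, :, :], axis=-1)
    src = np.argsort(d, axis=1)[:, :k]           # k nearest sources per destination
    dst = np.repeat(np.arange(x.shape[0]), k)    # each destination repeated k times
    return src.reshape(-1), dst

x = np.array([[0.0, 0.0],
              [0.1, 0.0],
              [5.0, 5.0]])
src, dst = toy_knn_edges(x, k=2)
```

Every node ends up with exactly k predecessors, matching the ``knn_graph`` examples above.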
def to_bidirected(g, readonly=None, copy_ndata=False):
r"""Convert the graph to a bidirectional simple graph, adding reverse edges and
removing parallel edges.
The function generates a new graph with no edge features. In the new graph,
a single edge ``(u, v)`` exists if and only if there exists an edge connecting ``u``
to ``v`` or an edge connecting ``v`` to ``u`` in the original graph.
For a heterogeneous graph with multiple edge types, DGL treats edges corresponding
to each type as a separate graph and converts the graph to a bidirected one
for each of them.
Since :func:`to_bidirected` **is not well defined for unidirectional
bipartite graphs**, DGL will raise an error if an edge type whose source node type is
different from the destination node type exists.
Parameters
----------
g : DGLGraph
The input graph.
readonly : bool
Deprecated. There will be no difference between readonly and non-readonly
(Default: True)
copy_ndata: bool, optional
If True, the node features of the bidirected graph are copied from the
original graph.
If False, the bidirected graph will not have any node features.
(Default: False)
Returns
-------
DGLGraph
The bidirected graph
Notes
-----
If :attr:`copy_ndata` is True, the same tensors will be used for
the features of the original graph and the returned graph to save memory cost.
As a result, users should avoid performing in-place operations on the features of
the returned graph, which will corrupt the features of the original graph as well.
Examples
--------
The following examples use PyTorch backend.
@@ -200,12 +285,12 @@ def to_bidirected(g, readonly=None, copy_ndata=False):
>>> bg1.edges()
(tensor([0, 1, 2, 1, 2, 0]), tensor([1, 2, 0, 0, 1, 2]))
**Heterogeneous graphs with Multiple Edge Types**
>>> g = dgl.heterograph({
... ('user', 'wins', 'user'): (th.tensor([0, 2, 0, 2]), th.tensor([1, 1, 2, 0])),
... ('user', 'follows', 'user'): (th.tensor([1, 2, 1]), th.tensor([2, 1, 1]))
... })
>>> bg1 = dgl.to_bidirected(g)
>>> bg1.edges(etype='wins')
(tensor([0, 0, 1, 1, 2, 2]), tensor([1, 2, 0, 2, 0, 1]))
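The simple-graph rule stated above — ``(u, v)`` is an edge in the result iff ``(u, v)`` or ``(v, u)`` is an edge in the input, with no parallel edges — can be sketched per edge type in plain Python (``toy_to_bidirected`` is a hypothetical illustration, not DGL's implementation):

```python
def toy_to_bidirected(src, dst):
    """Toy bidirection: union the edge set with its reverse, deduplicated."""
    pairs = set(zip(src, dst)) | set(zip(dst, src))  # set() drops parallel edges
    return sorted(pairs)

# The homogeneous docstring example: edges 0->1, 1->2, 2->0.
edges = toy_to_bidirected([0, 1, 2], [1, 2, 0])
```

The result contains the same six edges as the ``bg1.edges()`` output shown above, just in sorted order.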
@@ -230,86 +315,83 @@ def to_bidirected(g, readonly=None, copy_ndata=False):
def add_reverse_edges(g, readonly=None, copy_ndata=True,
copy_edata=False, ignore_bipartite=False):
r"""Add reverse edges to a graph.
For a graph with edges :math:`(i_1, j_1), \cdots, (i_n, j_n)`, this
function creates a new graph with edges
:math:`(i_1, j_1), \cdots, (i_n, j_n), (j_1, i_1), \cdots, (j_n, i_n)`.
For a heterogeneous graph with multiple edge types, DGL treats the edges corresponding
to each type as a separate graph and adds reverse edges for each of them.
Since :func:`add_reverse_edges` **is not well defined for unidirectional bipartite graphs**,
an error will be raised if an edge type of the input heterogeneous graph is for a
unidirectional bipartite graph. DGL will simply skip the edge types corresponding
to unidirectional bipartite graphs if ``ignore_bipartite=True`` is specified.
Parameters
----------
g : DGLGraph
The input graph. Can be on either CPU or GPU.
readonly : bool, default to be True
Deprecated. There will be no difference between readonly and non-readonly
copy_ndata: bool, optional
If True, the node features of the new graph are copied from
the original graph. If False, the new graph will not have any
node features.
(Default: True)
copy_edata: bool, optional
If True, the features of the reversed edges will be identical to
the original ones.
If False, the new graph will not have any edge features.
(Default: False)
ignore_bipartite: bool, optional
If True, unidirectional bipartite graphs are ignored and
no error is raised. If False, an error will be raised if
an edge type of the input heterogeneous graph is for a unidirectional
bipartite graph.
Returns
-------
DGLGraph
The graph with reversed edges added.
Notes
-----
If :attr:`copy_ndata` is True, the same tensors are used as
the node features of the original graph and the new graph.
As a result, users should avoid performing in-place operations
on the node features of the new graph to avoid feature corruption.
In contrast, edge features are concatenated,
and they are not shared due to concatenation.
    Examples
    --------
    **Homogeneous graphs**

    >>> g = dgl.graph((th.tensor([0, 0]), th.tensor([0, 1])))
    >>> bg1 = dgl.add_reverse_edges(g)
    >>> bg1.edges()
    (tensor([0, 0, 0, 1]), tensor([0, 1, 0, 0]))

    **Heterogeneous graphs with Multiple Edge Types**

    >>> g = dgl.heterograph({
    ...     ('user', 'wins', 'user'): (th.tensor([0, 2, 0, 2, 2]), th.tensor([1, 1, 2, 1, 0])),
    ...     ('user', 'plays', 'game'): (th.tensor([1, 2, 1]), th.tensor([2, 1, 1])),
    ...     ('user', 'follows', 'user'): (th.tensor([1, 2, 1]), th.tensor([0, 0, 0]))
    ... })
    >>> g.nodes['game'].data['hv'] = th.ones(3, 1)
    >>> g.edges['wins'].data['h'] = th.tensor([0, 1, 2, 3, 4])

    The :func:`add_reverse_edges` operation is applied to the edge type
    ``('user', 'wins', 'user')`` and the edge type ``('user', 'follows', 'user')``.
    The edge type ``('user', 'plays', 'game')`` is ignored. Both the node features and
    edge features are shared.

    >>> bg = dgl.add_reverse_edges(g, copy_ndata=True,
    ...                            copy_edata=True, ignore_bipartite=True)
...@@ -362,95 +444,108 @@ def add_reverse_edges(g, readonly=None, copy_ndata=True,
    # handle features
    if copy_ndata:
        node_frames = utils.extract_node_subframes(g, None)
        utils.set_new_frames(new_g, node_frames=node_frames)
    if copy_edata:
        # find indices
        eids = []
        for c_etype in canonical_etypes:
            eid = F.copy_to(F.arange(0, g.number_of_edges(c_etype)), new_g.device)
            if c_etype[0] != c_etype[2]:
                eids.append(eid)
            else:
                eids.append(F.cat([eid, eid], 0))
        edge_frames = utils.extract_edge_subframes(g, eids)
        utils.set_new_frames(new_g, edge_frames=edge_frames)
    return new_g
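As a quick illustration of the homogeneous case above, the reverse edges are simply the flipped originals appended after them. The following is a plain-Python sketch of that semantics, not the DGL implementation:

```python
# Sketch: reverse edges are the flipped originals, appended after them,
# reproducing the first docstring example.
src, dst = [0, 0], [0, 1]
new_src = src + dst
new_dst = dst + src
print(new_src, new_dst)  # → [0, 0, 0, 1] [0, 1, 0, 0]
```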
def line_graph(g, backtracking=True, shared=False):
    """Return the line graph of this graph.

    The line graph ``L(G)`` of a given graph ``G`` is defined as another graph where
    the nodes in ``L(G)`` map to the edges in ``G``. For any pair of edges ``(u, v)``
    and ``(v, w)`` in ``G``, the corresponding node of edge ``(u, v)`` in ``L(G)`` will
    have an edge connecting to the corresponding node of edge ``(v, w)``.

    Parameters
    ----------
    g : DGLGraph
        Input graph. Must be homogeneous.
    backtracking : bool, optional
        If False, the line graph node corresponding to edge ``(u, v)`` will not have
        an edge connecting to the line graph node corresponding to edge ``(v, u)``.
        Default: True.
    shared : bool, optional
        Whether to copy the edge features of the original graph as the node features
        of the resulting line graph.

    Returns
    -------
    G : DGLGraph
        The line graph of this graph.

    Notes
    -----
    If :attr:`shared` is True, the same tensors will be used for
    the features of the original graph and the returned graph to save memory cost.
    As a result, users should avoid performing in-place operations on the features of
    the returned graph, which will corrupt the features of the original graph as well.

    The implementation is done on CPU, even if the input and output graphs are on GPU.

    Examples
    --------
    Assume that the graph has the following adjacency matrix: ::

       A = [[0, 0, 1],
            [1, 0, 1],
            [1, 1, 0]]

    >>> g = dgl.graph(([0, 1, 1, 2, 2], [2, 0, 2, 0, 1]), 'user', 'follows')
    >>> lg = g.line_graph()
    >>> lg
    Graph(num_nodes=5, num_edges=8,
          ndata_schemes={}
          edata_schemes={})
    >>> lg.edges()
    (tensor([0, 0, 1, 2, 2, 3, 4, 4]), tensor([3, 4, 0, 3, 4, 0, 1, 2]))
    >>> lg = g.line_graph(backtracking=False)
    >>> lg
    Graph(num_nodes=5, num_edges=4,
          ndata_schemes={}
          edata_schemes={})
    >>> lg.edges()
    (tensor([0, 1, 2, 4]), tensor([4, 0, 3, 1]))
    """
    assert g.is_homogeneous(), \
        'only homogeneous graph is supported'
    dev = g.device
    lg = DGLHeteroGraph(_CAPI_DGLHeteroLineGraph(g._graph.copy_to(nd.cpu()), backtracking))
    lg = lg.to(dev)
    if shared:
        new_frames = utils.extract_edge_subframes(g, None)
        utils.set_new_frames(lg, node_frames=new_frames)
    return lg

DGLHeteroGraph.line_graph = line_graph
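The definition above can be cross-checked with a small pure-Python sketch. Here `line_graph_edges` is a hypothetical helper written only for illustration, not part of DGL:

```python
# Node i of L(G) corresponds to edge i of G, given as an (u, v) list.
def line_graph_edges(edges, backtracking=True):
    lg_edges = []
    for i, (u, v) in enumerate(edges):
        for j, (x, y) in enumerate(edges):
            if v != x:
                continue  # edge j must start where edge i ends
            if not backtracking and (x, y) == (v, u):
                continue  # skip the (v, u) successor of (u, v)
            lg_edges.append((i, j))
    return lg_edges

# The graph from the docstring example above.
edges = list(zip([0, 1, 1, 2, 2], [2, 0, 2, 0, 1]))
print(line_graph_edges(edges))
print(line_graph_edges(edges, backtracking=False))
```

Both printed edge lists agree with the `lg.edges()` outputs in the docstring.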
def khop_adj(g, k):
    """Return the matrix of :math:`A^k` where :math:`A` is the adjacency matrix of the graph
    :math:`g`, where rows represent destination nodes and columns represent source nodes.

    The returned matrix is a 32-bit float dense matrix on CPU.

    The graph must be homogeneous.

    Parameters
    ----------
    g : DGLGraph
        The input graph.
    k : int
        The :math:`k` in :math:`A^k`.
...@@ -458,14 +553,12 @@ def khop_adj(g, k):

    Returns
    -------
    tensor
        The returned tensor.

    Examples
    --------
    >>> import dgl
    >>> g = dgl.graph(([0,1,2,3,4,0,1,2,3,4], [0,1,2,3,4,1,2,3,4,0]))
    >>> dgl.khop_adj(g, 1)
    tensor([[1., 0., 0., 0., 1.],
            [1., 1., 0., 0., 0.],
...@@ -479,25 +572,46 @@ def khop_adj(g, k):
            [1., 3., 3., 1., 0.],
            [0., 1., 3., 3., 1.]])
    """
    assert g.is_homogeneous(), \
        'only homogeneous graph is supported'
    adj_k = g.adj(scipy_fmt=g.formats()['created'][0]) ** k
    return F.tensor(adj_k.todense().astype(np.float32))
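The walk-counting interpretation of :math:`A^k` can be reproduced with a dense numpy matrix power. This is a sketch of the math only; the orientation of the matrix DGL returns may be the transpose of the one below:

```python
import numpy as np

# A^k counts length-k walks; 5-node example from the docstring above
# (self-loops plus a directed 5-cycle).
n, k = 5, 3
src = [0, 1, 2, 3, 4, 0, 1, 2, 3, 4]
dst = [0, 1, 2, 3, 4, 1, 2, 3, 4, 0]
A = np.zeros((n, n), dtype=np.float32)
A[src, dst] = 1.0  # source-rows orientation; DGL's may be the transpose
A_k = np.linalg.matrix_power(A, k)
print(A_k[0].tolist())  # → [1.0, 3.0, 3.0, 1.0, 0.0]
```

Row 0 holds the binomial counts C(3, m) of 3-step walks from node 0 that advance m times around the cycle.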
def khop_graph(g, k, copy_ndata=True):
    """Return the graph whose edges connect the :attr:`k`-hop neighbors of the original graph.

    More specifically, an edge from node ``u`` to node ``v`` exists in the new graph if
    and only if a path of length :attr:`k` exists from node ``u`` to node ``v`` in the
    original graph.

    The adjacency matrix of the returned graph is :math:`A^k`
    (where :math:`A` is the adjacency matrix of :math:`g`).

    Parameters
    ----------
    g : DGLGraph
        The input graph.
    k : int
        The :math:`k` in `k`-hop graph.
    copy_ndata: bool, optional
        If True, the node features of the new graph are copied from the
        original graph.
        If False, the new graph will not have any node features.
        (Default: True)

    Returns
    -------
    DGLGraph
        The returned graph.

    Notes
    -----
    If :attr:`copy_ndata` is True, the same tensors will be used for
    the features of the original graph and the returned graph to save memory cost.
    As a result, users should avoid performing in-place operations on the features of
    the returned graph, which will corrupt the features of the original graph as well.

    Examples
    --------
...@@ -505,9 +619,7 @@ def khop_graph(g, k):
    Below gives an easy example:

    >>> import dgl
    >>> g = dgl.graph(([0, 1], [1, 2]))
    >>> g_2 = dgl.transform.khop_graph(g, 2)
    >>> print(g_2.edges())
    (tensor([0]), tensor([2]))
...@@ -515,9 +627,7 @@ def khop_graph(g, k):

    A more complicated example:

    >>> import dgl
    >>> g = dgl.graph(([0,1,2,3,4,0,1,2,3,4], [0,1,2,3,4,1,2,3,4,0]))
    >>> dgl.khop_graph(g, 1)
    DGLGraph(num_nodes=5, num_edges=10,
             ndata_schemes={}
...@@ -527,6 +637,8 @@ def khop_graph(g, k):
             ndata_schemes={}
             edata_schemes={})
    """
    assert g.is_homogeneous(), \
        'only homogeneous graph is supported'
    n = g.number_of_nodes()
    adj_k = g.adj(transpose=True, scipy_fmt=g.formats()['created'][0]) ** k
    adj_k = adj_k.tocoo()
...@@ -535,7 +647,14 @@ def khop_graph(g, k):
    col = np.repeat(adj_k.col, multiplicity)
    # TODO(zihao): we should support creating multi-graph from scipy sparse matrix
    # in the future.
    new_g = convert.graph((row, col), num_nodes=n)

    # handle ndata
    if copy_ndata:
        node_frames = utils.extract_node_subframes(g, None)
        utils.set_new_frames(new_g, node_frames=node_frames)

    return new_g
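The scipy-based construction in the body above can be sketched in isolation as follows. This is illustrative only, with assumed variable names, for the first docstring example:

```python
import numpy as np
import scipy.sparse as sp

# edges 0->1 and 1->2 of the first docstring example; k = 2
src, dst = [0, 1], [1, 2]
n, k = 3, 2
adj = sp.coo_matrix((np.ones(len(src)), (dst, src)), shape=(n, n))
adj_k = (adj ** k).tocoo()
mult = adj_k.data.astype(int)   # walk counts become edge multiplicities
u = np.repeat(adj_k.col, mult)  # sources
v = np.repeat(adj_k.row, mult)  # destinations
print(list(zip(u.tolist(), v.tolist())))  # → [(0, 2)]
```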
def reverse(g, copy_ndata=True, copy_edata=False, *, share_ndata=None, share_edata=None):
    r"""Return the reverse of a graph.
...@@ -544,43 +663,48 @@ def reverse(g, copy_ndata=True, copy_edata=False, *, share_ndata=None, share_eda
    :math:`(i_1, j_1), (i_2, j_2), \cdots` is a new graph with edges
    :math:`(j_1, i_1), (j_2, i_2), \cdots`.

    For a heterogeneous graph with multiple edge types, DGL treats the edges corresponding
    to each type as a separate graph and computes the reverse for each of them.
    If the original edge type is ``(A, B, C)``, its reverse will have edge type
    ``(C, B, A)``.

    Given a :class:`DGLGraph` object, DGL returns another :class:`DGLGraph`
    object representing its reverse.

    Parameters
    ----------
    g : DGLGraph
        The input graph.
    copy_ndata: bool, optional
        If True, the node features of the reversed graph are copied from the
        original graph.
        If False, the reversed graph will not have any node features.
        (Default: True)
    copy_edata: bool, optional
        If True, the edge features of the reversed graph are copied from the
        original graph.
        If False, the reversed graph will not have any edge features.
        (Default: False)

    Return
    ------
    DGLGraph
        The reversed graph.

    Notes
    -----
    If :attr:`copy_ndata` or :attr:`copy_edata` is True, the same tensors will be used for
    the features of the original graph and the reversed graph to save memory cost.
    As a result, users should avoid performing in-place operations on the features of
    the reversed graph, which will corrupt the features of the original graph as well.

    Examples
    --------
    **Homogeneous graphs or Heterogeneous graphs with A Single Edge Type**

    Create a graph to reverse.
...@@ -599,7 +723,7 @@ def reverse(g, copy_ndata=True, copy_edata=False, *, share_ndata=None, share_eda
        [2.]])

    The i-th edge in the reversed graph corresponds to the i-th edge in the
    original graph. When :attr:`copy_edata` is True, they have the same features.

    >>> rg.edges()
    (tensor([1, 2, 0]), tensor([0, 1, 2]))
...@@ -625,19 +749,17 @@ def reverse(g, copy_ndata=True, copy_edata=False, *, share_ndata=None, share_eda
    >>> 'h2' in g.ndata
    False

    **Heterogeneous graphs with Multiple Edge Types**

    >>> g = dgl.heterograph({
    ...     ('user', 'follows', 'user'): (th.tensor([0, 2]), th.tensor([1, 2])),
    ...     ('user', 'plays', 'game'): (th.tensor([1, 2, 1]), th.tensor([2, 1, 1]))
    ... })
    >>> g.nodes['game'].data['hv'] = th.ones(3, 1)
    >>> g.edges['plays'].data['he'] = th.zeros(3, 1)

    The resulting graph will have edge types
    ``('user', 'follows', 'user')`` and ``('game', 'plays', 'user')``.

    >>> rg = dgl.reverse(g, copy_ndata=True)
    >>> rg
...@@ -648,7 +770,7 @@ def reverse(g, copy_ndata=True, copy_edata=False, *, share_ndata=None, share_eda
    (tensor([1, 2]), tensor([0, 2]))
    >>> rg.edges(etype='plays')
    (tensor([2, 1, 1]), tensor([1, 2, 1]))
    >>> rg.nodes['game'].data['hv']
    tensor([[1.],
            [1.],
            [1.]])
...@@ -662,9 +784,9 @@ def reverse(g, copy_ndata=True, copy_edata=False, *, share_ndata=None, share_eda
        dgl_warning('share_edata argument has been renamed to copy_edata.')
        copy_edata = share_edata
    if g.is_block:
        # TODO(0.5 release, xiangsx) need to handle BLOCK
        # currently reversing a block results in undefined behavior
        raise DGLError('Reversing a block graph is not supported.')

    gidx = g._graph.reverse()
    new_g = DGLHeteroGraph(gidx, g.ntypes, g.etypes)
...@@ -672,17 +794,14 @@ def reverse(g, copy_ndata=True, copy_edata=False, *, share_ndata=None, share_eda
    # handle ndata
    if copy_ndata:
        # for each ntype
        for ntype in g.ntypes:
            new_g.nodes[ntype].data.update(g.nodes[ntype].data)

    # handle edata
    if copy_edata:
        # for each etype
        for utype, etype, vtype in g.canonical_etypes:
            new_g.edges[vtype, etype, utype].data.update(
                g.edges[utype, etype, vtype].data)

    return new_g
...@@ -707,25 +826,30 @@ def to_simple_graph(g):
    return to_simple(g)

def to_bidirected_stale(g, readonly=True):
    """NOTE: this function only works on the deprecated
    :class:`dgl.DGLGraphStale` object.

    Convert the graph to a bidirected graph.

    The function generates a new graph with no node/edge feature.
    If g has an edge for ``(u, v)`` but no edge for ``(v, u)``, then the
    returned graph will have both ``(u, v)`` and ``(v, u)``.

    If the input graph is a multigraph (there are multiple edges from node u to node v),
    the returned graph isn't well defined.

    Parameters
    ----------
    g : DGLGraphStale
        The input graph.
    readonly : bool
        Whether the returned bidirected graph is readonly or not.
        (Default: True)

    Notes
    -----
    Please make sure g is a simple graph, otherwise the return value is undefined.

    Returns
    -------
...@@ -736,10 +860,10 @@ def to_bidirected_stale(g, readonly=True):
    The following two examples use PyTorch backend, one for non-multi graph
    and one for multi-graph.

    >>> g = dgl._deprecate.graph.DGLGraph()
    >>> g.add_nodes(2)
    >>> g.add_edges([0, 0], [0, 1])
    >>> bg1 = dgl.to_bidirected_stale(g)
    >>> bg1.edges()
    (tensor([0, 1, 0]), tensor([0, 0, 1]))
    """
...@@ -750,29 +874,30 @@ def to_bidirected_stale(g, readonly=True):
    return DGLGraphStale(newgidx)
def laplacian_lambda_max(g):
    """Return the largest eigenvalue of the normalized symmetric Laplacian of a graph.

    If the graph is batched from multiple graphs, return the list of the largest eigenvalues
    for each graph instead.

    Parameters
    ----------
    g : DGLGraph
        The input graph. It should be undirected and homogeneous,
        and can be batched from multiple graphs.

    Returns
    -------
    list[float]
        A list where the i-th item indicates the largest eigenvalue
        of the i-th graph in :attr:`g`.
        In the case where the function takes a single graph, it will return a list
        consisting of a single element.

    Examples
    --------
    >>> import dgl
    >>> g = dgl.graph(([0, 1, 2, 3, 4, 0, 1, 2, 3, 4], [1, 2, 3, 4, 0, 4, 0, 1, 2, 3]))
    >>> dgl.laplacian_lambda_max(g)
    [1.809016994374948]
    """
...@@ -806,15 +931,25 @@ def metapath_reachable_graph(g, metapath):

    Parameters
    ----------
    g : DGLGraph
        The input graph
    metapath : list[str or tuple of str]
        Metapath in the form of a list of edge types

    Returns
    -------
    DGLGraph
        A homogeneous or unidirectional bipartite graph. It will be on CPU regardless of
        whether the input graph is on CPU or GPU.

    Examples
    --------
    >>> g = dgl.heterograph({
    ...     ('A', 'AB', 'B'): ([0, 1, 2], [1, 2, 3]),
    ...     ('B', 'BA', 'A'): ([1, 2, 3], [0, 1, 2])})
    >>> new_g = dgl.metapath_reachable_graph(g, ['AB', 'BA'])
    >>> new_g.edges(order='eid')
    (tensor([0, 1, 2]), tensor([0, 1, 2]))
    """
    adj = 1
    for etype in metapath:
...@@ -831,17 +966,18 @@ def metapath_reachable_graph(g, metapath):
                         idtype=g.idtype, device=g.device)

    # copy srcnode features
    new_g.nodes[srctype].data.update(g.nodes[srctype].data)
    # copy dstnode features
    if srctype != dsttype:
        new_g.nodes[dsttype].data.update(g.nodes[dsttype].data)

    return new_g
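The metapath composition boils down to multiplying the per-edge-type adjacency matrices along the metapath and keeping the nonzero entries as edges. A minimal scipy sketch of the docstring example, illustrative only:

```python
import numpy as np
import scipy.sparse as sp

# (src, dst) adjacency of each edge type in the docstring example
ab = sp.coo_matrix((np.ones(3), ([0, 1, 2], [1, 2, 3])), shape=(3, 4))
ba = sp.coo_matrix((np.ones(3), ([1, 2, 3], [0, 1, 2])), shape=(4, 3))
adj = (ab @ ba).tocoo()  # nonzeros are the metapath-reachable pairs
print(sorted(zip(adj.row.tolist(), adj.col.tolist())))  # → [(0, 0), (1, 1), (2, 2)]
```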
def add_nodes(g, num, data=None, ntype=None):
    r"""Append new nodes of the given node type.

    The new nodes will have IDs starting from ``g.number_of_nodes(ntype)``.

    A new graph with newly added nodes is returned.

    Parameters
...@@ -856,15 +992,15 @@ def add_nodes(g, num, data=None, ntype=None):

    Return
    ------
    DGLGraph
        The graph with newly added nodes.

    Notes
    -----
    * If the key of :attr:`data` does not contain some existing feature fields,
      those features for the new nodes will be filled with zeros.

    * If the key of :attr:`data` contains new feature fields, those features for
      the old nodes will be filled with zeros.

    Examples
...@@ -885,22 +1021,21 @@ def add_nodes(g, num, data=None, ntype=None):
    5

    If the graph has some node features and new nodes are added without
    features, their features will be created with zeros.

    >>> g.ndata['h'] = torch.ones(5, 1)
    >>> g = dgl.add_nodes(g, 1)
    >>> g.ndata['h']
    tensor([[1.], [1.], [1.], [1.], [1.], [0.]])

    You can also assign features for the new nodes when adding new nodes.

    >>> g = dgl.add_nodes(g, 1, {'h': torch.ones(1, 1), 'w': torch.ones(1, 1)})
    >>> g.ndata['h']
    tensor([[1.], [1.], [1.], [1.], [1.], [0.], [1.]])

    Since :attr:`data` contains new feature fields, the features for old nodes
    will be created with zeros.

    >>> g.ndata['w']
    tensor([[0.], [0.], [0.], [0.], [0.], [0.], [1.]])
...@@ -908,14 +1043,11 @@ def add_nodes(g, num, data=None, ntype=None):

    **Heterogeneous Graphs with Multiple Node Types**

    >>> g = dgl.heterograph({
    ...     ('user', 'plays', 'game'): (torch.tensor([0, 1, 1, 2]),
    ...                                 torch.tensor([0, 0, 1, 1])),
    ...     ('developer', 'develops', 'game'): (torch.tensor([0, 1]),
    ...                                         torch.tensor([0, 1]))
    ... })
    >>> g.num_nodes('user')
    3
    >>> g = dgl.add_nodes(g, 2, ntype='user')
...@@ -933,10 +1065,12 @@ def add_nodes(g, num, data=None, ntype=None):
    return g
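The zero-padding rules above can be mimicked with plain numpy arrays. This is an illustration of the documented semantics, not DGL's implementation:

```python
import numpy as np

# 5 existing nodes with feature 'h'; one node added without features
h = np.ones((5, 1))
h = np.concatenate([h, np.zeros((1, 1))])  # new node padded with zeros

# add another node supplying both 'h' and a brand-new field 'w'
h = np.concatenate([h, np.ones((1, 1))])
w = np.concatenate([np.zeros((6, 1)), np.ones((1, 1))])  # old nodes get zeros

print(h.ravel().tolist())  # → [1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0]
print(w.ravel().tolist())  # → [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0]
```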
def add_edges(g, u, v, data=None, etype=None): def add_edges(g, u, v, data=None, etype=None):
r"""Add multiple new edges for the specified edge type. r"""Append multiple new edges for the specified edge type.
A new graph with newly added edges is returned. A new graph with newly added edges is returned.
The i-th new edge will be from ``u[i]`` to ``v[i]``. The i-th new edge will be from ``u[i]`` to ``v[i]``. The IDs of the new
edges will start from ``g.number_of_edges(etype)``.
Parameters Parameters
---------- ----------
...@@ -953,73 +1087,72 @@ def add_edges(g, u, v, data=None, etype=None): ...@@ -953,73 +1087,72 @@ def add_edges(g, u, v, data=None, etype=None):
Return Return
------ ------
DGLHeteroGraph DGLGraph
The graph with newly added edges. The graph with newly added edges.
Notes Notes
----- -----
* If end nodes of adding edges does not exists, add_nodes is invoked * If end nodes of adding edges does not exists, add_nodes is invoked
to add new nodes. The node features of the new nodes will be created to add new nodes. The node features of the new nodes will be created
by initializers defined with :func:`set_n_initializer` (default with zeros.
initializer fills zeros). In certain cases, it is recommanded to
add_nodes first and then add_edges. * If the key of :attr:`data` does not contain some existing feature fields,
* If the key of ``data`` does not contain some existing feature fields, those features for the new edges will be created with zeros.
those features for the new edges will be created by initializers
defined with :func:`set_n_initializer` (default initializer fills zeros). * If the key of :attr:`data` contains new feature fields, those features for
* If the key of ``data`` contains new feature fields, those features for the old edges will be created with zeros.
the old edges will be created by initializers defined with
:func:`set_n_initializer` (default initializer fills zeros).
Examples
--------
The following example uses PyTorch backend.

>>> import dgl
>>> import torch

**Homogeneous Graphs or Heterogeneous Graphs with A Single Edge Type**

>>> g = dgl.graph((torch.tensor([0, 1]), torch.tensor([1, 2])))
>>> g.num_edges()
2
>>> g = dgl.add_edges(g, torch.tensor([1, 3]), torch.tensor([0, 1]))
>>> g.num_edges()
4

Since ``u`` or ``v`` contains a non-existing node ID, the nodes are
added implicitly.

>>> g.num_nodes()
4

If the graph has some edge features and new edges are added without
features, their features will be created with zeros.

>>> g.edata['h'] = torch.ones(4, 1)
>>> g = dgl.add_edges(g, torch.tensor([1]), torch.tensor([1]))
>>> g.edata['h']
tensor([[1.], [1.], [1.], [1.], [0.]])

You can also assign features for the new edges when adding them.

>>> g = dgl.add_edges(g, torch.tensor([0, 0]), torch.tensor([2, 2]),
...                   {'h': torch.tensor([[1.], [2.]]), 'w': torch.ones(2, 1)})
>>> g.edata['h']
tensor([[1.], [1.], [1.], [1.], [0.], [1.], [2.]])

Since :attr:`data` contains new feature fields, the features for old edges
will be created with zeros.

>>> g.edata['w']
tensor([[0.], [0.], [0.], [0.], [0.], [1.], [1.]])
**Heterogeneous Graphs with Multiple Edge Types**

>>> g = dgl.heterograph({
...     ('user', 'plays', 'game'): (torch.tensor([0, 1, 1, 2]),
...                                 torch.tensor([0, 0, 1, 1])),
...     ('developer', 'develops', 'game'): (torch.tensor([0, 1]),
...                                         torch.tensor([0, 1]))
...     })
>>> g.number_of_edges('plays')
4
>>> g = dgl.add_edges(g, torch.tensor([3]), torch.tensor([3]), etype='plays')
...@@ -1043,6 +1176,7 @@ def remove_edges(g, eids, etype=None):
Nodes will not be removed. After removing edges, the remaining
edges will be re-indexed using consecutive integers from 0,
with their relative order preserved.

The features for the removed edges will be removed accordingly.

Parameters
...@@ -1055,7 +1189,7 @@ def remove_edges(g, eids, etype=None):
Returns
-------
DGLGraph
    The graph with edges deleted.

Examples
...@@ -1080,17 +1214,15 @@ def remove_edges(g, eids, etype=None):
**Heterogeneous Graphs with Multiple Edge Types**

>>> g = dgl.heterograph({
...     ('user', 'plays', 'game'): (torch.tensor([0, 1, 1, 2]),
...                                 torch.tensor([0, 0, 1, 1])),
...     ('developer', 'develops', 'game'): (torch.tensor([0, 1]),
...                                         torch.tensor([0, 1]))
...     })
>>> g = dgl.remove_edges(g, torch.tensor([0, 1]), 'plays')
>>> g.edges('all', etype='plays')
(tensor([0, 1]), tensor([0, 0]), tensor([0, 1]))
See Also
--------
add_nodes
...@@ -1109,7 +1241,6 @@ def remove_nodes(g, nids, ntype=None):
Edges that connect to the nodes will be removed as well. After removing
nodes and edges, the remaining nodes and edges will be re-indexed using
consecutive integers from 0, with their relative order preserved.

The features for the removed nodes/edges will be removed accordingly.
...@@ -1123,7 +1254,7 @@ def remove_nodes(g, nids, ntype=None):
Returns
-------
DGLGraph
    The graph with nodes deleted.

Examples
...@@ -1150,14 +1281,11 @@ def remove_nodes(g, nids, ntype=None):
**Heterogeneous Graphs with Multiple Node Types**

>>> g = dgl.heterograph({
...     ('user', 'plays', 'game'): (torch.tensor([0, 1, 1, 2]),
...                                 torch.tensor([0, 0, 1, 1])),
...     ('developer', 'develops', 'game'): (torch.tensor([0, 1]),
...                                         torch.tensor([0, 1]))
...     })
>>> g = dgl.remove_nodes(g, torch.tensor([0, 1]), ntype='game')
>>> g.num_nodes('user')
3
...@@ -1177,25 +1305,35 @@ def remove_nodes(g, nids, ntype=None):
return g

def add_self_loop(g, etype=None):
r"""Add a self-loop for each node in the graph for the given edge type.

A new graph with self-loops is returned.

If the graph is heterogeneous, the given edge type must have its source
node type the same as its destination node type.

Parameters
----------
g : DGLGraph
    The graph.
etype : str or tuple of str, optional
    The type of the edges to add self-loops for. Can be omitted if there is
    only one edge type in the graph.
    Its source node type must be the same as its destination node type.

Returns
-------
DGLGraph
    The graph with self-loops.

Notes
-----
* :func:`add_self_loop` adds self-loops regardless of whether they already exist.
  If you would like to have exactly one self-loop for every node,
  call :func:`remove_self_loop` before invoking :func:`add_self_loop`.

* Features for the new edges (self-loop edges) will be created with zeros.
Examples
--------
...@@ -1223,10 +1361,10 @@ def add_self_loop(g, etype=None):
**Heterogeneous Graphs with Multiple Node Types**

>>> g = dgl.heterograph({
...     ('user', 'follows', 'user'): (torch.tensor([1, 2]),
...                                   torch.tensor([0, 1])),
...     ('user', 'plays', 'game'): (torch.tensor([0, 1]),
...                                 torch.tensor([0, 1]))})
>>> g = dgl.add_self_loop(g, etype='follows')
>>> g
Graph(num_nodes={'user': 3, 'game': 2},
...@@ -1266,8 +1404,7 @@ def remove_self_loop(g, etype=None):
**Homogeneous Graphs or Heterogeneous Graphs with A Single Node Type**

>>> g = dgl.graph((torch.tensor([0, 0, 0, 1]), torch.tensor([1, 0, 0, 2])))
>>> g.edata['he'] = torch.arange(4).float().reshape(-1, 1)
>>> g = dgl.remove_self_loop(g)
>>> g
...@@ -1279,12 +1416,12 @@ def remove_self_loop(g, etype=None):
**Heterogeneous Graphs with Multiple Node Types**

>>> g = dgl.heterograph({
...     ('user', 'follows', 'user'): (torch.tensor([0, 1, 1, 1, 2]),
...                                   torch.tensor([0, 0, 1, 1, 1])),
...     ('user', 'plays', 'game'): (torch.tensor([0, 1]),
...                                 torch.tensor([0, 1]))
...     })
>>> g = dgl.remove_self_loop(g, etype='follows')
>>> g.num_nodes('user')
3
>>> g.num_nodes('game')
...@@ -1311,37 +1448,7 @@ def remove_self_loop(g, etype=None):
DGLHeteroGraph.remove_self_loop = remove_self_loop
def compact_graphs(graphs, always_preserve=None, copy_ndata=True, copy_edata=True):
"""Given a list of graphs with the same set of nodes, find and eliminate the common
isolated nodes across all graphs.
...@@ -1352,34 +1459,58 @@ def compact_graphs(graphs, always_preserve=None):
It finds all the nodes that have zero in-degree and zero out-degree in all the given
graphs, and eliminates them from all the graphs.

Useful for graph sampling where you have a giant graph but you only wish to perform
message passing on a smaller graph with a (tiny) subset of nodes.

Parameters
----------
graphs : DGLGraph or list[DGLGraph]
    The graph, or list of graphs.
    All graphs must be on CPU.
    All graphs must have the same set of nodes.
always_preserve : Tensor or dict[str, Tensor], optional
    If a dict of node types and node ID tensors is given, the nodes of the given
    node types would not be removed, regardless of whether they are isolated.
    If a Tensor is given, DGL assumes that all the graphs have one (same) node type.
copy_ndata: bool, optional
    If True, the node features of the returned graphs are copied from the
    original graphs.
    If False, the returned graphs will not have any node features.
    (Default: True)
copy_edata: bool, optional
    If True, the edge features of the returned graphs are copied from the
    original graphs.
    If False, the returned graphs will not have any edge features.
    (Default: True)
Returns
-------
DGLGraph or list[DGLGraph]
    The compacted graph or list of compacted graphs.

    Each returned graph would have a feature ``dgl.NID`` containing the mapping
    of node IDs for each type from the compacted graph(s) to the original graph(s).
    Note that the mapping is the same for all the compacted graphs.

    All the returned graphs are on CPU.

Notes
-----
This function currently requires that the same node type of all graphs should have
the same node type ID, i.e. the node types are *ordered* the same.

If :attr:`copy_edata` is True, the same tensors will be used as
the features of the original graphs and the returned graphs to save memory.
As a result, users should avoid performing in-place operations on the edge features of
the returned graphs, which would corrupt the edge features of the original graphs as well.
Examples
--------
The following code constructs a bipartite graph with 20 users and 10 games, but
...@@ -1404,7 +1535,7 @@ def compact_graphs(graphs, always_preserve=None):
(tensor([0, 1]), tensor([0, 1]), tensor([0, 1]))

When compacting multiple graphs, nodes that do not have any connections in any
of the given graphs are removed. So if you compact ``g`` and the following ``g2``
graphs together:

>>> g2 = dgl.bipartite([(1, 6), (6, 8)], 'user', 'plays', 'game', num_nodes=(20, 10))
...@@ -1430,6 +1561,7 @@ def compact_graphs(graphs, always_preserve=None):
return []
if graphs[0].is_block:
    raise DGLError('Compacting a block graph is not allowed.')
assert all(g.device == F.cpu() for g in graphs), 'all the graphs must be on CPU'

# Ensure the node types are ordered the same.
# TODO(BarclayII): we ideally need to remove this constraint.
...@@ -1469,70 +1601,60 @@ def compact_graphs(graphs, always_preserve=None):
...@@ -1469,70 +1601,60 @@ def compact_graphs(graphs, always_preserve=None): ...@@ -1469,70 +1601,60 @@ def compact_graphs(graphs, always_preserve=None):
new_graphs = [ new_graphs = [
DGLHeteroGraph(new_graph_index, graph.ntypes, graph.etypes) DGLHeteroGraph(new_graph_index, graph.ntypes, graph.etypes)
for new_graph_index, graph in zip(new_graph_indexes, graphs)] for new_graph_index, graph in zip(new_graph_indexes, graphs)]
for g in new_graphs:
for i, ntype in enumerate(graphs[0].ntypes): if copy_ndata:
g.nodes[ntype].data[NID] = induced_nodes[i] for g, new_g in zip(graphs, new_graphs):
node_frames = utils.extract_node_subframes(g, induced_nodes)
utils.set_new_frames(new_g, node_frames=node_frames)
if copy_edata:
for g, new_g in zip(graphs, new_graphs):
edge_frames = utils.extract_edge_subframes(g, None)
utils.set_new_frames(new_g, edge_frames=edge_frames)
if return_single: if return_single:
new_graphs = new_graphs[0] new_graphs = new_graphs[0]
return new_graphs return new_graphs
def to_block(g, dst_nodes=None, include_dst_in_src=True):
"""Convert a graph into a bipartite-structured *block* for message passing.

A block is a graph consisting of two sets of nodes: the
*input* nodes and the *output* nodes. The input and output nodes can have multiple
node types. All the edges connect from input nodes to output nodes.

Specifically, the input nodes and output nodes will have the same node types as the
ones in the original graph. DGL maps each edge ``(u, v)`` with edge type
``(utype, etype, vtype)`` in the original graph to the edge with type
``etype`` connecting from node ID ``u`` of type ``utype`` in the input side to node
ID ``v`` of type ``vtype`` in the output side.

The output nodes of the block will only contain the nodes that have at least one
inbound edge of any type. The input nodes of the block will only contain the nodes
that appear in the output nodes, as well as the nodes that have at least one outbound
edge connecting to one of the output nodes.

If the :attr:`dst_nodes` argument is not None, it specifies the output nodes instead.

Parameters
----------
graph : DGLGraph
    The graph. Must be on CPU.
dst_nodes : Tensor or dict[str, Tensor], optional
    The list of output nodes.
    If a tensor is given, the graph must have only one node type.
    If given, it must be a superset of all the nodes that have at least one inbound
    edge. An error will be raised otherwise.
include_dst_in_src : bool
    If False, do not include output nodes in input nodes.
    (Default: True)

Returns
-------
DGLBlock
    The new graph describing the block.

    The node IDs induced for each type in both sides would be stored in feature
...@@ -1540,10 +1662,11 @@ def to_block(g, dst_nodes=None, include_dst_in_src=True, copy_ndata=True, copy_e
    The edge IDs induced for each type would be stored in feature ``dgl.EID``.

Raises
------
DGLError
    If :attr:`dst_nodes` is specified but it is not a superset of all the nodes that
    have at least one inbound edge.
Examples
--------
...@@ -1552,13 +1675,13 @@ def to_block(g, dst_nodes=None, include_dst_in_src=True, copy_ndata=True, copy_e
>>> g = dgl.graph([(0, 1), (1, 2), (2, 3)])
>>> block = dgl.to_block(g, torch.LongTensor([3, 2]))

The output nodes would be exactly the same as the ones given: [3, 2].

>>> induced_dst = block.dstdata[dgl.NID]
>>> induced_dst
tensor([3, 2])

The first few input nodes would also be exactly the same as
the ones given. The rest of the nodes are the ones necessary for message passing
into nodes 3, 2. This means that node 1 would be included.
...@@ -1566,15 +1689,15 @@ def to_block(g, dst_nodes=None, include_dst_in_src=True, copy_ndata=True, copy_e
>>> induced_src
tensor([3, 2, 1])

Notice that the first two nodes are identical to the given nodes, which are
also the output nodes.

The induced edges can also be obtained by the following:

>>> block.edata[dgl.EID]
tensor([2, 1])

This indicates that edges (2, 3) and (1, 2) are included in the result graph. You can
verify that the first edge in the block indeed maps to the edge (2, 3), and the
second edge in the block indeed maps to the edge (1, 2):
...@@ -1583,12 +1706,12 @@ def to_block(g, dst_nodes=None, include_dst_in_src=True, copy_ndata=True, copy_e
(tensor([2, 1]), tensor([3, 2]))

Converting a heterogeneous graph to a block is similar, except that when specifying
the output nodes, you have to give a dict:

>>> g = dgl.bipartite([(0, 1), (1, 2), (2, 3)], utype='A', vtype='B')

If you don't specify any node of type A on the output side, the node type ``A``
in the block would have zero nodes on the output side.

>>> block = dgl.to_block(g, {'B': torch.LongTensor([3, 2])})
>>> block.number_of_dst_nodes('A')
...@@ -1598,16 +1721,25 @@ def to_block(g, dst_nodes=None, include_dst_in_src=True, copy_ndata=True, copy_e
>>> block.dstnodes['B'].data[dgl.NID]
tensor([3, 2])

The input side would contain all the nodes on the output side:

>>> block.srcnodes['B'].data[dgl.NID]
tensor([3, 2])

As well as all the nodes that have connections to the nodes on the output side:

>>> block.srcnodes['A'].data[dgl.NID]
tensor([2, 1])
Notes
-----
:func:`to_block` is most commonly used in customizing neighborhood sampling
for stochastic training on a large graph. Please refer to User Guide Chapter 6
for a more thorough discussion on stochastic training on a large graph.
"""
assert g.device == F.cpu(), 'the graph must be on CPU'
if dst_nodes is None:
    # Find all nodes that appeared as destinations
    dst_nodes = defaultdict(list)
...@@ -1630,7 +1762,7 @@ def to_block(g, dst_nodes=None, include_dst_in_src=True, copy_ndata=True, copy_e
for ntype in g.ntypes:
    nodes = dst_nodes.get(ntype, None)
    if nodes is not None:
        dst_nodes_nd.append(F.to_dgl_nd(nodes))
    else:
        dst_nodes_nd.append(nd.NULL[g._idtype_str])
...@@ -1642,109 +1774,74 @@ def to_block(g, dst_nodes=None, include_dst_in_src=True, copy_ndata=True, copy_e
new_graph = DGLBlock(new_graph_index, new_ntypes, g.etypes)
assert new_graph.is_unibipartite  # sanity check
src_node_ids = [F.from_dgl_nd(src) for src in src_nodes_nd]
dst_node_ids = [F.from_dgl_nd(dst) for dst in dst_nodes_nd]
edge_ids = [F.from_dgl_nd(eid) for eid in induced_edges_nd]

node_frames = utils.extract_node_subframes_for_block(g, src_node_ids, dst_node_ids)
edge_frames = utils.extract_edge_subframes(g, edge_ids)
utils.set_new_frames(new_graph, node_frames=node_frames, edge_frames=edge_frames)

return new_graph
def to_simple(g, return_counts='count', writeback_mapping=False, copy_ndata=True, copy_edata=False):
r"""Convert a graph to a simple graph, removing the parallel edges.

For a heterogeneous graph with multiple edge types, DGL removes the parallel edges
with the same edge type.

Optionally, the number of parallel edges and/or the mapping from the edges in the
original graph to the edges in the simple graph is returned.
Parameters
----------
g : DGLGraph
    The input graph. Must be on CPU.
return_counts : str, optional
    If given, the count of each edge in the original graph
    will be stored as edge features under the name
    ``return_counts``. The old features with the same name will be replaced.
    (Default: "count")
writeback_mapping: bool, optional
    If True, a write-back mapping is returned for each edge
    type subgraph. The write-back mapping is a tensor recording
    the mapping from the IDs of the edges in the original graph to
    the IDs of the edges in the new graph. If the graph is
    heterogeneous, DGL returns a dictionary of edge types and such
    tensors.
    If False, only the simple graph is returned.
    (Default: False)
copy_ndata: bool, optional
    If True, the node features of the simple graph are copied
    from the original graph.
    If False, the simple graph will not have any node features.
    (Default: True)
copy_edata: bool, optional
    If True, the edge features of the simple graph are copied
    from the original graph. If there exist duplicate edges between
    two nodes (u, v), the feature of the edge is randomly selected
    from one of the duplicate edges.
    If False, the simple graph will not have any edge features.
    (Default: False)
Returns Returns
------- -------
DGLGraph DGLGraph
A simple graph. The graph.
tensor or dict of tensor tensor or dict of tensor
If writeback_mapping is True, the writeback The writeback mapping.
mapping is returned. If the graph has only
one edge type, a tensor is returned. If the Only returned if ``writeback_mapping`` is True.
graph has multiple edge types, a dictionary
of tensor is return.
    Notes
    -----
    If ``copy_ndata`` is ``True``, the same tensors are used as
    the features of the original graph and the returned graph.  As a result, users
    should avoid performing in-place operations on the features of the returned
...@@ -1753,7 +1850,7 @@ def to_simple(g, return_counts='count', writeback_mapping=False, copy_ndata=True
    Examples
    --------
    **Homogeneous Graphs or Heterogeneous Graphs with A Single Edge Type**

    Create a graph for demonstrating the to_simple API.
    In the original graph, there are multiple edges between nodes 1 and 2.

...@@ -1804,20 +1901,16 @@ def to_simple(g, return_counts='count', writeback_mapping=False, copy_ndata=True
    >>> 'h2' in g.ndata
    False
    **Heterogeneous Graphs with Multiple Edge Types**

    >>> g = dgl.heterograph({
    ...     ('user', 'wins', 'user'): (th.tensor([0, 2, 0, 2, 2]), th.tensor([1, 1, 2, 1, 0])),
    ...     ('user', 'plays', 'game'): (th.tensor([1, 2, 1]), th.tensor([2, 1, 1]))
    ... })
    >>> g.nodes['game'].data['hv'] = th.ones(3, 1)
    >>> g.edges['plays'].data['he'] = th.zeros(3, 1)

    The edge counts are stored in the default edge feature 'count' for each edge type.

    >>> sg, wm = dgl.to_simple(g, copy_ndata=False, writeback_mapping=True)
    >>> sg
...@@ -1840,31 +1933,30 @@ def to_simple(g, return_counts='count', writeback_mapping=False, copy_ndata=True
    {('user', 'wins', 'user'): tensor([1, 2, 1, 1]),
     ('user', 'plays', 'game'): tensor([1, 1, 1])}
    """
    assert g.device == F.cpu(), 'the graph must be on CPU'
    if g.is_block:
        raise DGLError('Cannot convert a block graph to a simple graph.')
    simple_graph_index, counts, edge_maps = _CAPI_DGLToSimpleHetero(g._graph)
    simple_graph = DGLHeteroGraph(simple_graph_index, g.ntypes, g.etypes)
    counts = [F.from_dgl_nd(count) for count in counts]
    edge_maps = [F.from_dgl_nd(edge_map) for edge_map in edge_maps]
    if copy_ndata:
        node_frames = utils.extract_node_subframes(g, None)
        utils.set_new_frames(simple_graph, node_frames=node_frames)
    if copy_edata:
        eids = []
        for i in range(len(g.canonical_etypes)):
            feat_idx = F.asnumpy(edge_maps[i])
            _, indices = np.unique(feat_idx, return_index=True)
            eids.append(F.zerocopy_from_numpy(indices))
        edge_frames = utils.extract_edge_subframes(g, eids)
        utils.set_new_frames(simple_graph, edge_frames=edge_frames)
    if return_counts is not None:
        for count, canonical_etype in zip(counts, g.canonical_etypes):
            simple_graph.edges[canonical_etype].data[return_counts] = count
    if writeback_mapping:
        # single edge type
......
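The write-back mapping and the `np.unique(..., return_index=True)` trick used by `copy_edata` above can be illustrated with plain NumPy.  This is a conceptual sketch only: the real deduplication happens inside the C++ kernel `_CAPI_DGLToSimpleHetero`, and `np.unique` sorts the edges, so the simple-graph edge IDs below need not match DGL's ordering.

```python
import numpy as np

# Parallel edges of a single (hypothetical) edge type: (2, 1) appears twice.
src = np.array([0, 2, 0, 2, 2])
dst = np.array([1, 1, 2, 1, 0])
pairs = np.stack([src, dst], axis=1)

# `writeback[i]` is the simple-graph edge ID of original edge i (the
# write-back mapping); `first_idx[j]` is the first original edge mapped to
# simple edge j (the edge whose features a copy_edata-style pass would keep).
uniq, first_idx, writeback = np.unique(
    pairs, axis=0, return_index=True, return_inverse=True)
writeback = np.asarray(writeback).ravel()

counts = np.bincount(writeback)   # per-edge multiplicity, like `return_counts`
print(writeback.tolist())         # [0, 3, 1, 3, 2]
print(counts.tolist())            # [1, 1, 1, 2]
```

Here `first_idx` plays the role of the `eids` list computed per edge type in the code above: one representative original edge per deduplicated edge.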
...@@ -754,8 +754,8 @@ def relabel(x):
                          F.copy_to(F.arange(0, len(unique_x), dtype), ctx))
    return unique_x, old_to_new
def extract_node_subframes(graph, nodes):
    """Extract node features of the given nodes from :attr:`graph`
    and return them in frames.

    Note that this function does not perform actual tensor memory copy but uses
    `Frame.subframe`
...@@ -771,17 +771,11 @@ def extract_subframes(graph, nodes, edges):
        Node IDs.  If not None, the list length must be equal to the number of node types
        in the graph.  The returned frames store the node IDs in the ``dgl.NID`` field
        unless it is None, which means the whole frame is shallow-copied.

    Returns
    -------
    list[Frame]
        Extracted node frames.
    """
    if nodes is None:
        node_frames = [nf.clone() for nf in graph._node_frames]
...@@ -791,6 +785,67 @@ def extract_subframes(graph, nodes, edges):
            subf = graph._node_frames[i].subframe(ind_nodes)
            subf[NID] = ind_nodes
            node_frames.append(subf)
    return node_frames
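Conceptually, a frame behaves like a dict of per-node feature columns, and `subframe` gathers rows while recording the original IDs.  A minimal pure-Python sketch of that behavior (illustrative only; the names are hypothetical, and DGL's real `Frame.subframe` additionally avoids materializing the gather eagerly):

```python
import numpy as np

def subframe(frame, ids):
    """Gather rows `ids` from every column and record the original row IDs
    under 'NID' (standing in for the ``dgl.NID`` field)."""
    sub = {name: col[ids] for name, col in frame.items()}
    sub['NID'] = ids
    return sub

frame = {'h': np.arange(10).reshape(5, 2)}   # 5 nodes, 2-dim feature 'h'
sub = subframe(frame, np.array([3, 1]))
print(sub['h'].tolist())    # [[6, 7], [2, 3]]
print(sub['NID'].tolist())  # [3, 1]
```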
def extract_node_subframes_for_block(graph, srcnodes, dstnodes):
    """Extract the input node features and output node features of the given nodes from
    :attr:`graph` and return them in frames ready for a block.

    Note that this function does not perform actual tensor memory copy but uses
    `Frame.subframe` to get the features.  If :attr:`srcnodes` or :attr:`dstnodes` is
    None, it performs a shallow copy of the original node frames that only copies the
    dictionary structure but not the tensor contents.

    Parameters
    ----------
    graph : DGLGraph
        The graph to extract features from.
    srcnodes : list[Tensor]
        Input node IDs.  The list length must be equal to the number of node types
        in the graph.  The returned frames store the node IDs in the ``dgl.NID`` field.
    dstnodes : list[Tensor]
        Output node IDs.  The list length must be equal to the number of node types
        in the graph.  The returned frames store the node IDs in the ``dgl.NID`` field.

    Returns
    -------
    list[Frame]
        Extracted node frames.
    """
    node_frames = []
    for i, ind_nodes in enumerate(srcnodes):
        subf = graph._node_frames[i].subframe(ind_nodes)
        subf[NID] = ind_nodes
        node_frames.append(subf)
    for i, ind_nodes in enumerate(dstnodes):
        subf = graph._node_frames[i].subframe(ind_nodes)
        subf[NID] = ind_nodes
        node_frames.append(subf)
    return node_frames
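The returned list thus holds one frame per node type for the source side followed by one per type for the destination side, i.e. twice the number of node types in total.  A toy single-type sketch of that layout, using a plain-dict stand-in for a frame (illustrative names only, not DGL's API):

```python
import numpy as np

def subframe(frame, ids):
    # Gather rows `ids` and record the original IDs (stand-in for dgl.NID).
    sub = {name: col[ids] for name, col in frame.items()}
    sub['NID'] = ids
    return sub

node_frames = [{'h': np.arange(12).reshape(6, 2)}]  # one node type, 6 nodes
src_ids = [np.array([0, 2, 5])]   # input nodes of the block
dst_ids = [np.array([0, 2])]      # output nodes of the block

# src-side frames first, then dst-side frames -- the block layout.
block_frames = ([subframe(f, i) for f, i in zip(node_frames, src_ids)] +
                [subframe(f, i) for f, i in zip(node_frames, dst_ids)])
print(len(block_frames))              # 2: [src-side frame, dst-side frame]
print(block_frames[1]['h'].tolist())  # [[0, 1], [4, 5]]
```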
def extract_edge_subframes(graph, edges):
    """Extract edge features of the given edges from :attr:`graph`
    and return them in frames.

    Note that this function does not perform actual tensor memory copy but uses
    `Frame.subframe` to get the features.  If :attr:`edges` is None, it performs a
    shallow copy of the original edge frames that only copies the dictionary
    structure but not the tensor contents.

    Parameters
    ----------
    graph : DGLGraph
        The graph to extract features from.
    edges : list[Tensor] or None
        Edge IDs.  If not None, the list length must be equal to the number of edge types
        in the graph.  The returned frames store the edge IDs in the ``dgl.EID`` field
        unless it is None, which means the whole frame is shallow-copied.

    Returns
    -------
    list[Frame]
        Extracted edge frames.
    """
    if edges is None:
        edge_frames = [ef.clone() for ef in graph._edge_frames]
    else:
...@@ -799,7 +854,32 @@ def extract_subframes(graph, nodes, edges):
            subf = graph._edge_frames[i].subframe(ind_edges)
            subf[EID] = ind_edges
            edge_frames.append(subf)
    return edge_frames
def set_new_frames(graph, *, node_frames=None, edge_frames=None):
    """Set the node and edge frames of a given graph to new ones.

    Parameters
    ----------
    graph : DGLGraph
        The graph whose node and edge frames are to be updated.
    node_frames : list[Frame], optional
        New node frames.

        Default is None, where the node frames are not updated.
    edge_frames : list[Frame], optional
        New edge frames.

        Default is None, where the edge frames are not updated.
    """
    if node_frames is not None:
        assert len(node_frames) == len(graph.ntypes), \
            "[BUG] number of node frames different from number of node types"
        graph._node_frames = node_frames
    if edge_frames is not None:
        assert len(edge_frames) == len(graph.etypes), \
            "[BUG] number of edge frames different from number of edge types"
        graph._edge_frames = edge_frames
def set_num_threads(num_threads):
    """Set the number of OMP threads in the process.
......
...@@ -4,6 +4,7 @@ import numpy as np
import os

import dgl
import dgl.function as fn
import dgl.partition
import backend as F
from dgl.graph_index import from_scipy_sparse_matrix
import unittest
...@@ -329,8 +330,8 @@ def test_add_reverse_edges():
    ub, vb = bg.all_edges(order='eid', etype=('user', 'plays', 'game'))
    assert F.array_equal(u, ub)
    assert F.array_equal(v, vb)
    assert set(bg.edges['plays'].data.keys()) == {dgl.EID}
    assert set(bg.edges['follows'].data.keys()) == {dgl.EID}

    # do not share ndata and edata
    bg = dgl.add_reverse_edges(g, copy_ndata=False, copy_edata=False, ignore_bipartite=True)
...@@ -448,7 +449,7 @@ def test_khop_adj():
    feat = F.randn((N, 5))
    g = dgl.DGLGraph(nx.erdos_renyi_graph(N, 0.3))
    for k in range(3):
        adj = F.tensor(F.swapaxes(dgl.khop_adj(g, k), 0, 1))
        # use original graph to do message passing for k times.
        g.ndata['h'] = feat
        for _ in range(k):
...@@ -484,6 +485,7 @@ def create_large_graph(num_nodes):
    row = np.random.choice(num_nodes, num_nodes * 10)
    col = np.random.choice(num_nodes, num_nodes * 10)
    spm = spsp.coo_matrix((np.ones(len(row)), (row, col)))
    spm.sum_duplicates()
    return dgl.graph(spm)
...@@ -495,6 +497,7 @@ def get_nodeflow(g, node_ids, num_layers):
                            seed_nodes=node_ids)
    return next(iter(sampler))

# Disabled since everything will be on heterogeneous graphs
@unittest.skipIf(F._default_context_str == 'gpu', reason="GPU not implemented")
def test_partition_with_halo():
    g = create_large_graph(1000)
......