[Doc] Scan the API docs and make many changes (#2080)

* WIP: api * dgl.sampling, dgl.data * dgl.sampling; dgl.dataloading * sampling packages * convert * subgraph * deprecate * subgraph APIs * All docstrings for convert/subgraph/transform * almost all funcs under dgl namespace * WIP: DGLGraph * done graph query * message passing functions * lint * fix merge error * fix test * lint * fix Co-authored-by: Quan Gan <coin2028@hotmail.com>

[Doc] Scan the API docs and make many changes (#2080)
* WIP: api * dgl.sampling, dgl.data * dgl.sampling; dgl.dataloading * sampling packages * convert * subgraph * deprecate * subgraph APIs * All docstrings for convert/subgraph/transform * almost all funcs under dgl namespace * WIP: DGLGraph * done graph query * message passing functions * lint * fix merge error * fix test * lint * fix Co-authored-by: Quan Gan <coin2028@hotmail.com>
f13b9b62 · Minjie Wang · GitHub · 35e25914 · f13b9b62 · f13b9b62
Unverified Commit f13b9b62 authored Aug 20, 2020 by Minjie Wang Committed by GitHub Aug 20, 2020
11 changed files
--- a/python/dgl/data/graph_serialize.py
+++ b/python/dgl/data/graph_serialize.py
 """For Graph Serialization"""
 from __future__ import absolute_import
 import os
-from ..base import dgl_warning
+from ..base import dgl_warning, DGLError
 from ..heterograph import DGLHeteroGraph
 from .._ffi.object import ObjectBase, register_object
 from .._ffi.function import _init_api
@@ -66,16 +66,23 @@ class GraphData(ObjectBase):
 def save_graphs(filename, g_list, labels=None):
-    r"""
+    r"""Save graphs and optionally their labels to file.
-    Save DGLGraphs and graph labels to file
+    Besides saving to local files, DGL supports writing the graphs directly
+    to S3 (by providing a ``"s3://..."`` path) or to HDFS (by providing
+    a ``"hdfs://..."`` path).
+    The function saves both the graph structure and node/edge features to file
+    in DGL's own binary format. For graph-level features, pass them via
+    the :attr:`labels` argument.
    Parameters
    ----------
    filename : str
-        File name to store graphs.
+        The file name to store the graphs and labels.
    g_list: list
-        DGLGraph or list of DGLGraph/DGLHeteroGraph
+        The graphs to be saved.
-    labels: dict[str, tensor]
+    labels: dict[str, Tensor]
        labels should be dict of tensors, with str as keys
    Examples
@@ -83,7 +90,7 @@ def save_graphs(filename, g_list, labels=None):
    >>> import dgl
    >>> import torch as th
-    Create :code:`DGLGraph`/:code:`DGLHeteroGraph` objects and initialize node
+    Create :class:`DGLGraph` objects and initialize node
    and edge features.
    >>> g1 = dgl.graph(([0, 1, 2], [1, 2, 3]))
@@ -96,55 +103,66 @@ def save_graphs(filename, g_list, labels=None):
    >>> graph_labels = {"glabel": th.tensor([0, 1])}
    >>> save_graphs("./data.bin", [g1, g2], graph_labels)
+    See Also
+    --------
+    load_graphs
    """
    # if it is local file, do some sanity check
    if filename.startswith('s3://') is False:
-        assert not os.path.isdir(filename), "filename {} is an existing directory.".format(filename)
+        if os.path.isdir(filename):
+            raise DGLError("Filename {} is an existing directory.".format(filename))
        f_path, _ = os.path.split(filename)
        if not os.path.exists(f_path):
            os.makedirs(f_path)
    g_sample = g_list[0] if isinstance(g_list, list) else g_list
-    if type(g_sample) == DGLHeteroGraph: # Doesn't support DGLHeteroGraph's derived class
+    if type(g_sample) == DGLHeteroGraph:  # Doesn't support DGLHeteroGraph's derived class
        save_heterographs(filename, g_list, labels)
    else:
-        raise Exception(
+        raise DGLError(
-            "Invalid argument g_list. Must be a DGLGraph or a list of DGLGraphs/DGLHeteroGraphs")
+            "Invalid argument g_list. Must be a DGLGraph or a list of DGLGraphs.")
 def load_graphs(filename, idx_list=None):
-    """
+    """Load graphs and optionally their labels from file saved by :func:`save_graphs`.
-    Load DGLGraphs from file
+    Besides loading from local files, DGL supports loading the graphs directly
+    from S3 (by providing a ``"s3://..."`` path) or from HDFS (by providing
+    a ``"hdfs://..."`` path).
    Parameters
    ----------
    filename: str
-        filename to load graphs
+        The file name to load graphs from.
-    idx_list: list of int
+    idx_list: list[int], optional
-        list of index of graph to be loaded. If not specified, will
+        The indices of the graphs to be loaded if the file contains multiple graphs.
-        load all graphs from file
+        Default is loading all the graphs stored in the file.
    Returns
    --------
-    graph_list: list of DGLGraphs / DGLHeteroGraph
+    graph_list: list[DGLGraph]
        The loaded graphs.
    labels: dict[str, Tensor]
        The graph labels stored in file. If no label is stored, the dictionary is empty.
-        Regardless of whether the ``idx_list`` argument is given or not, the returned dictionary
+        Regardless of whether the ``idx_list`` argument is given or not,
-        always contains labels of all the graphs.
+        the returned dictionary always contains the labels of all the graphs.
    Examples
    ----------
-    Following the example in save_graphs.
+    Following the example in :func:`save_graphs`.
    >>> from dgl.data.utils import load_graphs
    >>> glist, label_dict = load_graphs("./data.bin") # glist will be [g1, g2]
    >>> glist, label_dict = load_graphs("./data.bin", [0]) # glist will be [g1]
+    See Also
+    --------
+    save_graphs
    """
    # if it is local file, do some sanity check
-    assert filename.startswith('s3://') or os.path.exists(filename), "file {} does not exist.".format(filename)
+    if not (filename.startswith('s3://') or os.path.exists(filename)):
+        raise DGLError("File {} does not exist.".format(filename))
    version = _CAPI_GetFileVersion(filename)
    if version == 1:
@@ -155,7 +173,7 @@ def load_graphs(filename, idx_list=None):
    elif version == 2:
        return load_graph_v2(filename, idx_list)
    else:
-        raise Exception("Invalid DGL Version Number")
+        raise DGLError("Invalid DGL Version Number.")
 def load_graph_v2(filename, idx_list=None):

--- a/python/dgl/dataloading/__init__.py
+++ b/python/dgl/dataloading/__init__.py
-"""Classes that involves iterating over nodes or edges in a graph and generates
+"""The ``dgl.dataloading`` package contains:
-computation dependency of necessary nodes with neighborhood sampling methods.
-This includes
+* Data loader classes for iterating over a set of nodes or edges in a graph and generating
+  computation dependency via neighborhood sampling methods.
-* :py:class:`~dgl.dataloading.pytorch.NodeDataLoader` for iterating over the nodes in
-  a graph in minibatches.
-* :py:class:`~dgl.dataloading.pytorch.EdgeDataLoader` for iterating over the edges in
-  a graph in minibatches.
 * Various sampler classes that perform neighborhood sampling for multi-layer GNNs.
 * Negative samplers for link prediction.
-NOTE: this module is experimental and the interfaces may be subject to changes in
+For a holistic explanation of how the different components work together,
-future releases.
+read the user guide :ref:`guide-minibatch`.
+.. note::
+    This package is experimental and the interfaces may be subject
+    to changes in future releases. It currently only has implementations in PyTorch.
 """
 from .neighbor import *
 from .dataloader import *

--- a/python/dgl/generators.py
+++ b/python/dgl/generators.py
 """Module for various graph generator functions."""
-# pylint: disable= dangerous-default-value
 from . import backend as F
 from . import convert
@@ -7,13 +6,14 @@ from . import random
 __all__ = ['rand_graph', 'rand_bipartite']
-def rand_graph(num_nodes, num_edges, idtype=F.int64, device=F.cpu(),
+def rand_graph(num_nodes, num_edges, idtype=F.int64, device=F.cpu()):
-               formats=['coo', 'csr', 'csc']):
+    """Generate a random graph of the given number of nodes/edges and return.
-    """Generate a random graph of the given number of nodes/edges.
-    It uniformly chooses ``num_edges`` from all pairs and form a graph.
+    It uniformly chooses ``num_edges`` from all possible node pairs and forms a graph.
+    The random choice is without replacement, which means there will be no multi-edge
+    in the resulting graph.
-    TODO(minjie): support RNG as one of the arguments.
+    To control the randomness, set the random seed via :func:`dgl.seed`.
    Parameters
    ----------
@@ -22,34 +22,51 @@ def rand_graph(num_nodes, num_edges, idtype=F.int64, device=F.cpu(),
    num_edges : int
        The number of edges
    idtype : int32, int64, optional
-        Integer ID type. Must be int32 or int64. Default: int64.
+        The data type for storing the structure-related graph information
+        such as node and edge IDs. It should be a framework-specific data type object
+        (e.g., torch.int32). By default, DGL uses int64.
    device : Device context, optional
-        Device on which the graph is created. Default: CPU.
+        The device of the resulting graph. It should be a framework-specific device
-    formats : str or list of str
+        object (e.g., torch.device). By default, DGL stores the graph on CPU.
-        It can be ``'coo'``/``'csr'``/``'csc'`` or a sublist of them,
-        Force the storage formats.  Default: ``['coo', 'csr', 'csc']``.
    Returns
    -------
-    DGLHeteroGraph
+    DGLGraph
-        Generated random graph.
+        The generated random graph.
+    See Also
+    --------
+    rand_bipartite
+    Examples
+    --------
+    >>> import dgl
+    >>> dgl.rand_graph(100, 10)
+    Graph(num_nodes=100, num_edges=10,
+          ndata_schemes={}
+          edata_schemes={})
    """
+    #TODO(minjie): support RNG as one of the arguments.
    eids = random.choice(num_nodes * num_nodes, num_edges, replace=False)
-    rows = F.copy_to(F.astype(eids / num_nodes, idtype), device)
+    eids = F.zerocopy_to_numpy(eids)
-    cols = F.copy_to(F.astype(eids % num_nodes, idtype), device)
+    rows = F.zerocopy_from_numpy(eids // num_nodes)
-    g = convert.graph((rows, cols),
+    cols = F.zerocopy_from_numpy(eids % num_nodes)
-                      num_nodes=num_nodes,
+    rows = F.copy_to(F.astype(rows, idtype), device)
-                      idtype=idtype, device=device)
+    cols = F.copy_to(F.astype(cols, idtype), device)
-    return g.formats(formats)
+    return convert.graph((rows, cols),
+                         num_nodes=num_nodes,
+                         idtype=idtype, device=device)
 def rand_bipartite(utype, etype, vtype,
                   num_src_nodes, num_dst_nodes, num_edges,
-                   idtype=F.int64, device=F.cpu(),
+                   idtype=F.int64, device=F.cpu()):
-                   formats=['csr', 'coo', 'csc']):
+    """Generate a random uni-directional bipartite graph and return.
-    """Generate a random bipartite graph of the given number of src/dst nodes and
-    number of edges.
+    It uniformly chooses ``num_edges`` from all possible node pairs and forms a graph.
+    The random choice is without replacement, which means there will be no multi-edge
+    in the resulting graph.
-    It uniformly chooses ``num_edges`` from all pairs and form a graph.
+    To control the randomness, set the random seed via :func:`dgl.seed`.
    Parameters
    ----------
@@ -60,28 +77,43 @@ def rand_bipartite(utype, etype, vtype,
    vtype : str, optional
        The name of the destination node type.
    num_src_nodes : int
-        The number of source nodes, the :math:`|U|` in :math:`G=(U,V,E)`.
+        The number of source nodes.
    num_dst_nodes : int
-        The number of destination nodes, the :math:`|V|` in :math:`G=(U,V,E)`.
+        The number of destination nodes.
    num_edges : int
        The number of edges
    idtype : int32, int64, optional
-        Integer ID type. Must be int32 or int64. Default: int64.
+        The data type for storing the structure-related graph information
+        such as node and edge IDs. It should be a framework-specific data type object
+        (e.g., torch.int32). By default, DGL uses int64.
    device : Device context, optional
-        Device on which the graph is created. Default: CPU.
+        The device of the resulting graph. It should be a framework-specific device
-    formats : str or list of str
+        object (e.g., torch.device). By default, DGL stores the graph on CPU.
-        It can be ``'coo'``/``'csr'``/``'csc'`` or a sublist of them,
-        Force the storage formats.  Default: ``['coo', 'csr', 'csc']``.
    Returns
    -------
-    DGLHeteroGraph
+    DGLGraph
-        Generated random bipartite graph.
+        The generated random bipartite graph.
+    See Also
+    --------
+    rand_graph
+    Examples
+    --------
+    >>> import dgl
+    >>> dgl.rand_bipartite('user', 'buys', 'game', 50, 100, 10)
+    Graph(num_nodes={'game': 100, 'user': 50},
+          num_edges={('user', 'buys', 'game'): 10},
+          metagraph=[('user', 'game', 'buys')])
    """
+    #TODO(minjie): support RNG as one of the arguments.
    eids = random.choice(num_src_nodes * num_dst_nodes, num_edges, replace=False)
-    rows = F.copy_to(F.astype(eids / num_dst_nodes, idtype), device)
+    eids = F.zerocopy_to_numpy(eids)
-    cols = F.copy_to(F.astype(eids % num_dst_nodes, idtype), device)
+    rows = F.zerocopy_from_numpy(eids // num_dst_nodes)
-    g = convert.heterograph({(utype, etype, vtype): (rows, cols)},
+    cols = F.zerocopy_from_numpy(eids % num_dst_nodes)
-                            {utype: num_src_nodes, vtype: num_dst_nodes},
+    rows = F.copy_to(F.astype(rows, idtype), device)
-                            idtype=idtype, device=device)
+    cols = F.copy_to(F.astype(cols, idtype), device)
-    return g.formats(formats)
+    return convert.heterograph({(utype, etype, vtype): (rows, cols)},
+                               {utype: num_src_nodes, vtype: num_dst_nodes},
+                               idtype=idtype, device=device)
--- a/python/dgl/heterograph.py
+++ b/python/dgl/heterograph.py
 """Classes for heterogeneous graphs."""
 #pylint: disable= too-many-lines
-from collections import defaultdict, Iterable
+from collections import defaultdict
-from collections.abc import Mapping
+from collections.abc import Mapping, Iterable
 from contextlib import contextmanager
 import copy
 import numbers
@@ -21,158 +21,17 @@ from .view import HeteroNodeView, HeteroNodeDataView, HeteroEdgeView, HeteroEdge
 __all__ = ['DGLHeteroGraph', 'combine_names']
 class DGLHeteroGraph(object):
-    """Base heterogeneous graph class.
+    """Class for storing graph structure and node/edge feature data.
-    **Do NOT instantiate from this class directly; use** :mod:`conversion methods
-    <dgl.convert>` **instead.**
-    A Heterogeneous graph is defined as a graph with node types and edge
-    types.
-    If two edges share the same edge type, then their source nodes, as well
-    as their destination nodes, also have the same type (the source node
-    types don't have to be the same as the destination node types).
-    Examples
-    --------
-    Suppose that we want to construct the following heterogeneous graph:
-    .. graphviz::
-       digraph G {
-           Alice -> Bob [label=follows]
-           Bob -> Carol [label=follows]
-           Alice -> Tetris [label=plays]
-           Bob -> Tetris [label=plays]
-           Bob -> Minecraft [label=plays]
-           Carol -> Minecraft [label=plays]
-           Nintendo -> Tetris [label=develops]
-           Mojang -> Minecraft [label=develops]
-           {rank=source; Alice; Bob; Carol}
-           {rank=sink; Nintendo; Mojang}
-       }
-    And suppose that one maps the users, games and developers to the following
-    IDs:
-    =========  =====  ===  =====
-    User name  Alice  Bob  Carol
-    =========  =====  ===  =====
-    User ID    0      1    2
-    =========  =====  ===  =====
-    =========  ======  =========
-    Game name  Tetris  Minecraft
-    =========  ======  =========
-    Game ID    0       1
-    =========  ======  =========
-    ==============  ========  ======
-    Developer name  Nintendo  Mojang
-    ==============  ========  ======
-    Developer ID    0         1
-    ==============  ========  ======
-    One can construct the graph as follows:
-    >>> g = dgl.heterograph({
-    ...     ('user', 'follows', 'user'): ([0, 1], [1, 2]),
-    ...     ('user', 'plays', 'game'): ([0, 1, 1, 2], [0, 0, 1, 1]),
-    ...     ('developer', 'develops', 'game'): ([0, 1], [0, 1]),
-    ...     })
-    Then one can query the graph structure by specifying the ``ntype`` or ``etype`` arguments:
-    >>> g.number_of_nodes('user')
-    3
-    >>> g.number_of_edges('plays')
-    4
-    >>> g.out_degrees(etype='develops')  # out-degrees of source nodes of 'develops' edge type
-    tensor([1, 1])
-    >>> g.in_edges(0, etype='develops')  # in-edges of destination node 0 of 'develops' edge type
-    (tensor([0]), tensor([0]))
-    Or on the sliced graph for an edge type:
-    >>> g['plays'].number_of_edges()
-    4
-    >>> g['develops'].out_degrees()
-    tensor([1, 1])
-    >>> g['develops'].in_edges(0)
-    (tensor([0]), tensor([0]))
-    Node type names must be distinct (no two types have the same name). Edge types could
-    have the same name but they must be distinguishable by the ``(src_type, edge_type, dst_type)``
-    triplet (called *canonical edge type*).
-    For example, suppose a graph that has two types of relation "user-watches-movie"
-    and "user-watches-TV" as follows:
-    >>> GG = dgl.heterograph({
-    ...     ('user', 'watches', 'movie'): ([0, 1, 1], [1, 0, 1]),
-    ...     ('user', 'watches', 'TV'): ([0, 1], [0, 1])
-    ... })
-    To distinguish between the two "watches" edge type, one must specify a full triplet:
-    >>> GG.number_of_edges(('user', 'watches', 'movie'))
-    3
-    >>> GG.number_of_edges(('user', 'watches', 'TV'))
-    2
-    >>> GG['user', 'watches', 'movie'].out_degrees()
-    tensor([1, 2])
-    Using only one single edge type string "watches" is ambiguous and will cause error:
-    >>> GG.number_of_edges('watches')  # AMBIGUOUS!!
-    In many cases, there is only one type of nodes or one type of edges, and the ``ntype``
-    and ``etype`` argument could be omitted. This is very common when using the sliced
-    graph, which usually contains only one edge type, and sometimes only one node type:
-    >>> g['follows'].number_of_nodes()  # OK!! because g['follows'] only has one node type 'user'
-    3
-    >>> g['plays'].number_of_nodes()  # ERROR!! There are two types 'user' and 'game'.
-    >>> g['plays'].number_of_edges()  # OK!! because there is only one edge type 'plays'
-    TODO(minjie): docstring about uni-directional bipartite graph
-    Metagraph
-    ---------
-    For each heterogeneous graph, one can often infer the *metagraph*, the template of
-    edge connections showing how many types of nodes and edges exist in the graph, and
-    how each edge type could connect between node types.
-    One can analyze the example gameplay graph above and figure out the metagraph as
-    follows:
-    .. graphviz::
-       digraph G {
-           User -> User [label=follows]
-           User -> Game [label=plays]
-           Developer -> Game [label=develops]
-       }
+    There are a few ways to create a DGLGraph:
-    Parameters
+    * To create a homogeneous graph from Tensor data, use :func:`dgl.graph`.
-    ----------
+    * To create a heterogeneous graph from Tensor data, use :func:`dgl.heterograph`.
-    gidx : HeteroGraphIndex
+    * To create a graph from other data sources, use ``dgl.*`` create ops. See
-        Graph index object.
+      :ref:`api-graph-create-ops`.
-    ntypes : list of str, pair of list of str
-        Node type list. ``ntypes[i]`` stores the name of node type i.
+    Read the user guide chapter :ref:`guide-graph` for an in-depth explanation about its
-        If a pair is given, the graph created is a uni-directional bipartite graph,
+    usage.
-        and its SRC node types and DST node types are given as in the pair.
-    etypes : list of str
-        Edge type list. ``etypes[i]`` stores the name of edge type i.
-    node_frames : list[Frame], optional
-        Node feature storage. If None, empty frame is created.
-        Otherwise, ``node_frames[i]`` stores the node features
-        of node type i. (default: None)
-    edge_frames : list[Frame], optional
-        Edge feature storage. If None, empty frame is created.
-        Otherwise, ``edge_frames[i]`` stores the edge features
-        of edge type i. (default: None)
    """
    is_block = False
@@ -184,6 +43,27 @@ class DGLHeteroGraph(object):
                 node_frames=None,
                 edge_frames=None,
                 **deprecate_kwargs):
+        """Internal constructor for creating a DGLGraph.
+        Parameters
+        ----------
+        gidx : HeteroGraphIndex
+            Graph index object.
+        ntypes : list of str, pair of list of str
+            Node type list. ``ntypes[i]`` stores the name of node type i.
+            If a pair is given, the graph created is a uni-directional bipartite graph,
+            and its SRC node types and DST node types are given as in the pair.
+        etypes : list of str
+            Edge type list. ``etypes[i]`` stores the name of edge type i.
+        node_frames : list[Frame], optional
+            Node feature storage. If None, empty frame is created.
+            Otherwise, ``node_frames[i]`` stores the node features
+            of node type i. (default: None)
+        edge_frames : list[Frame], optional
+            Edge feature storage. If None, empty frame is created.
+            Otherwise, ``edge_frames[i]`` stores the edge features
+            of edge type i. (default: None)
+        """
        if isinstance(gidx, DGLHeteroGraph):
            raise DGLError('The input is already a DGLGraph. No need to create it again.')
        if not isinstance(gidx, heterograph_index.HeteroGraphIndex):
@@ -851,12 +731,17 @@ class DGLHeteroGraph(object):
    @property
    def ntypes(self):
-        """Return the node types of the graph.
+        """Return all the node type names in the graph.
        Returns
        -------
-        list of str
+        list[str]
-            Each ``str`` is a node type.
+            All the node type names in a list.
+        Notes
+        -----
+        DGL internally assigns an integer ID for each node type. The returned
+        node type names are sorted according to their IDs.
        Examples
        --------
@@ -877,19 +762,27 @@ class DGLHeteroGraph(object):
    @property
    def etypes(self):
-        """Return the edge types of the graph.
+        """Return all the edge type names in the graph.
        Returns
        -------
-        list of str
+        list[str]
-            Each ``str`` is an edge type.
+            All the edge type names in a list.
        Notes
        -----
-        An edge type can appear in multiple canonical edge types. For example, ``'interacts'``
+        DGL internally assigns an integer ID for each edge type. The returned
-        can appear in two canonical edge types ``('drug', 'interacts', 'drug')`` and
+        edge type names are sorted according to their IDs.
-        ``('protein', 'interacts', 'protein')``. It is recommended to use
-        :func:`~dgl.DGLGraph.canonical_etypes` in this case.
+        The complete format to specify a relation is a string triplet ``(str, str, str)``
+        for source node type, edge type and destination node type. DGL calls this
+        format *canonical edge type*. An edge type can appear in multiple canonical edge types.
+        For example, ``'interacts'`` can appear in two canonical edge types
+        ``('drug', 'interacts', 'drug')`` and ``('protein', 'interacts', 'protein')``.
+        See Also
+        --------
+        canonical_etypes
        Examples
        --------
@@ -910,16 +803,24 @@ class DGLHeteroGraph(object):
    @property
    def canonical_etypes(self):
-        """Return the canonical edge types of the graph.
+        """Return all the canonical edge types in the graph.
-        A canonical edge type is a 3-tuple of str ``src_type, edge_type, dst_type``, where
+        A canonical edge type is a string triplet ``(str, str, str)``
-        ``src_type``, ``edge_type``, ``dst_type`` are the type of the source nodes, edges
+        for source node type, edge type and destination node type.
-        and destination nodes respectively.
        Returns
        -------
-        list of 3-tuple of str
+        list[(str, str, str)]
-            Each 3-tuple of str is a canonical edge type.
+            All the canonical edge type triplets in a list.
+        Notes
+        -----
+        DGL internally assigns an integer ID for each edge type. The returned
+        edge type names are sorted according to their IDs.
+        See Also
+        --------
+        etypes
        Examples
        --------
@@ -942,15 +843,24 @@ class DGLHeteroGraph(object):
    @property
    def srctypes(self):
-        """Return the source node types.
+        """Return all the source node type names in this graph.
+        If the graph can further divide its node types into two subsets A and B where
+        all the edges are from nodes of types in A to nodes of types in B, we call
+        this graph a *uni-bipartite* graph and the nodes in A being the *source*
+        nodes and the ones in B being the *destination* nodes. If the graph is not
+        uni-bipartite, the source and destination nodes are just the entire set of
+        nodes in the graph.
        Returns
        -------
-        list of str
+        list[str]
+            All the source node type names in a list.
-            * If the graph is a uni-bipartite graph, it returns the source node types.
+        See Also
-              For a definition of uni-bipartite, see :func:`is_unibipartite`.
+        --------
-            * Otherwise, it returns all node types in the graph.
+        dsttypes
+        is_unibipartite
        Examples
        --------
@@ -984,16 +894,24 @@ class DGLHeteroGraph(object):
    @property
    def dsttypes(self):
-        """Return the destination node types.
+        """Return all the destination node type names in this graph.
+        If the graph can further divide its node types into two subsets A and B where
+        all the edges are from nodes of types in A to nodes of types in B, we call
+        this graph a *uni-bipartite* graph and the nodes in A being the *source*
+        nodes and the ones in B being the *destination* nodes. If the graph is not
+        uni-bipartite, the source and destination nodes are just the entire set of
+        nodes in the graph.
        Returns
        -------
-        list of str
+        list[str]
-            Each str is a node type.
+            All the destination node type names in a list.
-            * If the graph is a uni-bipartite graph, it returns the destination node types.
+        See Also
-              For a definition of uni-bipartite, see :func:`is_unibipartite`.
+        --------
-            * Otherwise, it returns all node types in the graph.
+        srctypes
+        is_unibipartite
        Examples
        --------
@@ -1065,29 +983,24 @@ class DGLHeteroGraph(object):
    def to_canonical_etype(self, etype):
        """Convert an edge type to the corresponding canonical edge type in the graph.
-        A canonical edge type is a 3-tuple of strings ``src_type, edge_type, dst_type``, where
+        A canonical edge type is a string triplet ``(str, str, str)``
-        ``src_type``, ``edge_type``, ``dst_type`` are separately the type of source
+        for source node type, edge type and destination node type.
-        nodes, edges and destination nodes.
+        The function expects the given edge type name can uniquely identify a canonical edge
+        type. DGL will raise error if this is not the case.
        Parameters
        ----------
-        etype : str or 3-tuple of str
+        etype : str or (str, str, str)
            If :attr:`etype` is an edge type (str), it returns the corresponding canonical edge
-            type in the graph. If :attr:`etype` is already a canonical edge type
+            type in the graph. If :attr:`etype` is already a canonical edge type,
-            (3-tuple of str), it simply returns :attr:`etype`.
+            it directly returns the input unchanged.
        Returns
        -------
-        3-tuple of str
+        (str, str, str)
            The canonical edge type corresponding to the edge type.
-        Notes
-        -----
-        If :attr:`etype` is an edge type, the API expects it to appear only once in the graph. For
-        example, in a graph with canonical edge types ``('A', 'follows', 'B')``,
-        ``('A', 'follows', 'C')`` and ``('B', 'watches', 'D')``, ``'follows'`` is an invalid value
-        for :attr:`etype` while ``'watches'`` is a valid one.
        Examples
        --------
        The following example uses PyTorch backend.
@@ -1131,7 +1044,7 @@ class DGLHeteroGraph(object):
            return ret
    def get_ntype_id(self, ntype):
-        """Return the id of the given node type.
+        """Return the ID of the given node type.
        ntype can also be None. If so, there should be only one node type in the
        graph.
@@ -1165,7 +1078,7 @@ class DGLHeteroGraph(object):
        return ntid
    def get_ntype_id_from_src(self, ntype):
-        """Return the id of the given SRC node type.
+        """Internal function to return the ID of the given SRC node type.
        ntype can also be None. If so, there should be only one node type in the
        SRC category. Callable even when the self graph is not uni-bipartite.
@@ -1190,7 +1103,7 @@ class DGLHeteroGraph(object):
        return ntid
    def get_ntype_id_from_dst(self, ntype):
-        """Return the id of the given DST node type.
+        """Internal function to return the ID of the given DST node type.
        ntype can also be None. If so, there should be only one node type in the
        DST category. Callable even when the self graph is not uni-bipartite.
@@ -2057,16 +1970,16 @@ class DGLHeteroGraph(object):
    #################################################################
    def number_of_nodes(self, ntype=None):
-        """Alias of :func:`num_nodes`"""
+        """Alias of :meth:`num_nodes`"""
        return self.num_nodes(ntype)
    def num_nodes(self, ntype=None):
-        """Return the number of nodes.
+        """Return the number of nodes in the graph.
        Parameters
        ----------
        ntype : str, optional
-            The node type for query. If given, it returns the number of nodes for a particular
+            The node type name. If given, it returns the number of nodes of the
            type. If not given (default), it returns the total number of nodes of all types.
        Returns
@@ -2104,17 +2017,24 @@ class DGLHeteroGraph(object):
            return self._graph.number_of_nodes(self.get_ntype_id(ntype))
    def number_of_src_nodes(self, ntype=None):
-        """Alias of :func:`num_src_nodes`"""
+        """Alias of :meth:`num_src_nodes`"""
        return self.num_src_nodes(ntype)
    def num_src_nodes(self, ntype=None):
-        """Return the number of nodes of the given source node type.
+        """Return the number of source nodes in the graph.
+        If the graph can further divide its node types into two subsets A and B where
+        all the edges are from nodes of types in A to nodes of types in B, we call
+        this graph a *uni-bipartite* graph and the nodes in A being the *source*
+        nodes and the ones in B being the *destination* nodes. If the graph is not
+        uni-bipartite, the source and destination nodes are just the entire set of
+        nodes in the graph.
        Parameters
        ----------
        ntype : str, optional
-            The source node type for query. If given, it returns the number of nodes for a
+            The source node type name. If given, it returns the number of nodes for
-            particular source node type. If not given (default), it returns the number of
+            the source node type. If not given (default), it returns the number of
            nodes summed over all source node types.
        Returns
@@ -2122,6 +2042,11 @@ class DGLHeteroGraph(object):
        int
            The number of nodes
+        See Also
+        --------
+        num_dst_nodes
+        is_unibipartite
        Examples
        --------
        The following example uses PyTorch backend.
@@ -2162,20 +2087,32 @@ class DGLHeteroGraph(object):
        return self.num_dst_nodes(ntype)
    def num_dst_nodes(self, ntype=None):
-        """Return the number of nodes of the given destination node type.
+        """Return the number of destination nodes in the graph.
+        If the graph can further divide its node types into two subsets A and B where
+        all the edges are from nodes of types in A to nodes of types in B, we call
+        this graph a *uni-bipartite* graph and the nodes in A being the *source*
+        nodes and the ones in B being the *destination* nodes. If the graph is not
+        uni-bipartite, the source and destination nodes are just the entire set of
+        nodes in the graph.
        Parameters
        ----------
        ntype : str, optional
-            The destination node type for query. If given, it returns the number of nodes for a
+            The destination node type name. If given, it returns the number of nodes of
-            particular destination node type. If not given (default), it returns the number of
+            the destination node type. If not given (default), it returns the number of
-            nodes summed over all destination node types.
+            nodes summed over all the destination node types.
        Returns
        -------
        int
            The number of nodes
+        See Also
+        --------
+        num_src_nodes
+        is_unibipartite
        Examples
        --------
        The following example uses PyTorch backend.
@@ -2216,16 +2153,19 @@ class DGLHeteroGraph(object):
        return self.num_edges(etype)
    def num_edges(self, etype=None):
-        """Return the number of edges.
+        """Return the number of edges in the graph.
        Parameters
        ----------
-        etype : str or tuple of str, optional
+        etype : str or (str, str, str), optional
-            The edge type for query, which can be an edge type (str) or a canonical edge type
+            The type name of the edges. The allowed type name formats are:
-            (3-tuple of str). When an edge type appears in multiple canonical edge types, one
-            must use a canonical edge type. If given, it returns the number of edges for a
+            * ``(str, str, str)`` for source node type, edge type and destination node type.
-            particular edge type. If not given (default), it returns the total number of edges
+            * or one ``str`` edge type name if the name can uniquely identify a
-            of all types.
+              triplet format in the graph.
+            If not provided, return the total number of edges regardless of the types
+            in the graph.
        Returns
        -------
@@ -2277,22 +2217,21 @@ class DGLHeteroGraph(object):
    @property
    def is_multigraph(self):
-        """Whether the graph is a multigraph
+        """Return whether the graph is a multigraph with parallel edges.
-        In a multigraph, there can be multiple edges from a node ``u`` to a node ``v``.
-        For a heterogeneous graph of multiple canonical edge types, we consider it as a
+        A multigraph has more than one edge between the same pair of nodes, called
-        multigraph if there are multiple edges from a node ``u`` to a node ``v`` for any
+        *parallel edges*.  For heterogeneous graphs, parallel edges further require
-        canonical edge type.
+        the canonical edge type to be the same (see :meth:`canonical_etypes` for the
+        definition).
        Returns
        -------
        bool
-            Whether the graph is a multigraph.
+            True if the graph is a multigraph.
        Notes
        -----
-        Checking whether the graph is a multigraph can be expensive for a large one.
+        Checking whether the graph is a multigraph could be expensive for a large one.
        Examples
        --------
@@ -2330,14 +2269,14 @@ class DGLHeteroGraph(object):
    @property
    def is_homogeneous(self):
-        """Whether the graph is a homogeneous graph.
+        """Return whether the graph is a homogeneous graph.
        A homogeneous graph only has one node type and one edge type.
        Returns
        -------
        bool
-            Whether the graph is a homogeneous graph.
+            True if the graph is a homogeneous graph.
        Examples
        --------
@@ -2366,7 +2305,7 @@ class DGLHeteroGraph(object):
    @property
    def is_readonly(self):
-        """Deprecated: DGLGraph will always be mutable.
+        """**DEPRECATED**: DGLGraph will always be mutable.
        Returns
        -------
@@ -2424,38 +2363,33 @@ class DGLHeteroGraph(object):
        return self._graph.dtype
    def __contains__(self, vid):
-        """Deprecated: please directly call :func:`has_nodes`.
+        """**DEPRECATED**: please directly call :func:`has_nodes`."""
-        """
        dgl_warning('DGLGraph.__contains__ is deprecated.'
                    ' Please directly call has_nodes.')
        return self.has_nodes(vid)
    def has_nodes(self, vid, ntype=None):
-        """Whether the graph has some particular node(s) of a given type.
+        """Return whether the graph contains the given nodes.
        Parameters
        ----------
        vid : node ID(s)
-            The node ID(s) for query. The allowed formats are:
+            The node IDs. The allowed node ID formats are:
+            * ``int``: The ID of a single node.
+            * Int Tensor: Each element is a node ID. The tensor must have the same device type
+              and ID data type as the graph's.
+            * iterable[int]: Each element is a node ID.
-            - ``int``: The ID of a single node.
-            - ``Tensor``: A 1D tensor that contains the IDs of multiple nodes, whose data type and
-              device should be the same as the :py:attr:`idtype` and device of the graph.
-            - ``iterable[int]``: A sequence (e.g. list, tuple, numpy.ndarray)
-              of integers that contains the IDs of multiple nodes.
        ntype : str, optional
-            The node type for query. It is required if the graph has
+            The node type name. Can be omitted if there is
-            multiple node types.
+            only one type of nodes in the graph.
        Returns
        -------
        bool or bool Tensor
+            A tensor of bool flags where each element is True if the node is in the graph.
-            - If :attr:`vid` is an ``int``, the result will be a ``bool`` indicating
+            If the input is a single node, return one bool value.
-              whether the graph has the particular node.
-            - If :attr:`vid` is a 1D ``Tensor`` or ``iterable[int]`` of node IDs,
-              the result will be a bool Tensor whose i-th element indicates whether
-              the graph has node :attr:`vid[i]` of the given type.
        Examples
        --------
@@ -2494,50 +2428,47 @@ class DGLHeteroGraph(object):
    def has_node(self, vid, ntype=None):
        """Whether the graph has a particular node of a given type.
-        DEPRECATED: see :func:`~DGLGraph.has_nodes`
+        **DEPRECATED**: see :func:`~DGLGraph.has_nodes`
        """
        dgl_warning("DGLGraph.has_node is deprecated. Please use DGLGraph.has_nodes")
        return self.has_nodes(vid, ntype)
    def has_edges_between(self, u, v, etype=None):
-        """Whether the graph has some particular edge(s) of a given type.
+        """Return whether the graph contains the given edges.
        Parameters
        ----------
-        u : source node ID(s)
+        u : node IDs
-            The source node(s) of the edges for query. The allowed formats are:
+            The source node IDs of the edges. The allowed formats are:
-            - ``int``: The source node of an edge for query.
+            * ``int``: A single node.
-            - ``Tensor``: A 1D tensor that contains the source node(s) of edge(s) for query.
+            * Int Tensor: Each element is a node ID. The tensor must have the same device type
-              The data type and device of the tensor must be the same as the :py:attr:`idtype` and
+              and ID data type as the graph's.
-              device of the graph. Its i-th element represents the source node ID of the
+            * iterable[int]: Each element is a node ID.
-              i-th edge for query.
-            - ``iterable[int]`` : Similar to the tensor, but stores node IDs in a sequence
+        v : node IDs
-              (e.g. list, tuple, numpy.ndarray).
+            The destination node IDs of the edges. The allowed formats are:
-        v : destination node ID(s)
-            The destination node(s) of the edges for query. It's a counterpart of :attr:`u`
+            * ``int``: A single node.
-            for destination nodes and should have the same format as :attr:`u`. If :attr:`u`
+            * Int Tensor: Each element is a node ID. The tensor must have the same device type
-            and :attr:`v` are not int, they should have the same length.
+              and ID data type as the graph's.
-        etype : str or tuple of str, optional
+            * iterable[int]: Each element is a node ID.
-            The edge type for query, which can be an edge type (str) or a canonical edge type
-            (3-tuple of str). When an edge type appears in multiple canonical edge types, one
+        etype : str or (str, str, str), optional
-            must use a canonical edge type. If the graph has multiple edge types, one must
+            The type names of the edges. The allowed type name formats are:
-            specify the argument. Otherwise, it can be omitted.
+            * ``(str, str, str)`` for source node type, edge type and destination node type.
+            * or one ``str`` edge type name if the name can uniquely identify a
+              triplet format in the graph.
+            Can be omitted if the graph has only one type of edges.
        Returns
        -------
        bool or bool Tensor
+            A tensor of bool flags where each element is True if the edge is in the graph.
-            - If :attr:`u` and :attr:`v` are ``int`` objects, the result will be a ``bool``
+            If the input is a single pair of nodes (one edge), return one bool value.
-              indicating whether there is an edge from ``u`` to ``v`` of the given edge type.
-            - If :attr:`u` and :attr:`v` are ``Tensor`` or ``iterable[int]`` objects, the
-              result will be a bool Tensor whose i-th element indicates whether there is an
-              edge from ``u[i]`` to ``v[i]`` of the given edge type.
-        Notes
-        -----
-        The value(s) of :attr:`u` and :attr:`v` need to be separately smaller than the
-        number of nodes of the source and destination type.
        Examples
        --------
@@ -2595,7 +2526,7 @@ class DGLHeteroGraph(object):
    def has_edge_between(self, u, v, etype=None):
        """Whether the graph has edges of type ``etype``.
-        DEPRECATED: please use :func:`~DGLGraph.has_edge_between`.
+        **DEPRECATED**: please use :func:`~DGLGraph.has_edges_between`.
        """
        dgl_warning("DGLGraph.has_edge_between is deprecated. "
                    "Please use DGLGraph.has_edges_between")
@@ -2610,12 +2541,16 @@ class DGLHeteroGraph(object):
        Parameters
        ----------
        v : int
-            The destination node for query.
+            The node ID.
-        etype : str or tuple of str, optional
+        etype : str or (str, str, str), optional
-            The edge type for query, which can be an edge type (str) or a canonical edge type
+            The type names of the edges. The allowed type name formats are:
-            (3-tuple of str). When an edge type appears in multiple canonical edge types, one
-            must use a canonical edge type. If the graph has multiple edge types, one must
+            * ``(str, str, str)`` for source node type, edge type and destination node type.
-            specify the argument. Otherwise, it can be omitted.
+            * or one ``str`` edge type name if the name can uniquely identify a
+              triplet format in the graph.
+            Can be omitted if the graph has only one type of edges.
        Returns
        -------
@@ -2664,12 +2599,15 @@ class DGLHeteroGraph(object):
        Parameters
        ----------
        v : int
-            The source node for query.
+            The node ID.
-        etype : str or tuple of str, optional
+        etype : str or (str, str, str), optional
-            The edge type for query, which can be an edge type (str) or a canonical edge type
+            The type names of the edges. The allowed type name formats are:
-            (3-tuple of str). When an edge type appears in multiple canonical edge types, one
-            must use a canonical edge type. If the graph has multiple edge types, one must
+            * ``(str, str, str)`` for source node type, edge type and destination node type.
-            specify the argument. Otherwise, it can be omitted.
+            * or one ``str`` edge type name if the name can uniquely identify a
+              triplet format in the graph.
+            Can be omitted if the graph has only one type of edges.
        Returns
        -------
@@ -2720,48 +2658,51 @@ class DGLHeteroGraph(object):
                             return_uv=return_uv, etype=etype)
    def edge_ids(self, u, v, force_multi=None, return_uv=False, etype=None):
-        """Return the ID(s) of edge(s) from the given source node(s) to the given destination
+        """Return the edge ID(s) given the two endpoints of the edge(s).
-        node(s) with the specified edge type.
        Parameters
        ----------
-        u : source node ID(s)
+        u : node IDs
-            The source node(s) of the edges for query. The allowed formats are:
+            The source node IDs of the edges. The allowed formats are:
-            - ``int``: The source node of an edge for query.
+            * ``int``: A single node.
-            - ``Tensor``: A 1D tensor that contains the source node(s) of edge(s) for query, whose
+            * Int Tensor: Each element is a node ID. The tensor must have the same device type
-              data type an device should be the same as the :py:attr:`idtype` and device of
+              and ID data type as the graph's.
-              the graph. Its i-th element is the source node of the i-th edge for query.
+            * iterable[int]: Each element is a node ID.
-            - ``iterable[int]``: Similar to the tensor, but stores node IDs in a sequence
-              (e.g. list, tuple, numpy.ndarray).
+        v : node IDs
-        v : destination node ID(s)
+            The destination node IDs of the edges. The allowed formats are:
-            The destination node(s) of the edges for query. It's a counterpart of :attr:`u`
-            for destination nodes and should have the same format as :attr:`u`. If :attr:`u`
+            * ``int``: A single node.
-            and :attr:`v` are not int, they should have the same length.
+            * Int Tensor: Each element is a node ID. The tensor must have the same device type
+              and ID data type as the graph's.
+            * iterable[int]: Each element is a node ID.
        force_multi : bool, optional
-            Deprecated, use :attr:`return_uv` instead. Whether to allow the graph to be a
+            **DEPRECATED**, use :attr:`return_uv` instead. Whether to allow the graph to be a
            multigraph, i.e. there can be multiple edges from one node to another.
        return_uv : bool, optional
            Whether to return the source and destination node IDs along with the edges. If
            False (default), it assumes that the graph is a simple graph and there is only
            one edge from one node to another. If True, there can be multiple edges found
            from one node to another.
-        etype : str or tuple of str, optional
+        etype : str or (str, str, str), optional
-            The edge type for query, which can be an edge type (str) or a canonical edge type
+            The type names of the edges. The allowed type name formats are:
-            (3-tuple of str). When an edge type appears in multiple canonical edge types, one
-            must use a canonical edge type. If the graph has multiple edge types, one must
+            * ``(str, str, str)`` for source node type, edge type and destination node type.
-            specify the argument. Otherwise, it can be omitted.
+            * or one ``str`` edge type name if the name can uniquely identify a
+              triplet format in the graph.
+            Can be omitted if the graph has only one type of edges.
        Returns
        -------
-        tensor, or (tensor, tensor, tensor)
+        Tensor, or (Tensor, Tensor, Tensor)
-            * If ``return_uv=False``, it returns a 1D tensor that contains the IDs of the edges.
+            * If ``return_uv=False``, it returns the edge IDs in a tensor, where the i-th
-              If :attr:`u` and :attr:`v` are int, the tensor has length 1. Otherwise, the i-th
+              element is the ID of the edge ``(u[i], v[i])``.
-              element of the tensor is the ID of the edge ``(u[i], v[i])``.
            * If ``return_uv=True``, it returns a tuple of three 1D tensors ``(eu, ev, e)``.
              ``e[i]`` is the ID of an edge from ``eu[i]`` to ``ev[i]``. It returns all edges
-              from ``eu[i]`` to ``ev[i]`` in this case.
+              (including parallel edges) from ``eu[i]`` to ``ev[i]`` in this case.
        Notes
        -----
@@ -2840,34 +2781,35 @@ class DGLHeteroGraph(object):
            return F.as_scalar(eid) if is_int else eid
    def find_edges(self, eid, etype=None):
-        """Return the source and destination node(s) of some particular edge(s)
+        """Return the source and destination node ID(s) given the edge ID(s).
-        with the specified edge type.
        Parameters
        ----------
        eid : edge ID(s)
-            The IDs of the edges for query. The function expects that :attr:`eid` contains
+            The edge IDs. The allowed formats are:
-            valid edge IDs only, i.e. among consecutive integers :math:`0, 1, ... E - 1`, where
-            :math:`E` is the number of edges with the specified edge type.
+            * ``int``: A single ID.
+            * Int Tensor: Each element is an ID. The tensor must have the same device type
-            - ``int``: An edge ID for query.
+              and ID data type as the graph's.
-            - ``Tensor``: A 1D tensor that contains the edge IDs for query, whose data
+            * iterable[int]: Each element is an ID.
-              type and device should be the same as the :py:attr:`idtype` and device of the graph.
-            - ``iterable[int]``: Similar to the tensor, but stores edge IDs in a sequence
+        etype : str or (str, str, str), optional
-              (e.g. list, tuple, numpy.ndarray).
+            The type names of the edges. The allowed type name formats are:
-        etype : str or tuple of str, optional
-            The edge type for query, which can be an edge type (str) or a canonical edge type
+            * ``(str, str, str)`` for source node type, edge type and destination node type.
-            (3-tuple of str). When an edge type appears in multiple canonical edge types, one
+            * or one ``str`` edge type name if the name can uniquely identify a
-            must use a canonical edge type.
+              triplet format in the graph.
+            Can be omitted if the graph has only one type of edges.
        Returns
        -------
        Tensor
-            The source node IDs of the edges, whose i-th element is the source node of the edge
+            The source node IDs of the edges. The i-th element is the source node ID of
-            with ID ``eid[i]``.
+            the i-th edge.
        Tensor
-            The destination node IDs of the edges, whose i-th element is the destination node of
+            The destination node IDs of the edges. The i-th element is the destination node
-            the edge with ID ``eid[i]``.
+            ID of the i-th edge.
        Examples
        --------
@@ -2910,20 +2852,19 @@ class DGLHeteroGraph(object):
        return src, dst
    def in_edges(self, v, form='uv', etype=None):
-        """Return the incoming edges of some particular node(s) with the specified edge type.
+        """Return the incoming edges of the given nodes.
        Parameters
        ----------
-        v : destination node ID(s)
+        v : node ID(s)
-            The destination node(s) for query. The allowed formats are:
+            The node IDs. The allowed formats are:
-            - ``int``: The destination node for query.
+            * ``int``: A single node.
-            - ``Tensor``: A 1D tensor that contains the destination node(s) for query, whose data
+            * Int Tensor: Each element is a node ID. The tensor must have the same device type
-              type and device should be the same as the :py:attr:`idtype` and device of the graph.
+              and ID data type as the graph's.
-            - ``iterable[int]``: Similar to the tensor, but stores node IDs in a sequence
+            * iterable[int]: Each element is a node ID.
-              (e.g. list, tuple, numpy.ndarray).
        form : str, optional
-            The return form, which can be one of the following:
+            The result format, which can be one of the following:
            - ``'eid'``: The returned result is a 1D tensor :math:`EID`, representing
              the IDs of all edges.
@@ -2933,11 +2874,14 @@ class DGLHeteroGraph(object):
            - ``'all'``: The returned result is a 3-tuple of 1D tensors :math:`(U, V, EID)`,
              representing the source nodes, destination nodes and IDs of all edges.
              For each :math:`i`, :math:`(U[i], V[i])` forms an edge with ID :math:`EID[i]`.
-        etype : str or tuple of str, optional
+        etype : str or (str, str, str), optional
-            The edge type for query, which can be an edge type (str) or a canonical edge type
+            The type names of the edges. The allowed type name formats are:
-            (3-tuple of str). When an edge type appears in multiple canonical edge types, one
-            must use a canonical edge type. If the graph has multiple edge types, one must
+            * ``(str, str, str)`` for source node type, edge type and destination node type.
-            specify the argument. Otherwise, it can be omitted.
+            * or one ``str`` edge type name if the name can uniquely identify a
+              triplet format in the graph.
+            Can be omitted if the graph has only one type of edges.
        Returns
        -------
@@ -2992,18 +2936,17 @@ class DGLHeteroGraph(object):
            raise DGLError('Invalid form: {}. Must be "all", "uv" or "eid".'.format(form))
    def out_edges(self, u, form='uv', etype=None):
-        """Return the outgoing edges of some particular node(s) with the specified edge type.
+        """Return the outgoing edges of the given nodes.
        Parameters
        ----------
-        u : source node ID(s)
+        u : node ID(s)
-            The source node(s) for query. The allowed formats are:
+            The node IDs. The allowed formats are:
-            - ``int``: The source node for query.
+            * ``int``: A single node.
-            - ``Tensor``: A 1D tensor that contains the source node(s) for query, whose data
+            * Int Tensor: Each element is a node ID. The tensor must have the same device type
-              type and device should be the same as the :py:attr:`idtype` and device of the graph.
+              and ID data type as the graph's.
-            - ``iterable[int]``: Similar to the tensor, but stores node IDs in a sequence
+            * iterable[int]: Each element is a node ID.
-              (e.g. list, tuple, numpy.ndarray).
        form : str, optional
            The return form, which can be one of the following:
@@ -3015,11 +2958,14 @@ class DGLHeteroGraph(object):
            - ``'all'``: The returned result is a 3-tuple of 1D tensors :math:`(U, V, EID)`,
              representing the source nodes, destination nodes and IDs of all edges.
              For each :math:`i`, :math:`(U[i], V[i])` forms an edge with ID :math:`EID[i]`.
-        etype : str or tuple of str, optional
+        etype : str or (str, str, str), optional
-            The edge type for query, which can be an edge type (str) or a canonical edge type
+            The type names of the edges. The allowed type name formats are:
-            (3-tuple of str). When an edge type appears in multiple canonical edge types, one
-            must use a canonical edge type. If the graph has multiple edge types, one must
+            * ``(str, str, str)`` for source node type, edge type and destination node type.
-            specify the argument. Otherwise, it can be omitted.
+            * or one ``str`` edge type name if the name can uniquely identify a
+              triplet format in the graph.
+            Can be omitted if the graph has only one type of edges.
        Returns
        -------
@@ -3159,43 +3105,41 @@ class DGLHeteroGraph(object):
    def in_degree(self, v, etype=None):
        """Return the in-degree of node ``v`` with edges of type ``etype``.
-        DEPRECATED: Please use in_degrees
+        **DEPRECATED**: Please use in_degrees
        """
        dgl_warning("DGLGraph.in_degree is deprecated. Please use DGLGraph.in_degrees")
        return self.in_degrees(v, etype)
    def in_degrees(self, v=ALL, etype=None):
-        """Return the in-degree(s) of some particular node(s) with the specified edge type.
+        """Return the in-degree(s) of the given nodes.
+        It computes the in-degree(s) w.r.t. the edges of the given edge type.
        Parameters
        ----------
-        v : destination node ID(s), optional
+        v : node IDs
-            The destination node(s) for query. The allowed formats are:
+            The node IDs. The allowed formats are:
-            - ``int``: The destination node for query.
+            * ``int``: A single node.
-            - ``Tensor``: A 1D tensor that contains the destination node(s) for query, whose data
+            * Int Tensor: Each element is a node ID. The tensor must have the same device type
-              type and device should be the same as the :py:attr:`idtype` and device of the graph.
+              and ID data type as the graph's.
-            - ``iterable[int]``: Similar to the tensor, but stores node IDs in a sequence
+            * iterable[int]: Each element is a node ID.
-              (e.g. list, tuple, numpy.ndarray).
-            By default, it considers all nodes.
+            If not given, return the in-degrees of all the nodes.
-        etype : str or tuple of str, optional
+        etype : str or (str, str, str), optional
-            The edge type for query, which can be an edge type (str) or a canonical edge type
+            The type name of the edges. The allowed type name formats are:
-            (3-tuple of str). When an edge type appears in multiple canonical edge types, one
-            must use a canonical edge type. If the graph has multiple edge types, one must
+            * ``(str, str, str)`` for source node type, edge type and destination node type.
-            specify the argument. Otherwise, it can be omitted.
+            * or one ``str`` edge type name if the name can uniquely identify a
+              triplet format in the graph.
+            Can be omitted if the graph has only one type of edges.
        Returns
        -------
-        tensor or int
+        int or Tensor
-            The in-degree(s) of the node(s).
+            The in-degree(s) of the node(s) in a Tensor. The i-th element is the in-degree
+            of the i-th input node. If :attr:`v` is an ``int``, return an ``int`` too.
-            - If :attr:`v` is an ``int`` object, the return result will be an ``int``
-              object as well.
-            - If :attr:`v` is a ``Tensor`` or ``iterable[int]`` object, the return result
-              will be a 1D ``Tensor``. The data type of the result will be the same as the
-              idtype of the graph. The i-th element of the tensor is the in-degree of the
-              node ``v[i]``.
        Examples
        --------
@@ -3251,36 +3195,35 @@ class DGLHeteroGraph(object):
        return self.out_degrees(u, etype)
    def out_degrees(self, u=ALL, etype=None):
-        """Return the out-degree(s) of some particular node(s) with the specified edge type.
+        """Return the out-degree(s) of the given nodes.
+        It computes the out-degree(s) w.r.t. the edges of the given edge type.
        Parameters
        ----------
-        u : source node ID(s), optional
+        u : node IDs
+            The node IDs. The allowed formats are:
-            - ``int``: The source node for query.
+            * ``int``: A single node.
-            - ``Tensor``: A 1D tensor that contains the source node(s) for query, whose data
+            * Int Tensor: Each element is a node ID. The tensor must have the same device type
-              type and device should be the same as the :py:attr:`idtype` and device of the graph.
+              and ID data type as the graph's.
-            - ``iterable[int]``: Similar to the tensor, but stores node IDs in a sequence
+            * iterable[int]: Each element is a node ID.
-              (e.g. list, tuple, numpy.ndarray).
-            By default, it considers all nodes.
+            If not given, return the out-degrees of all the nodes.
-        etype : str or tuple of str, optional
+        etype : str or (str, str, str), optional
-            The edge type for query, which can be an edge type (str) or a canonical edge type
+            The type names of the edges. The allowed type name formats are:
-            (3-tuple of str). When an edge type appears in multiple canonical edge types, one
-            must use a canonical edge type. If the graph has multiple edge types, one must
+            * ``(str, str, str)`` for source node type, edge type and destination node type.
-            specify the argument. Otherwise, it can be omitted.
+            * or one ``str`` edge type name if the name can uniquely identify a
+              triplet format in the graph.
+            Can be omitted if the graph has only one type of edges.
        Returns
        -------
-        tensor or int
+        int or Tensor
-            The out-degree(s) of the node(s).
+            The out-degree(s) of the node(s) in a Tensor. The i-th element is the out-degree
+            of the i-th input node. If :attr:`u` is an ``int``, return an ``int`` too.
-            - If :attr:`u` is an ``int`` object, the return result will be an ``int``
-              object as well.
-            - If :attr:`u` is a ``Tensor`` or ``iterable[int]`` object, the return result
-              will be a 1D ``Tensor``. The data type of the result will be the same as the
-              idtype of the graph. The i-th element of the tensor is the out-degree of the
-              node ``v[i]``.
        Examples
        --------
@@ -3330,7 +3273,7 @@ class DGLHeteroGraph(object):
            return deg
    def adjacency_matrix(self, transpose=True, ctx=F.cpu(), scipy_fmt=None, etype=None):
-        """Alias of :func:`adj`"""
+        """Alias of :meth:`adj`"""
        return self.adj(transpose, ctx, scipy_fmt, etype)
    def adj(self, transpose=True, ctx=F.cpu(), scipy_fmt=None, etype=None):
@@ -3351,12 +3294,15 @@ class DGLHeteroGraph(object):
        scipy_fmt : str, optional
            If specified, return a scipy sparse matrix in the given format.
            Otherwise, return a backend dependent sparse tensor. (Default: None)
-        etype : str or tuple of str, optional
+        etype : str or (str, str, str), optional
-            The edge type for query, which can be an edge type (str) or a canonical edge type
+            The type names of the edges. The allowed type name formats are:
-            (3-tuple of str). When an edge type appears in multiple canonical edge types, one
-            must use a canonical edge type. If given, it returns the number of edges for a
+            * ``(str, str, str)`` for source node type, edge type and destination node type.
-            particular edge type. If not given (default), it returns the total number of edges
+            * or one ``str`` edge type name if the name can uniquely identify a
-            of all types.
+              triplet format in the graph.
+            Can be omitted if the graph has only one type of edges.
        Returns
        -------
@@ -3398,6 +3344,7 @@ class DGLHeteroGraph(object):
        else:
            return self._graph.adjacency_matrix_scipy(etid, transpose, scipy_fmt, False)
    def adjacency_matrix_scipy(self, transpose=True, fmt='csr', return_edge_ids=None):
        """DEPRECATED: please use ``dgl.adjacency_matrix(transpose, scipy_fmt=fmt)``.
        """
@@ -3407,10 +3354,6 @@ class DGLHeteroGraph(object):
        return self.adjacency_matrix(transpose=transpose, scipy_fmt=fmt)
-    def incidence_matrix(self, typestr, ctx=F.cpu(), etype=None):
-        """Alias of :func:`inc`"""
-        return self.inc(typestr, ctx, etype)
    def inc(self, typestr, ctx=F.cpu(), etype=None):
        """Return the incidence matrix representation of edges with the given
        edge type.
@@ -3446,12 +3389,14 @@ class DGLHeteroGraph(object):
            Can be either ``in``, ``out`` or ``both``
        ctx : context, optional
            The context of returned incidence matrix. (Default: cpu)
-        etype : str or tuple of str, optional
+        etype : str or (str, str, str), optional
-            The edge type for query, which can be an edge type (str) or a canonical edge type
+            The type names of the edges. The allowed type name formats are:
-            (3-tuple of str). When an edge type appears in multiple canonical edge types, one
-            must use a canonical edge type. If given, it returns the number of edges for a
+            * ``(str, str, str)`` for source node type, edge type and destination node type.
-            particular edge type. If not given (default), it returns the total number of edges
+            * or one ``str`` edge type name if the name can uniquely identify a
-            of all types.
+              triplet format in the graph.
+            Can be omitted if the graph has only one type of edges.
        Returns
        -------
@@ -3485,6 +3430,8 @@ class DGLHeteroGraph(object):
        etid = self.get_etype_id(etype)
        return self._graph.incidence_matrix(etid, typestr, ctx)[0]
+    incidence_matrix = inc
    #################################################################
    # Features
    #################################################################
@@ -3497,8 +3444,8 @@ class DGLHeteroGraph(object):
        Parameters
        ----------
        ntype : str, optional
-            The node type for query. If the graph has multiple node types, one must
+            The node type name. Can be omitted if there is only one type of nodes
-            specify the argument. Otherwise, it can be omitted.
+            in the graph.
        Returns
        -------
@@ -3544,11 +3491,15 @@ class DGLHeteroGraph(object):
        Parameters
        ----------
-        etype : str or tuple of str, optional
+        etype : str or (str, str, str), optional
-            The edge type for query, which can be an edge type (str) or a canonical edge type
+            The type names of the edges. The allowed type name formats are:
-            (3-tuple of str). When an edge type appears in multiple canonical edge types, one
-            must use a canonical edge type. If the graph has multiple edge types, one must
+            * ``(str, str, str)`` for source node type, edge type and destination node type.
-            specify the argument. Otherwise, it can be omitted.
+            * or one ``str`` edge type name if the name can uniquely identify a
+              triplet format in the graph.
+            Can be omitted if the graph has only one type of edges.
        Returns
        -------
@@ -3614,9 +3565,7 @@ class DGLHeteroGraph(object):
            The name of the feature that the initializer applies. If not given, the
            initializer applies to all features.
        ntype : str, optional
-            The type of the nodes that the initializer applies. If the graph has
+            The type name of the nodes. Can be omitted if the graph has only one type of nodes.
-            multiple node types, one must specify the argument. Otherwise, it can
-            be omitted.
        Notes
        -----
@@ -3703,11 +3652,15 @@ class DGLHeteroGraph(object):
        field : str, optional
            The name of the feature that the initializer applies. If not given, the
            initializer applies to all features.
-        etype : str or tuple of str, optional
+        etype : str or (str, str, str), optional
-            The edge type for query, which can be an edge type (str) or a canonical edge type
+            The type names of the edges. The allowed type name formats are:
-            (3-tuple of str). When an edge type appears in multiple canonical edge types, one
-            must use a canonical edge type. If the graph has multiple edge types, one must
+            * ``(str, str, str)`` for source node type, edge type and destination node type.
-            specify the argument. Otherwise, it can be omitted.
+            * or one ``str`` edge type name if the name can uniquely identify a
+              triplet format in the graph.
+            Can be omitted if the graph has only one type of edges.
        Notes
        -----
@@ -3945,27 +3898,50 @@ class DGLHeteroGraph(object):
    #################################################################
    def apply_nodes(self, func, v=ALL, ntype=None, inplace=False):
-        """Apply the function on the nodes with the same type to update their
+        """Update the features of the specified nodes by the provided function.
-        features.
-        If None is provided for ``func``, nothing will happen.
        Parameters
        ----------
-        func : callable or None
+        func : callable
-            Apply function on the nodes. The function should be
+            The function to update node features. It must be
-            a :mod:`Node UDF <dgl.udf>`.
+            a :ref:`apiudf`.
-        v : int or iterable of int or tensor, optional
+        v : node IDs
-            The (type-specific) node (ids) on which to apply ``func``. (Default: ALL)
+            The node IDs. The allowed formats are:
+            * ``int``: A single node.
+            * Int Tensor: Each element is a node ID. The tensor must have the same device type
+              and ID data type as the graph's.
+            * iterable[int]: Each element is a node ID.
+            If not given (default), use all the nodes in the graph.
        ntype : str, optional
-            The node type. Can be omitted if there is only one node type
+            The node type name. Can be omitted if there is
-            in the graph. (Default: None)
+            only one type of nodes in the graph.
        inplace : bool, optional
-            **DEPRECATED**. If True, update will be done in place, but autograd will break.
+            **DEPRECATED**.
-            (Default: False)
        Examples
        --------
+        The following example uses PyTorch backend.
+        >>> import dgl
+        >>> import torch
+        **Homogeneous graph**
+        >>> g = dgl.graph(([0, 1, 2, 3], [1, 2, 3, 4]))
+        >>> g.ndata['h'] = torch.ones(5, 2)
+        >>> g.apply_nodes(lambda nodes: {'x' : nodes.data['h'] * 2})
+        >>> g.ndata['x']
+        tensor([[2., 2.],
+                [2., 2.],
+                [2., 2.],
+                [2., 2.],
+                [2., 2.]])
+        **Heterogeneous graph**
        >>> g = dgl.heterograph({('user', 'follows', 'user'): ([0, 1], [1, 2])})
        >>> g.nodes['user'].data['h'] = torch.ones(3, 5)
        >>> g.apply_nodes(lambda nodes: {'h': nodes.data['h'] * 2}, ntype='user')
@@ -3990,27 +3966,76 @@ class DGLHeteroGraph(object):
        self._set_n_repr(ntid, v, ndata)
    def apply_edges(self, func, edges=ALL, etype=None, inplace=False):
-        """Apply the function on the edges with the same type to update their
+        """Update the features of the specified edges by the provided function.
-        features.
-        If None is provided for ``func``, nothing will happen.
        Parameters
        ----------
-        func : callable
+        func : dgl.function.BuiltinFunction or callable
-            Apply function on the edge. The function should be
+            The function to generate new edge features. It must be either
-            an :mod:`Edge UDF <dgl.udf>`.
+            a :ref:`api-built-in` or a :ref:`apiudf`.
-        edges : optional
+        edges : edges
-            Edges on which to apply ``func``. See :func:`send` for valid
+            The edges to update features on. The allowed input formats are:
-            edge specification. (Default: ALL)
-        etype : str or tuple of str, optional
+            * ``int``: A single edge ID.
-            The edge type. Can be omitted if there is only one edge type
+            * Int Tensor: Each element is an edge ID.  The tensor must have the same device type
-            in the graph. (Default: None)
+              and ID data type as the graph's.
+            * iterable[int]: Each element is an edge ID.
+            * (Tensor, Tensor): The node-tensors format where the i-th elements
+              of the two tensors specify an edge.
+            * (iterable[int], iterable[int]): Similar to the node-tensors format but
+              stores edge endpoints in python iterables.
+            Default value specifies all the edges in the graph.
+        etype : str or (str, str, str), optional
+            The type name of the edges. The allowed type name formats are:
+            * ``(str, str, str)`` for source node type, edge type and destination node type.
+            * or one ``str`` edge type name if the name can uniquely identify a
+              triplet format in the graph.
+            Can be omitted if the graph has only one type of edges.
        inplace: bool, optional
-            **DEPRECATED**. Must be False.
+            **DEPRECATED**.
+        Notes
+        -----
+        DGL recommends using DGL's built-in functions for the :attr:`func` argument,
+        because DGL will invoke efficient kernels that avoid copying node features to
+        edge features in this case.
        Examples
        --------
+        The following example uses PyTorch backend.
+        >>> import dgl
+        >>> import torch
+        **Homogeneous graph**
+        >>> g = dgl.graph(([0, 1, 2, 3], [1, 2, 3, 4]))
+        >>> g.ndata['h'] = torch.ones(5, 2)
+        >>> g.apply_edges(lambda edges: {'x' : edges.src['h'] + edges.dst['h']})
+        >>> g.edata['x']
+        tensor([[2., 2.],
+                [2., 2.],
+                [2., 2.],
+                [2., 2.]])
+        Use built-in function
+        >>> import dgl.function as fn
+        >>> g.apply_edges(fn.u_add_v('h', 'h', 'x'))
+        >>> g.edata['x']
+        tensor([[2., 2.],
+                [2., 2.],
+                [2., 2.],
+                [2., 2.]])
+        **Heterogeneous graph**
        >>> g = dgl.heterograph({('user', 'plays', 'game'): ([0, 1, 1, 2], [0, 0, 2, 1])})
        >>> g.edges[('user', 'plays', 'game')].data['h'] = torch.ones(4, 5)
        >>> g.apply_edges(lambda edges: {'h': edges.data['h'] * 2})
@@ -4048,40 +4073,50 @@ class DGLHeteroGraph(object):
                      apply_node_func=None,
                      etype=None,
                      inplace=False):
-        """Send messages along edges of the specified type, and let destinations
+        """Send messages along the specified edges and reduce them on
-        receive them.
+        the destination nodes to update their features.
-        Optionally, apply a function to update the node features after "receive".
-        This is a convenient combination for performing
-        :mod:`send <dgl.DGLHeteroGraph.send>` along the ``edges`` and
-        :mod:`recv <dgl.DGLHeteroGraph.recv>` for the destinations of the ``edges``.
-        **Only works if the graph has one edge type.**  For multiple types, use
-        .. code::
-           g['edgetype'].send_and_recv(edges, message_func, reduce_func,
-                                       apply_node_func, inplace=inplace)
        Parameters
        ----------
-        edges : See :func:`send` for valid edge specification.
+        edges : edges
-            Edges on which to apply ``func``.
+            The edges to send and receive messages on. The allowed input formats are:
-        message_func : callable
-            Message function on the edges. The function should be
+            * ``int``: A single edge ID.
-            an :mod:`Edge UDF <dgl.udf>`.
+            * Int Tensor: Each element is an edge ID.  The tensor must have the same device type
-        reduce_func : callable
+              and ID data type as the graph's.
-            Reduce function on the node. The function should be
+            * iterable[int]: Each element is an edge ID.
-            a :mod:`Node UDF <dgl.udf>`.
+            * (Tensor, Tensor): The node-tensors format where the i-th elements
+              of the two tensors specify an edge.
+            * (iterable[int], iterable[int]): Similar to the node-tensors format but
+              stores edge endpoints in python iterables.
+        message_func : dgl.function.BuiltinFunction or callable
+            The message function to generate messages along the edges.
+            It must be either a :ref:`api-built-in` or a :ref:`apiudf`.
+        reduce_func : dgl.function.BuiltinFunction or callable
+            The reduce function to aggregate the messages.
+            It must be either a :ref:`api-built-in` or a :ref:`apiudf`.
        apply_node_func : callable, optional
-            Apply function on the nodes. The function should be
+            An optional apply function to further update the node features
-            a :mod:`Node UDF <dgl.udf>`. (Default: None)
+            after the message reduction. It must be a :ref:`apiudf`.
-        etype : str or tuple of str, optional
+        etype : str or (str, str, str), optional
-            The edge type. Can be omitted if there is only one edge type
+            The type name of the edges. The allowed type name formats are:
-            in the graph. (Default: None)
+            * ``(str, str, str)`` for source node type, edge type and destination node type.
+            * or one ``str`` edge type name if the name can uniquely identify a
+              triplet format in the graph.
+            Can be omitted if the graph has only one type of edges.
        inplace: bool, optional
-            **DEPRECATED**. Must be False.
+            **DEPRECATED**.
+        Notes
+        -----
+        DGL recommends using DGL's built-in functions for the :attr:`message_func`
+        and the :attr:`reduce_func` arguments,
+        because DGL will invoke efficient kernels that avoid copying node features to
+        edge features in this case.
        Examples
        --------
@@ -4090,6 +4125,29 @@ class DGLHeteroGraph(object):
        >>> import dgl.function as fn
        >>> import torch
+        **Homogeneous graph**
+        >>> g = dgl.graph(([0, 1, 2, 3], [1, 2, 3, 4]))
+        >>> g.ndata['x'] = torch.ones(5, 2)
+        >>> # Specify edges using (iterable[int], iterable[int]).
+        >>> g.send_and_recv(([1, 2], [2, 3]), fn.copy_u('x', 'm'), fn.sum('m', 'h'))
+        >>> g.ndata['h']
+        tensor([[0., 0.],
+                [0., 0.],
+                [1., 1.],
+                [1., 1.],
+                [0., 0.]])
+        >>> # Specify edges using IDs.
+        >>> g.send_and_recv([0, 2, 3], fn.copy_u('x', 'm'), fn.sum('m', 'h'))
+        >>> g.ndata['h']
+        tensor([[0., 0.],
+                [1., 1.],
+                [0., 0.],
+                [1., 1.],
+                [1., 1.]])
+        **Heterogeneous graph**
        >>> g = dgl.heterograph({
        ...     ('user', 'follows', 'user'): ([0, 1], [1, 2]),
        ...     ('user', 'plays', 'game'): ([0, 1, 1, 2], [0, 0, 1, 1])
@@ -4128,45 +4186,50 @@ class DGLHeteroGraph(object):
             apply_node_func=None,
             etype=None,
             inplace=False):
-        """Pull messages from the node(s)' predecessors and then update their features.
+        """Pull messages from the specified node(s)' predecessors along the
+        specified edge type, aggregate them to update the node features.
-        Optionally, apply a function to update the node features after receive.
-        This is equivalent to :mod:`send_and_recv <dgl.DGLHeteroGraph.send_and_recv>`
-        on the incoming edges of ``v`` with the specified type.
-        Other notes:
-        * `reduce_func` will be skipped for nodes with no incoming messages.
-        * If all ``v`` have no incoming message, this will downgrade to an :func:`apply_nodes`.
-        * If some ``v`` have no incoming message, their new feature value will be calculated
-          by the column initializer (see :func:`set_n_initializer`). The feature shapes and
-          dtypes will be inferred.
-        **Only works if the graph has one edge type.** For multiple types, use
-        .. code::
-           g['edgetype'].pull(v, message_func, reduce_func, apply_node_func, inplace=inplace)
        Parameters
        ----------
-        v : int, container or tensor, optional
+        v : node IDs
-            The node(s) to be updated.
+            The node IDs. The allowed formats are:
-        message_func : callable
-            Message function on the edges. The function should be
+            * ``int``: A single node.
-            an :mod:`Edge UDF <dgl.udf>`.
+            * Int Tensor: Each element is a node ID. The tensor must have the same device type
-        reduce_func : callable
+              and ID data type as the graph's.
-            Reduce function on the node. The function should be
+            * iterable[int]: Each element is a node ID.
-            a :mod:`Node UDF <dgl.udf>`.
+        message_func : dgl.function.BuiltinFunction or callable
+            The message function to generate messages along the edges.
+            It must be either a :ref:`api-built-in` or a :ref:`apiudf`.
+        reduce_func : dgl.function.BuiltinFunction or callable
+            The reduce function to aggregate the messages.
+            It must be either a :ref:`api-built-in` or a :ref:`apiudf`.
        apply_node_func : callable, optional
-            Apply function on the nodes. The function should be
+            An optional apply function to further update the node features
-            a :mod:`Node UDF <dgl.udf>`. (Default: None)
+            after the message reduction. It must be a :ref:`apiudf`.
-        etype : str or tuple of str, optional
+        etype : str or (str, str, str), optional
-            The edge type. Can be omitted if there is only one edge type
+            The type name of the edges. The allowed type name formats are:
-            in the graph. (Default: None)
+            * ``(str, str, str)`` for source node type, edge type and destination node type.
+            * or one ``str`` edge type name if the name can uniquely identify a
+              triplet format in the graph.
+            Can be omitted if the graph has only one type of edges.
        inplace: bool, optional
-            **DEPRECATED**. Must be False.
+            **DEPRECATED**.
+        Notes
+        -----
+        * If some of the given nodes :attr:`v` have no in-edges, DGL does not invoke
+          message and reduce functions for these nodes and fills their aggregated messages
+          with zero. Users can control the filled values via :meth:`set_n_initializer`.
+          DGL still invokes :attr:`apply_node_func` if provided.
+        * DGL recommends using DGL's built-in functions for the :attr:`message_func`
+          and the :attr:`reduce_func` arguments,
+          because DGL will invoke efficient kernels that avoid copying node features to
+          edge features in this case.
        Examples
        --------
@@ -4175,7 +4238,19 @@ class DGLHeteroGraph(object):
        >>> import dgl.function as fn
        >>> import torch
-        Instantiate a heterograph.
+        **Homogeneous graph**
+        >>> g = dgl.graph(([0, 1, 2, 3], [1, 2, 3, 4]))
+        >>> g.ndata['x'] = torch.ones(5, 2)
+        >>> g.pull([0, 3, 4], fn.copy_u('x', 'm'), fn.sum('m', 'h'))
+        >>> g.ndata['h']
+        tensor([[0., 0.],
+                [0., 0.],
+                [0., 0.],
+                [1., 1.],
+                [1., 1.]])
+        **Heterogeneous graph**
        >>> g = dgl.heterograph({
        ...     ('user', 'follows', 'user'): ([0, 1], [1, 2]),
@@ -4214,36 +4289,46 @@ class DGLHeteroGraph(object):
             apply_node_func=None,
             etype=None,
             inplace=False):
-        """Send message from the node(s) to their successors and update them.
+        """Send message from the specified node(s) to their successors
+        along the specified edge type and update their node features.
-        This is equivalent to performing
-        :mod:`send_and_recv <DGLHeteroGraph.send_and_recv>` along the outbound
-        edges from ``u``.
-        **Only works if the graph has one edge type.** For multiple types, use
-        .. code::
-           g['edgetype'].push(u, message_func, reduce_func, apply_node_func, inplace=inplace)
        Parameters
        ----------
-        u : int, container or tensor
+        u : node IDs
-            The node(s) to push out messages.
+            The node IDs. The allowed formats are:
-        message_func : callable
-            Message function on the edges. The function should be
+            * ``int``: A single node.
-            an :mod:`Edge UDF <dgl.udf>`.
+            * Int Tensor: Each element is a node ID. The tensor must have the same device type
-        reduce_func : callable
+              and ID data type as the graph's.
-            Reduce function on the node. The function should be
+            * iterable[int]: Each element is a node ID.
-            a :mod:`Node UDF <dgl.udf>`.
+        message_func : dgl.function.BuiltinFunction or callable
+            The message function to generate messages along the edges.
+            It must be either a :ref:`api-built-in` or a :ref:`apiudf`.
+        reduce_func : dgl.function.BuiltinFunction or callable
+            The reduce function to aggregate the messages.
+            It must be either a :ref:`api-built-in` or a :ref:`apiudf`.
        apply_node_func : callable, optional
-            Apply function on the nodes. The function should be
+            An optional apply function to further update the node features
-            a :mod:`Node UDF <dgl.udf>`. (Default: None)
+            after the message reduction. It must be a :ref:`apiudf`.
-        etype : str, optional
+        etype : str or (str, str, str), optional
-            The edge type. Can be omitted if there is only one edge type
+            The type name of the edges. The allowed type name formats are:
-            in the graph. (Default: None)
+            * ``(str, str, str)`` for source node type, edge type and destination node type.
+            * or one ``str`` edge type name if the name can uniquely identify a
+              triplet format in the graph.
+            Can be omitted if the graph has only one type of edges.
        inplace: bool, optional
-            **DEPRECATED**. Must be False.
+            **DEPRECATED**.
+        Notes
+        -----
+        DGL recommends using DGL's built-in functions for the :attr:`message_func`
+        and the :attr:`reduce_func` arguments,
+        because DGL will invoke efficient kernels that avoid copying node features to
+        edge features in this case.
        Examples
        --------
@@ -4252,7 +4337,19 @@ class DGLHeteroGraph(object):
        >>> import dgl.function as fn
        >>> import torch
-        Instantiate a heterograph.
+        **Homogeneous graph**
+        >>> g = dgl.graph(([0, 1, 2, 3], [1, 2, 3, 4]))
+        >>> g.ndata['x'] = torch.ones(5, 2)
+        >>> g.push([0, 1], fn.copy_u('x', 'm'), fn.sum('m', 'h'))
+        >>> g.ndata['h']
+        tensor([[0., 0.],
+                [1., 1.],
+                [1., 1.],
+                [0., 0.],
+                [0., 0.]])
+        **Heterogeneous graph**
        >>> g = dgl.heterograph({('user', 'follows', 'user'): ([0, 0], [1, 2])})
        >>> g.nodes['user'].data['h'] = torch.tensor([[0.], [1.], [2.]])
@@ -4275,42 +4372,59 @@ class DGLHeteroGraph(object):
                   reduce_func,
                   apply_node_func=None,
                   etype=None):
-        """Send messages through all edges and update all nodes.
+        """Send messages along all the edges of the specified type
+        and update all the nodes of the corresponding destination type.
-        Optionally, apply a function to update the node features after receive.
-        This is equivalent to
-        :mod:`send_and_recv <dgl.DGLHeteroGraph.send_and_recv>` over all edges
-        of the specified type.
-        **Only works if the graph has one edge type.** For multiple types, use
-        .. code::
-           g['edgetype'].update_all(message_func, reduce_func, apply_node_func)
        Parameters
        ----------
-        message_func : callable
+        message_func : dgl.function.BuiltinFunction or callable
-            Message function on the edges. The function should be
+            The message function to generate messages along the edges.
-            an :mod:`Edge UDF <dgl.udf>`.
+            It must be either a :ref:`api-built-in` or a :ref:`apiudf`.
-        reduce_func : callable
+        reduce_func : dgl.function.BuiltinFunction or callable
-            Reduce function on the node. The function should be
+            The reduce function to aggregate the messages.
-            a :mod:`Node UDF <dgl.udf>`.
+            It must be either a :ref:`api-built-in` or a :ref:`apiudf`.
        apply_node_func : callable, optional
-            Apply function on the nodes. The function should be
+            An optional apply function to further update the node features
-            a :mod:`Node UDF <dgl.udf>`. (Default: None)
+            after the message reduction. It must be a :ref:`apiudf`.
-        etype : str, optional
+        etype : str or (str, str, str), optional
-            The edge type. Can be omitted if there is only one edge type
+            The type name of the edges. The allowed type name formats are:
-            in the graph. (Default: None)
+            * ``(str, str, str)`` for source node type, edge type and destination node type.
+            * or one ``str`` edge type name if the name can uniquely identify a
+              triplet format in the graph.
+            Can be omitted if the graph has only one type of edges.
+        Notes
+        -----
+        * If some of the nodes in the graph have no in-edges, DGL does not invoke
+          message and reduce functions for these nodes and fills their aggregated messages
+          with zero. Users can control the filled values via :meth:`set_n_initializer`.
+          DGL still invokes :attr:`apply_node_func` if provided.
+        * DGL recommends using DGL's built-in functions for the :attr:`message_func`
+          and the :attr:`reduce_func` arguments,
+          because DGL will invoke efficient kernels that avoid copying node features to
+          edge features in this case.
        Examples
        --------
-        >>> import torch
        >>> import dgl
        >>> import dgl.function as fn
+        >>> import torch
-        Instantiate a heterograph.
+        **Homogeneous graph**
+        >>> g = dgl.graph(([0, 1, 2, 3], [1, 2, 3, 4]))
+        >>> g.ndata['x'] = torch.ones(5, 2)
+        >>> g.update_all(fn.copy_u('x', 'm'), fn.sum('m', 'h'))
+        >>> g.ndata['h']
+        tensor([[0., 0.],
+                [1., 1.],
+                [1., 1.],
+                [1., 1.],
+                [1., 1.]])
+        **Heterogeneous graph**
        >>> g = dgl.heterograph({('user', 'follows', 'user'): ([0, 1, 2], [1, 2, 2])})
@@ -4335,35 +4449,48 @@ class DGLHeteroGraph(object):
    #################################################################
    def multi_update_all(self, etype_dict, cross_reducer, apply_node_func=None):
-        r"""Send and receive messages along all edges.
+        r"""Send messages along all the edges, reduce them first type-wise and
+        then across different types, and then update the node features of all
-        This is equivalent to
+        the nodes.
-        :mod:`multi_send_and_recv <dgl.DGLHeteroGraph.multi_send_and_recv>`
-        over all edges.
        Parameters
        ----------
        etype_dict : dict
-            Mapping an edge type (str or tuple of str) to the type specific
+            Arguments for edge-type-wise message passing. The keys are edge types
-            configuration (3-tuples). Each 3-tuple represents
+            while the values are message passing arguments.
-            (msg_func, reduce_func, apply_node_func):
+            The allowed key formats are:
-            * msg_func: callable
-                  Message function on the edges. The function should be
+            * ``(str, str, str)`` for source node type, edge type and destination node type.
-                  an :mod:`Edge UDF <dgl.udf>`.
+            * or one ``str`` edge type name if the name can uniquely identify a
-            * reduce_func: callable
+              triplet format in the graph.
-                  Reduce function on the nodes. The function should be
-                  a :mod:`Node UDF <dgl.udf>`.
+            The value must be a tuple ``(message_func, reduce_func, [apply_node_func])``, where
+            * message_func : dgl.function.BuiltinFunction or callable
+                The message function to generate messages along the edges.
+                It must be either a :ref:`api-built-in` or a :ref:`apiudf`.
+            * reduce_func : dgl.function.BuiltinFunction or callable
+                The reduce function to aggregate the messages.
+                It must be either a :ref:`api-built-in` or a :ref:`apiudf`.
            * apply_node_func : callable, optional
-                  Apply function on the nodes. The function should be
+                An optional apply function to further update the node features
-                  a :mod:`Node UDF <dgl.udf>`. (Default: None)
+                after the message reduction. It must be a :ref:`apiudf`.
        cross_reducer : str
            Cross type reducer. One of ``"sum"``, ``"min"``, ``"max"``, ``"mean"``, ``"stack"``.
-        apply_node_func : callable
+        apply_node_func : callable, optional
-            Apply function on the nodes. The function should be
+            An optional apply function after the messages are reduced both
-            a :mod:`Node UDF <dgl.udf>`. (Default: None)
+            type-wise and across different types.
-        inplace: bool, optional
+            It must be a :ref:`apiudf`.
-            **DEPRECATED**. Must be False.
+        Notes
+        -----
+        DGL recommends using DGL's built-in functions for the message_func
+        and the reduce_func in the type-wise message passing arguments,
+        because DGL will invoke efficient kernels that avoid copying node features to
+        edge features in this case.
        Examples
        --------
@@ -4431,21 +4558,27 @@ class DGLHeteroGraph(object):
        Parameters
        ----------
-        nodes_generator : iterable, each element is a list or a tensor of node ids
+        nodes_generator : iterable[node IDs]
-            The generator of node frontiers. It specifies which nodes perform
+            The generator of node frontiers. Each frontier is a set of node IDs
-            :func:`pull` at each timestep.
+            stored in Tensor or python iterables.
-        message_func : callable
+            It specifies which nodes perform :func:`pull` at each step.
-            Message function on the edges. The function should be
+        message_func : dgl.function.BuiltinFunction or callable
-            an :mod:`Edge UDF <dgl.udf>`.
+            The message function to generate messages along the edges.
-        reduce_func : callable
+            It must be either a :ref:`api-built-in` or a :ref:`apiudf`.
-            Reduce function on the node. The function should be
+        reduce_func : dgl.function.BuiltinFunction or callable
-            a :mod:`Node UDF <dgl.udf>`.
+            The reduce function to aggregate the messages.
+            It must be either a :ref:`api-built-in` or a :ref:`apiudf`.
        apply_node_func : callable, optional
-            Apply function on the nodes. The function should be
+            An optional apply function to further update the node features
-            a :mod:`Node UDF <dgl.udf>`. (Default: None)
+            after the message reduction. It must be a :ref:`apiudf`.
-        etype : str, optional
+        etype : str or (str, str, str), optional
-            The edge type. Can be omitted if there is only one edge type
+            The type name of the edges. The allowed type name formats are:
-            in the graph. (Default: None)
+            * ``(str, str, str)`` for source node type, edge type and destination node type.
+            * or one ``str`` edge type name if the name can uniquely identify a
+              triplet format in the graph.
+            Can be omitted if the graph has only one type of edges.
        Examples
        --------
@@ -4492,18 +4625,23 @@ class DGLHeteroGraph(object):
        ----------
        edges_generator : generator
            The generator of edge frontiers.
-        message_func : callable
+        message_func : dgl.function.BuiltinFunction or callable
-            Message function on the edges. The function should be
+            The message function to generate messages along the edges.
-            an :mod:`Edge UDF <dgl.udf>`.
+            It must be either a :ref:`api-built-in` or a :ref:`apiudf`.
-        reduce_func : callable
+        reduce_func : dgl.function.BuiltinFunction or callable
-            Reduce function on the node. The function should be
+            The reduce function to aggregate the messages.
-            a :mod:`Node UDF <dgl.udf>`.
+            It must be either a :ref:`api-built-in` or a :ref:`apiudf`.
        apply_node_func : callable, optional
-            Apply function on the nodes. The function should be
+            An optional apply function to further update the node features
-            a :mod:`Node UDF <dgl.udf>`. (Default: None)
+            after the message reduction. It must be a :ref:`apiudf`.
-        etype : str, optional
+        etype : str or (str, str, str), optional
-            The edge type. Can be omitted if there is only one edge type
+            The type name of the edges. The allowed type name formats are:
-            in the graph. (Default: None)
+            * ``(str, str, str)`` for source node type, edge type and destination node type.
+            * or one ``str`` edge type name if the name can uniquely identify a
+              triplet format in the graph.
+            Can be omitted if the graph has only one type of edges.
        Examples
        --------
@@ -4563,7 +4701,7 @@ class DGLHeteroGraph(object):
        Returns
        -------
-        tensor
+        Tensor
            A 1D tensor that contains the ID(s) of the node(s) that satisfy the predicate.
        Examples
@@ -4630,30 +4768,31 @@ class DGLHeteroGraph(object):
            Its output tensor should be a 1D boolean tensor with
            each element indicating whether the corresponding edge in
            the batch satisfies the predicate.
-        edges : edge ID(s) or edge end nodes, optional
+        edges : edges, optional
-            The edge(s) for query. The allowed formats are:
+            The edges to filter on. The allowed input formats are:
-            - Tensor: A 1D tensor that contains the IDs of the edge(s) for query, whose data
+            * ``int``: A single edge ID.
-              type and device should be the same as the :py:attr:`idtype` and device of the graph.
+            * Int Tensor: Each element is an edge ID.  The tensor must have the same device type
-            - iterable[int]: Similar to the tensor, but stores edge IDs in a sequence
+              and ID data type as the graph's.
-              (e.g. list, tuple, numpy.ndarray).
+            * iterable[int]: Each element is an edge ID.
-            - (Tensor, Tensor): A 2-tuple of the source and destination nodes of multiple
+            * (Tensor, Tensor): The node-tensors format where the i-th elements
-              edges for query. Each tensor is a 1D tensor containing node IDs. DGL calls this
+              of the two tensors specify an edge.
-              format "tuple of node-tensors". The data type and device of the tensors should
+            * (iterable[int], iterable[int]): Similar to the node-tensors format but
-              be the same as the :py:attr:`idtype` and device of the graph.
+              stores edge endpoints in python iterables.
-            - (iterable[int], iterable[int]): Similar to the tuple of node-tensors format,
-              but stores node IDs in two sequences (e.g. list, tuple, numpy.ndarray).
+            By default, it considers all the edges.
+        etype : str or (str, str, str), optional
-            By default, it considers all edges.
+            The type name of the edges. The allowed type name formats are:
-        etype : str or tuple of str, optional
-            The edge type for query, which can be an edge type (str) or a canonical edge type
+            * ``(str, str, str)`` for source node type, edge type and destination node type.
-            (3-tuple of str). When an edge type appears in multiple canonical edge types, one
+            * or one ``str`` edge type name if the name can uniquely identify a
-            must use a canonical edge type. If the graph has multiple edge types, one must
+              triplet format in the graph.
-            specify the argument. Otherwise, it can be omitted.
+            Can be omitted if the graph has only one type of edges.
        Returns
        -------
-        tensor
+        Tensor
            A 1D tensor that contains the ID(s) of the edge(s) that satisfy the predicate.
        Examples

--- a/python/dgl/nn/__init__.py
+++ b/python/dgl/nn/__init__.py
-"""Package for neural network common components."""
+"""The ``dgl.nn`` package contains framework-specific implementations for
+common Graph Neural Network layers (called modules in PyTorch and Blocks in MXNet).
+Users can directly import ``dgl.nn.<layer_name>`` (e.g., ``dgl.nn.GraphConv``),
+and the package will dispatch the layer name to the actual implementation
+according to the backend framework currently in use.
+Note that there are coverage differences among frameworks. If you encounter
+an ``ImportError: cannot import name 'XXX'`` error, that means the layer is
+not available to the current backend. If you wish a module to appear in DGL,
+please `create an issue <https://github.com/dmlc/dgl/issues>`_ whose title starts with
+"[Feature Request] NN Module XXXModel". If you want to contribute a NN module,
+please `create a pull request <https://github.com/dmlc/dgl/pulls>`_ whose title starts
+with "[NN] XXX module".
+"""
 import importlib
 import sys
 import os

--- a/python/dgl/random.py
+++ b/python/dgl/random.py
@@ -8,14 +8,12 @@ from . import ndarray as nd
 __all__ = ['seed']
 def seed(val):
-    """Set the seed of randomized methods in DGL.
+    """Set the random seed of DGL.
-    The randomized methods include various samplers and random walk routines.
    Parameters
    ----------
    val : int
-        The seed
+        The seed.
    """
    _CAPI_SetSeed(val)
@@ -41,8 +39,6 @@ def choice(a, size, replace=True, prob=None):  # pylint: disable=invalid-name
    It out-performs numpy for non-uniform sampling in general cases.
-    TODO(minjie): support RNG as one of the arguments.
    Parameters
    ----------
    a : 1-D tensor or int
@@ -61,6 +57,7 @@ def choice(a, size, replace=True, prob=None):  # pylint: disable=invalid-name
    samples : 1-D tensor
        The generated random samples
    """
+    #TODO(minjie): support RNG as one of the arguments.
    if isinstance(size, tuple):
        num = np.prod(size)
    else:

--- a/python/dgl/readout.py
+++ b/python/dgl/readout.py
@@ -28,9 +28,9 @@ def readout_nodes(graph, feat, weight=None, *, op='sum', ntype=None):
    feat : str
        Node feature name.
    weight : str, optional
-        Node weight name. If None, no weighting will be performed,
+        Node weight name. None means aggregating without weights.
-        otherwise, weight each node feature with field :attr:`feat`.
+        Otherwise, multiply each node feature by node feature :attr:`weight`
-        for aggregation. The weight feature shape must be compatible with
+        before aggregation. The weight feature shape must be compatible with
        an element-wise multiplication with the feature tensor.
    op : str, optional
        Readout operator. Can be 'sum', 'max', 'min', 'mean'.
@@ -39,7 +39,7 @@ def readout_nodes(graph, feat, weight=None, *, op='sum', ntype=None):
    Returns
    -------
-    tensor
+    Tensor
        Result tensor.
    Examples
@@ -101,22 +101,28 @@ def readout_edges(graph, feat, weight=None, *, op='sum', etype=None):
    Parameters
    ----------
    graph : DGLGraph.
-        Input graph.
+        The input graph.
    feat : str
-        Edge feature name.
+        The edge feature name.
    weight : str, optional
-        Edge weight name. If None, no weighting will be performed,
+        The edge weight feature name. If None, no weighting will be performed,
        otherwise, multiply each edge feature by edge feature :attr:`weight`
        before aggregation. The weight feature shape must be compatible with
        an element-wise multiplication with the feature tensor.
    op : str, optional
        Readout operator. Can be 'sum', 'max', 'min', 'mean'.
-    etype : str, tuple of str, optional
+    etype : str or (str, str, str), optional
-        Edge type. Can be omitted if there is only one edge type in the graph.
+        The type names of the edges. The allowed type name formats are:
+        * ``(str, str, str)`` for source node type, edge type and destination node type.
+        * or one ``str`` edge type name if the name can uniquely identify a
+          triplet format in the graph.
+        Can be omitted if the graph has only one type of edges.
    Returns
    -------
-    tensor
+    Tensor
        Result tensor.
    Examples
@@ -166,31 +172,55 @@ def readout_edges(graph, feat, weight=None, *, op='sum', etype=None):
 def sum_nodes(graph, feat, weight=None, *, ntype=None):
    """Syntax sugar for ``dgl.readout_nodes(graph, feat, weight, ntype=ntype, op='sum')``.
+    See Also
+    --------
+    readout_nodes
    """
    return readout_nodes(graph, feat, weight, ntype=ntype, op='sum')
 def sum_edges(graph, feat, weight=None, *, etype=None):
    """Syntax sugar for ``dgl.readout_edges(graph, feat, weight, etype=etype, op='sum')``.
+    See Also
+    --------
+    readout_edges
    """
    return readout_edges(graph, feat, weight, etype=etype, op='sum')
 def mean_nodes(graph, feat, weight=None, *, ntype=None):
    """Syntax sugar for ``dgl.readout_nodes(graph, feat, weight, ntype=ntype, op='mean')``.
+    See Also
+    --------
+    readout_nodes
    """
    return readout_nodes(graph, feat, weight, ntype=ntype, op='mean')
 def mean_edges(graph, feat, weight=None, *, etype=None):
    """Syntax sugar for ``dgl.readout_edges(graph, feat, weight, etype=etype, op='mean')``.
+    See Also
+    --------
+    readout_edges
    """
    return readout_edges(graph, feat, weight, etype=etype, op='mean')
 def max_nodes(graph, feat, weight=None, *, ntype=None):
    """Syntax sugar for ``dgl.readout_nodes(graph, feat, weight, ntype=ntype, op='max')``.
+    See Also
+    --------
+    readout_nodes
    """
    return readout_nodes(graph, feat, weight, ntype=ntype, op='max')
 def max_edges(graph, feat, weight=None, *, etype=None):
    """Syntax sugar for ``dgl.readout_edges(graph, feat, weight, etype=etype, op='max')``.
+    See Also
+    --------
+    readout_edges
    """
    return readout_edges(graph, feat, weight, etype=etype, op='max')
@@ -210,15 +240,15 @@ def softmax_nodes(graph, feat, *, ntype=None):
    Parameters
    ----------
    graph : DGLGraph.
-        Input graph.
+        The input graph.
    feat : str
-        Node feature name.
+        The node feature name.
    ntype : str, optional
-        Node type. Can be omitted if there is only one node type in the graph.
+        The node type name. Can be omitted if there is only one node type in the graph.
    Returns
    -------
-    tensor
+    Tensor
        Result tensor.
    Examples
@@ -269,15 +299,21 @@ def softmax_edges(graph, feat, *, etype=None):
    Parameters
    ----------
    graph : DGLGraph.
-        Input graph.
+        The input graph.
    feat : str
-        Edge feature name.
+        The edge feature name.
-    etype : str, typle of str, optional
+    etype : str or (str, str, str), optional
-        Edge type. Can be omitted if there is only one edge type in the graph.
+        The type names of the edges. The allowed type name formats are:
+        * ``(str, str, str)`` for source node type, edge type and destination node type.
+        * or one ``str`` edge type name if the name can uniquely identify a
+          triplet format in the graph.
+        Can be omitted if the graph has only one type of edges.
    Returns
    -------
-    tensor
+    Tensor
        Result tensor.
    Examples
@@ -535,9 +571,10 @@ def _topk_on(graph, typestr, feat, k, descending, sortby, ntype_or_etype):
           topk_indices
 def topk_nodes(graph, feat, k, *, descending=True, sortby=None, ntype=None):
-    """Perform a graph-wise top-k on node features :attr:`feat` in
+    """Return a graph-level representation by a graph-wise top-k on
-    :attr:`graph` by feature at index :attr:`sortby`. If :attr:
+    node features :attr:`feat` in :attr:`graph` by feature at index :attr:`sortby`.
-    `descending` is set to False, return the k smallest elements instead.
+    If :attr:`descending` is set to False, return the k smallest elements instead.
    If :attr:`sortby` is set to None, the function would perform top-k on
    all dimensions independently, equivalent to calling
@@ -569,6 +606,11 @@ def topk_nodes(graph, feat, k, *, descending=True, sortby=None, ntype=None):
        :math:`B` is the batch size of the input graph, :math:`D`
        is the feature size.
+    Notes
+    -----
+    If an example has :math:`n` nodes and :math:`n<k`, the ``sorted_feat``
+    tensor will pad the :math:`n+1` to :math:`k` th rows with zeros.
    Examples
    --------
@@ -631,20 +673,16 @@ def topk_nodes(graph, feat, k, *, descending=True, sortby=None, ntype=None):
              [0.0880, 0.6379, 0.4451, 0.6893, 0.5197]]]), tensor([[[1, 0, 1, 3, 1],
              [3, 2, 0, 2, 2],
              [2, 3, 2, 1, 3]]]))
-    Notes
-    -----
-    If an example has :math:`n` nodes and :math:`n<k`, the ``sorted_feat``
-    tensor will pad the :math:`n+1` to :math:`k`th rows with zero;
    """
    return _topk_on(graph, 'nodes', feat, k,
                    descending=descending, sortby=sortby,
                    ntype_or_etype=ntype)
 def topk_edges(graph, feat, k, *, descending=True, sortby=None, etype=None):
-    """Perform a graph-wise top-k on node features :attr:`feat` in
+    """Return a graph-level representation by a graph-wise top-k
-    :attr:`graph` by feature at index :attr:`sortby`. If :attr:
+    on edge features :attr:`feat` in :attr:`graph` by feature at index :attr:`sortby`.
-    `descending` is set to False, return the k smallest elements instead.
+    If :attr:`descending` is set to False, return the k smallest elements instead.
    If :attr:`sortby` is set to None, the function would perform top-k on
    all dimensions independently, equivalent to calling
@@ -676,6 +714,11 @@ def topk_edges(graph, feat, k, *, descending=True, sortby=None, etype=None):
        :math:`B` is the batch size of the input graph, :math:`D`
        is the feature size.
+    Notes
+    -----
+    If an example has :math:`n` edges and :math:`n<k`, the ``sorted_feat``
+    tensor will pad the :math:`n+1` to :math:`k` th rows with zeros.
    Examples
    --------
@@ -738,11 +781,6 @@ def topk_edges(graph, feat, k, *, descending=True, sortby=None, etype=None):
              [0.0880, 0.6379, 0.4451, 0.6893, 0.5197]]]), tensor([[[1, 0, 1, 3, 1],
              [3, 2, 0, 2, 2],
              [2, 3, 2, 1, 3]]]))
-    Notes
-    -----
-    If an example has :math:`n` nodes and :math:`n<k`, the ``sorted_feat``
-    tensor will pad the :math:`n+1` to :math:`k`th rows with zero;
    """
    return _topk_on(graph, 'edges', feat, k,
                    descending=descending, sortby=sortby,

--- a/python/dgl/sampling/__init__.py
+++ b/python/dgl/sampling/__init__.py
-"""This module contains the implementations of various sampling operators.
+"""The ``dgl.sampling`` package contains operators and utilities for
+sampling from a graph via random walks, neighbor sampling, etc. They
+are typically used together with the ``DataLoader`` s in the
+``dgl.dataloading`` package. The user guide :ref:`guide-minibatch`
+gives a holistic explanation on how different components work together.
 """
 from .randomwalks import *
 from .pinsage import *
 from .neighbor import *
--- a/python/dgl/subgraph.py
+++ b/python/dgl/subgraph.py
@@ -18,92 +18,102 @@ __all__ = ['node_subgraph', 'edge_subgraph', 'node_type_subgraph', 'edge_type_su
           'in_subgraph', 'out_subgraph']
 def node_subgraph(graph, nodes):
-    """Return the subgraph induced on given nodes.
+    """Return a subgraph induced on the given nodes.
-    The metagraph of the returned subgraph is the same as the parent graph.
+    A node-induced subgraph is a subset of the nodes of a graph together with
-    Features are copied from the original graph.
+    any edges whose endpoints are both in this subset. In addition to extracting
+    the subgraph, DGL conducts the following:
+    * Relabel the extracted nodes to IDs starting from zero.
+    * Copy the features of the extracted nodes and edges to the resulting graph.
+      The copy is *lazy* and incurs data movement only when needed.
+    * Store the IDs of the extracted nodes and edges in the ``ndata`` and ``edata``
+      of the resulting graph under name ``dgl.NID`` and ``dgl.EID``, respectively.
+    If the graph is heterogeneous, DGL extracts a subgraph per relation and composes
+    them as the resulting graph. Thus, the resulting graph has the same set of relations
+    as the input one.
    Parameters
    ----------
    graph : DGLGraph
        The graph to extract subgraphs from.
-    nodes : list or dict[str->list or iterable]
+    nodes : nodes or dict[str, nodes]
-        A dictionary mapping node types to node ID array for constructing
+        The nodes to form the subgraph. The allowed nodes formats are:
-        subgraph. All nodes must exist in the graph.
-        If the graph only has one node type, one can just specify a list,
+        * Int Tensor: Each element is a node ID. The tensor must have the same device type
-        tensor, or any iterable of node IDs intead.
+          and ID data type as the graph's.
+        * iterable[int]: Each element is a node ID.
+        * Bool Tensor: Each :math:`i^{th}` element is a bool flag indicating whether
+          node :math:`i` is in the subgraph.
-        The node ID array can be either an interger tensor or a bool tensor.
+        If the graph is homogeneous, one can directly pass the above formats.
-        When a bool tensor is used, it is automatically converted to
+        Otherwise, the argument must be a dictionary with keys being node types
-        an interger tensor using the semantic of np.where(nodes_idx == True).
+        and values being the nodes.
-        Note: When using bool tensor, only backend (torch, tensorflow, mxnet)
-        tensors are supported.
    Returns
    -------
    G : DGLGraph
        The subgraph.
-        The nodes and edges in the subgraph are relabeled using consecutive
-        integers from 0.
-        One can retrieve the mapping from subgraph node/edge ID to parent
-        node/edge ID via ``dgl.NID`` and ``dgl.EID`` node/edge features of the
-        subgraph.
    Examples
    --------
    The following example uses PyTorch backend.
-    Instantiate a heterograph.
+    >>> import dgl
+    >>> import torch
+    Extract a subgraph from a homogeneous graph.
+    >>> g = dgl.graph(([0, 1, 2, 3, 4], [1, 2, 3, 4, 0]))  # 5-node cycle
+    >>> sg = dgl.node_subgraph(g, [0, 1, 4])
+    >>> sg
+    Graph(num_nodes=3, num_edges=2,
+          ndata_schemes={'_ID': Scheme(shape=(), dtype=torch.int64)}
+          edata_schemes={'_ID': Scheme(shape=(), dtype=torch.int64)})
+    >>> sg.edges()
+    (tensor([0, 2]), tensor([1, 0]))
+    >>> sg.ndata[dgl.NID]  # original node IDs
+    tensor([0, 1, 4])
+    >>> sg.edata[dgl.EID]  # original edge IDs
+    tensor([0, 4])
+    Specify nodes using a boolean mask.
+    >>> nodes = torch.tensor([True, True, False, False, True])  # choose nodes [0, 1, 4]
+    >>> dgl.node_subgraph(g, nodes)
+    Graph(num_nodes=3, num_edges=2,
+          ndata_schemes={'_ID': Scheme(shape=(), dtype=torch.int64)}
+          edata_schemes={'_ID': Scheme(shape=(), dtype=torch.int64)})
+    The resulting subgraph also copies features from the parent graph.
+    >>> g.ndata['x'] = torch.arange(10).view(5, 2)
+    >>> sg = dgl.node_subgraph(g, [0, 1, 4])
+    >>> sg
+    Graph(num_nodes=3, num_edges=2,
+          ndata_schemes={'x': Scheme(shape=(2,), dtype=torch.int64),
+                         '_ID': Scheme(shape=(), dtype=torch.int64)}
+          edata_schemes={'_ID': Scheme(shape=(), dtype=torch.int64)})
+    >>> sg.ndata['x']
+    tensor([[0, 1],
+            [2, 3],
+            [8, 9]])
+    Extract a subgraph from a heterogeneous graph.
    >>> g = dgl.heterograph({
    >>>     ('user', 'plays', 'game'): ([0, 1, 1, 2], [0, 0, 2, 1]),
    >>>     ('user', 'follows', 'user'): ([0, 1, 1], [1, 2, 2])
    >>> })
-    >>> # Set node features
+    >>> sub_g = dgl.node_subgraph(g, {'user': [1, 2]})
-    >>> g.nodes['user'].data['h'] = torch.tensor([[0.], [1.], [2.]])
+    >>> sub_g
-    Get subgraphs.
-    >>> g.subgraph({'user': [4, 5]})
-    Traceback (most recent call last):
-        ...
-    dgl._ffi.base.DGLError: ...
-    >>> sub_g = g.subgraph({'user': [1, 2]})
-    >>> print(sub_g)
-    Graph(num_nodes={'user': 2, 'game': 0},
-          num_edges={('user', 'plays', 'game'): 0, ('user', 'follows', 'user'): 2},
-          metagraph=[('user', 'game'), ('user', 'user')])
-    Get subgraphs using boolean mask tensor.
-    >>> sub_g = g.subgraph({'user': th.tensor([False, True, True])})
-    >>> print(sub_g)
    Graph(num_nodes={'user': 2, 'game': 0},
          num_edges={('user', 'plays', 'game'): 0, ('user', 'follows', 'user'): 2},
          metagraph=[('user', 'game'), ('user', 'user')])
-    Get the original node/edge indices.
-    >>> sub_g['follows'].ndata[dgl.NID] # Get the node indices in the raw graph
-    tensor([1, 2])
-    >>> sub_g['follows'].edata[dgl.EID] # Get the edge indices in the raw graph
-    tensor([1, 2])
-    Get the copied node features.
-    >>> sub_g.nodes['user'].data['h']
-    tensor([[1.],
-            [2.]])
-    >>> sub_g.nodes['user'].data['h'] += 1
-    >>> g.nodes['user'].data['h']          # Features are not shared.
-    tensor([[0.],
-            [1.],
-            [2.]])
    See Also
    --------
    edge_subgraph
@@ -129,106 +139,124 @@ def node_subgraph(graph, nodes):
    induced_edges = sgi.induced_edges
    return _create_hetero_subgraph(graph, sgi, induced_nodes, induced_edges)
-DGLHeteroGraph.subgraph = node_subgraph
+DGLHeteroGraph.subgraph = utils.alias_func(node_subgraph)
 def edge_subgraph(graph, edges, preserve_nodes=False):
-    """Return the subgraph induced on given edges.
+    """Return a subgraph induced on the given edges.
-    The metagraph of the returned subgraph is the same as the parent graph.
-    Features are copied from the original graph.
+    An edge-induced subgraph is equivalent to creating a new graph
+    with the same number of nodes using the given edges.  In addition to extracting
-    Parameters
+    the subgraph, DGL conducts the following:
-    ----------
-    graph : DGLGraph
-        The graph to extract subgraphs from.
-    edges : dict[(str, str, str), Tensor]
-        A dictionary mapping edge types to edge ID array for constructing
-        subgraph. All edges must exist in the subgraph.
-        The edge types are characterized by triplets of
+    * Relabel the incident nodes to IDs starting from zero. Isolated nodes are removed.
-        ``(src type, etype, dst type)``.
-        If the graph only has one edge type, one can just specify a list,
+    * Copy the features of the extracted nodes and edges to the resulting graph.
-        tensor, or any iterable of edge IDs intead.
+      The copy is *lazy* and incurs data movement only when needed.
-        The edge ID array can be either an interger tensor or a bool tensor.
+    * Store the IDs of the extracted nodes and edges in the ``ndata`` and ``edata``
-        When a bool tensor is used, it is automatically converted to
+      of the resulting graph under name ``dgl.NID`` and ``dgl.EID``, respectively.
-        an interger tensor using the semantic of np.where(edges_idx == True).
-        Note: When using bool tensor, only backend (torch, tensorflow, mxnet)
+    If the graph is heterogeneous, DGL extracts a subgraph per relation and composes
-        tensors are supported.
+    them as the resulting graph. Thus, the resulting graph has the same set of relations
+    as the input one.
-    preserve_nodes : bool
+    Parameters
-        Whether to preserve all nodes or not. If false, all nodes
+    ----------
-        without edges will be removed. (Default: False)
+    graph : DGLGraph
+        The graph to extract the subgraph from.
+    edges : edges or dict[(str, str, str), edges]
+        The edges to form the subgraph. The allowed edges formats are:
+        * Int Tensor: Each element is an edge ID. The tensor must have the same device type
+          and ID data type as the graph's.
+        * iterable[int]: Each element is an edge ID.
+        * Bool Tensor: Each :math:`i^{th}` element is a bool flag indicating whether
+          edge :math:`i` is in the subgraph.
+        If the graph is homogeneous, one can directly pass the above formats.
+        Otherwise, the argument must be a dictionary with keys being edge types
+        and values being the edges.
+    preserve_nodes : bool, optional
+        If True, keep all the nodes of the input graph in the extracted subgraph,
+        i.e., the incident nodes are not relabeled and the isolated nodes are not
+        removed. (Default: False)
    Returns
    -------
    G : DGLGraph
        The subgraph.
-        The nodes and edges are relabeled using consecutive integers from 0.
-        One can retrieve the mapping from subgraph node/edge ID to parent
-        node/edge ID via ``dgl.NID`` and ``dgl.EID`` node/edge features of the
-        subgraph.
    Examples
    --------
    The following example uses PyTorch backend.
-    Instantiate a heterograph.
+    >>> import dgl
+    >>> import torch
+    Extract a subgraph from a homogeneous graph.
+    >>> g = dgl.graph(([0, 1, 2, 3, 4], [1, 2, 3, 4, 0]))  # 5-node cycle
+    >>> sg = dgl.edge_subgraph(g, [0, 4])
+    >>> sg
+    Graph(num_nodes=3, num_edges=2,
+          ndata_schemes={'_ID': Scheme(shape=(), dtype=torch.int64)}
+          edata_schemes={'_ID': Scheme(shape=(), dtype=torch.int64)})
+    >>> sg.edges()
+    (tensor([0, 1]), tensor([2, 0]))
+    >>> sg.ndata[dgl.NID]  # original node IDs
+    tensor([0, 4, 1])
+    >>> sg.edata[dgl.EID]  # original edge IDs
+    tensor([0, 4])
+    Extract a subgraph without node relabeling.
+    >>> sg = dgl.edge_subgraph(g, [0, 4], preserve_nodes=True)
+    >>> sg
+    Graph(num_nodes=5, num_edges=2,
+          ndata_schemes={'_ID': Scheme(shape=(), dtype=torch.int64)}
+          edata_schemes={'_ID': Scheme(shape=(), dtype=torch.int64)})
+    >>> sg.edges()
+    (tensor([0, 4]), tensor([1, 0]))
+    Specify edges using a boolean mask.
+    >>> edges = torch.tensor([True, False, False, False, True])  # choose edges [0, 4]
+    >>> dgl.edge_subgraph(g, edges)
+    Graph(num_nodes=3, num_edges=2,
+          ndata_schemes={'_ID': Scheme(shape=(), dtype=torch.int64)}
+          edata_schemes={'_ID': Scheme(shape=(), dtype=torch.int64)})
+    The resulting subgraph also copies features from the parent graph.
+    >>> g.ndata['x'] = torch.arange(10).view(5, 2)
+    >>> sg = dgl.edge_subgraph(g, [0, 4])
+    >>> sg
+    Graph(num_nodes=3, num_edges=2,
+          ndata_schemes={'x': Scheme(shape=(2,), dtype=torch.int64),
+                         '_ID': Scheme(shape=(), dtype=torch.int64)}
+          edata_schemes={'_ID': Scheme(shape=(), dtype=torch.int64)})
+    >>> sg.ndata[dgl.NID]
+    tensor([0, 4, 1])
+    >>> sg.ndata['x']
+    tensor([[0, 1],
+            [8, 9],
+            [2, 3]])
+    Extract a subgraph from a heterogeneous graph.
    >>> g = dgl.heterograph({
    >>>     ('user', 'plays', 'game'): ([0, 1, 1, 2], [0, 0, 2, 1]),
    >>>     ('user', 'follows', 'user'): ([0, 1, 1], [1, 2, 2])
    >>> })
-    >>> # Set edge features
+    >>> sub_g = dgl.edge_subgraph(g, {('user', 'follows', 'user'): [1, 2],
-    >>> g.edges['follows'].data['h'] = torch.tensor([[0.], [1.], [2.]])
+    ...                               ('user', 'plays', 'game'): [2]})
-    Get subgraphs.
-    >>> g.edge_subgraph({('user', 'follows', 'user'): [5, 6]})
-    Traceback (most recent call last):
-        ...
-    dgl._ffi.base.DGLError: ...
-    >>> sub_g = g.edge_subgraph({('user', 'follows', 'user'): [1, 2],
-    >>>                          ('user', 'plays', 'game'): [2]})
    >>> print(sub_g)
    Graph(num_nodes={'user': 2, 'game': 1},
          num_edges={('user', 'plays', 'game'): 1, ('user', 'follows', 'user'): 2},
          metagraph=[('user', 'game'), ('user', 'user')])
-    Get subgraphs using boolean mask tensor.
-    >>> sub_g = g.edge_subgraph({('user', 'follows', 'user'): th.tensor([False, True, True]),
-    >>>                   ('user', 'plays', 'game'): th.tensor([False, False, True, False])})
-    >>> sub_g
-    Graph(num_nodes={'user': 2, 'game': 1},
-        num_edges={('user', 'plays', 'game'): 1, ('user', 'follows', 'user'): 2},
-        metagraph=[('user', 'game'), ('user', 'user')])
-    Get the original node/edge indices.
-    >>> sub_g['follows'].ndata[dgl.NID] # Get the node indices in the raw graph
-    tensor([1, 2])
-    >>> sub_g['plays'].edata[dgl.EID]   # Get the edge indices in the raw graph
-    tensor([2])
-    Get the copied node features.
-    >>> sub_g.edges['follows'].data['h']
-    tensor([[1.],
-            [2.]])
-    >>> sub_g.edges['follows'].data['h'] += 1
-    >>> g.edges['follows'].data['h']          # Features are not shared.
-    tensor([[0.],
-            [1.],
-            [2.]])
    See Also
    --------
-    subgraph
+    node_subgraph
    """
    if graph.is_block:
        raise DGLError('Extracting subgraph from a block graph is not allowed.')
@@ -252,73 +280,82 @@ def edge_subgraph(graph, edges, preserve_nodes=False):
    induced_nodes = sgi.induced_nodes
    return _create_hetero_subgraph(graph, sgi, induced_nodes, induced_edges)
-DGLHeteroGraph.edge_subgraph = edge_subgraph
+DGLHeteroGraph.edge_subgraph = utils.alias_func(edge_subgraph)
 def in_subgraph(g, nodes):
-    """Return the subgraph induced on the inbound edges of all edge types of the
+    """Return the subgraph induced on the inbound edges of all the edge types of the
    given nodes.
-    All the nodes are preserved regardless of whether they have an edge or not.
+    An edge-induced subgraph is equivalent to creating a new graph
+    with the same number of nodes using the given edges.  In addition to extracting
+    the subgraph, DGL conducts the following:
-    The metagraph of the returned subgraph is the same as the parent graph.
+    * Copy the features of the extracted nodes and edges to the resulting graph.
+      The copy is *lazy* and incurs data movement only when needed.
-    Features are copied from the original graph.
+    * Store the IDs of the extracted edges in the ``edata``
+      of the resulting graph under name ``dgl.EID``.
+    If the graph is heterogeneous, DGL extracts a subgraph per relation and composes
+    them as the resulting graph. Thus, the resulting graph has the same set of relations
+    as the input one.
    Parameters
    ----------
    g : DGLGraph
-        Full graph structure.
+        The input graph.
-    nodes : tensor or dict
+    nodes : nodes or dict[str, nodes]
-        Node ids to sample neighbors from. The allowed types
+        The nodes to form the subgraph. The allowed node formats are:
-        are dictionary of node types to node id tensors, or simply node id tensor if
-        the given graph g has only one type of nodes.
+        * Int Tensor: Each element is an ID. The tensor must have the same device type
+          and ID data type as the graph's.
+        * iterable[int]: Each element is an ID.
+        If the graph is homogeneous, one can directly pass the above formats.
+        Otherwise, the argument must be a dictionary with keys being node types
+        and values being the nodes.
    Returns
    -------
    DGLGraph
        The subgraph.
-        One can retrieve the mapping from subgraph edge ID to parent
-        edge ID via ``dgl.EID`` edge features of the subgraph.
    Examples
    --------
    The following example uses PyTorch backend.
-    Instantiate a heterograph.
+    >>> import dgl
+    >>> import torch
+    Extract a subgraph from a homogeneous graph.
+    >>> g = dgl.graph(([0, 1, 2, 3, 4], [1, 2, 3, 4, 0]))  # 5-node cycle
+    >>> g.edata['w'] = torch.arange(10).view(5, 2)
+    >>> sg = dgl.in_subgraph(g, [2, 0])
+    >>> sg
+    Graph(num_nodes=5, num_edges=2,
+          ndata_schemes={}
+          edata_schemes={'w': Scheme(shape=(2,), dtype=torch.int64),
+                         '_ID': Scheme(shape=(), dtype=torch.int64)})
+    >>> sg.edges()
+    (tensor([1, 4]), tensor([2, 0]))
+    >>> sg.edata[dgl.EID]  # original edge IDs
+    tensor([1, 4])
+    >>> sg.edata['w']  # also extract the features
+    tensor([[2, 3],
+            [8, 9]])
+    Extract a subgraph from a heterogeneous graph.
    >>> g = dgl.heterograph({
    ...     ('user', 'plays', 'game'): ([0, 1, 1, 2], [0, 0, 2, 1]),
    ...     ('user', 'follows', 'user'): ([0, 1, 1], [1, 2, 2])})
-    >>> # Set edge features
-    >>> g.edges['follows'].data['h'] = torch.tensor([[0.], [1.], [2.]])
-    Get subgraphs.
    >>> sub_g = g.in_subgraph({'user': [2], 'game': [2]})
-    >>> print(sub_g)
+    >>> sub_g
    Graph(num_nodes={'game': 3, 'user': 3},
          num_edges={('user', 'plays', 'game'): 1, ('user', 'follows', 'user'): 2},
          metagraph=[('user', 'game', 'plays'), ('user', 'user', 'follows')])
-    Get the original node/edge indices.
-    >>> sub_g.edges['plays'].data[dgl.EID]
-    tensor([2])
-    >>> sub_g.edges['follows'].data[dgl.EID]
-    tensor([1, 2])
-    Get the copied edge features.
-    >>> sub_g.edges['follows'].data['h']
-    tensor([[1.],
-            [2.]])
-    >>> sub_g.edges['follows'].data['h'] += 1
-    >>> g.edges['follows'].data['h']          # Features are not shared.
-    tensor([[0.],
-            [1.],
-            [2.]])
    See also
    --------
    out_subgraph
@@ -341,73 +378,82 @@ def in_subgraph(g, nodes):
    induced_edges = sgi.induced_edges
    return _create_hetero_subgraph(g, sgi, None, induced_edges)
-DGLHeteroGraph.in_subgraph = in_subgraph
+DGLHeteroGraph.in_subgraph = utils.alias_func(in_subgraph)
 def out_subgraph(g, nodes):
-    """Return the subgraph induced on the outbound edges of all edge types of the
+    """Return the subgraph induced on the outbound edges of all the edge types of the
    given nodes.
-    All the nodes are preserved regardless of whether they have an edge or not.
+    An edge-induced subgraph is equivalent to creating a new graph
+    with the same number of nodes using the given edges.  In addition to extracting
+    the subgraph, DGL conducts the following:
-    The metagraph of the returned subgraph is the same as the parent graph.
+    * Copy the features of the extracted nodes and edges to the resulting graph.
+      The copy is *lazy* and incurs data movement only when needed.
-    Features are copied from the original graph.
+    * Store the IDs of the extracted edges in the ``edata``
+      of the resulting graph under name ``dgl.EID``.
+    If the graph is heterogeneous, DGL extracts a subgraph per relation and composes
+    them as the resulting graph. Thus, the resulting graph has the same set of relations
+    as the input one.
    Parameters
    ----------
    g : DGLGraph
-        Full graph structure.
+        The input graph.
-    nodes : tensor or dict
+    nodes : nodes or dict[str, nodes]
-        Node ids to sample neighbors from. The allowed types
+        The nodes to form the subgraph. The allowed node formats are:
-        are dictionary of node types to node id tensors, or simply node id tensor if
-        the given graph g has only one type of nodes.
+        * Int Tensor: Each element is a node ID. The tensor must have the same device type
+          and ID data type as the graph's.
+        * iterable[int]: Each element is a node ID.
+        If the graph is homogeneous, one can directly pass the above formats.
+        Otherwise, the argument must be a dictionary with keys being node types
+        and values being the nodes.
    Returns
    -------
    DGLGraph
        The subgraph.
-        One can retrieve the mapping from subgraph edge ID to parent
-        edge ID via ``dgl.EID`` edge features of the subgraph.
    Examples
    --------
    The following example uses PyTorch backend.
-    Instantiate a heterograph.
+    >>> import dgl
+    >>> import torch
+    Extract a subgraph from a homogeneous graph.
+    >>> g = dgl.graph(([0, 1, 2, 3, 4], [1, 2, 3, 4, 0]))  # 5-node cycle
+    >>> g.edata['w'] = torch.arange(10).view(5, 2)
+    >>> sg = dgl.out_subgraph(g, [2, 0])
+    >>> sg
+    Graph(num_nodes=5, num_edges=2,
+          ndata_schemes={}
+          edata_schemes={'w': Scheme(shape=(2,), dtype=torch.int64),
+                         '_ID': Scheme(shape=(), dtype=torch.int64)})
+    >>> sg.edges()
+    (tensor([2, 0]), tensor([3, 1]))
+    >>> sg.edata[dgl.EID]  # original edge IDs
+    tensor([2, 0])
+    >>> sg.edata['w']  # also extract the features
+    tensor([[4, 5],
+            [0, 1]])
+    Extract a subgraph from a heterogeneous graph.
    >>> g = dgl.heterograph({
    ...     ('user', 'plays', 'game'): ([0, 1, 1, 2], [0, 0, 2, 1]),
    ...     ('user', 'follows', 'user'): ([0, 1, 1], [1, 2, 2])})
-    >>> # Set edge features
-    >>> g.edges['follows'].data['h'] = torch.tensor([[0.], [1.], [2.]])
-    Get subgraphs.
    >>> sub_g = g.out_subgraph({'user': [1]})
-    >>> print(sub_g)
+    >>> sub_g
    Graph(num_nodes={'game': 3, 'user': 3},
          num_edges={('user', 'plays', 'game'): 2, ('user', 'follows', 'user'): 2},
          metagraph=[('user', 'game', 'plays'), ('user', 'user', 'follows')])
-    Get the original node/edge indices.
-    >>> sub_g.edges['plays'].data[dgl.EID]
-    tensor([1, 2])
-    >>> sub_g.edges['follows'].data[dgl.EID]
-    tensor([1, 2])
-    Get the copied edge features.
-    >>> sub_g.edges['follows'].data['h']
-    tensor([[1.],
-            [2.]])
-    >>> sub_g.edges['follows'].data['h'] += 1
-    >>> g.edges['follows'].data['h']          # Features are not shared.
-    tensor([[0.],
-            [1.],
-            [2.]])
    See also
    --------
    in_subgraph
@@ -430,22 +476,23 @@ def out_subgraph(g, nodes):
    induced_edges = sgi.induced_edges
    return _create_hetero_subgraph(g, sgi, None, induced_edges)
-DGLHeteroGraph.out_subgraph = out_subgraph
+DGLHeteroGraph.out_subgraph = utils.alias_func(out_subgraph)
 def node_type_subgraph(graph, ntypes):
    """Return the subgraph induced on given node types.
-    The metagraph of the returned subgraph is the subgraph of the original
+    A node-type-induced subgraph contains all the nodes of the given subset of
-    metagraph induced from the node types.
+    the node types of a graph and any edges whose endpoints are both in this subset.
+    In addition to extracting the subgraph, DGL also copies the features of the
-    Features are shared with the original graph.
+    extracted nodes and edges to the resulting graph.
+    The copy is *lazy* and incurs data movement only when needed.
    Parameters
    ----------
    graph : DGLGraph
        The graph to extract subgraphs from.
    ntypes : list[str]
-        The node types
+        The type names of the nodes in the subgraph.
    Returns
    -------
@@ -456,6 +503,9 @@ def node_type_subgraph(graph, ntypes):
    --------
    The following example uses PyTorch backend.
+    >>> import dgl
+    >>> import torch
    Instantiate a heterograph.
    >>> g = dgl.heterograph({
@@ -473,17 +523,12 @@ def node_type_subgraph(graph, ntypes):
          ndata_schemes={'h': Scheme(shape=(1,), dtype=torch.float32)}
          edata_schemes={})
-    Get the shared node features.
+    Get the extracted node features.
    >>> sub_g.nodes['user'].data['h']
    tensor([[0.],
            [1.],
            [2.]])
-    >>> sub_g.nodes['user'].data['h'] += 1
-    >>> g.nodes['user'].data['h']          # Features are shared.
-    tensor([[1.],
-            [2.],
-            [3.]])
    See Also
    --------
@@ -498,22 +543,28 @@ def node_type_subgraph(graph, ntypes):
            etypes.append(graph.canonical_etypes[etid])
    return edge_type_subgraph(graph, etypes)
-DGLHeteroGraph.node_type_subgraph = node_type_subgraph
+DGLHeteroGraph.node_type_subgraph = utils.alias_func(node_type_subgraph)
 def edge_type_subgraph(graph, etypes):
    """Return the subgraph induced on given edge types.
-    The metagraph of the returned subgraph is the subgraph of the original metagraph
+    An edge-type-induced subgraph contains all the edges of the given subset of
-    induced from the edge types.
+    the edge types of a graph and the nodes incident to those edges.
+    In addition to extracting the subgraph, DGL also copies the features of the
-    Features are shared with the original graph.
+    extracted nodes and edges to the resulting graph.
+    The copy is *lazy* and incurs data movement only when needed.
    Parameters
    ----------
    graph : DGLGraph
        The graph to extract subgraphs from.
-    etypes : list[str or tuple]
+    etypes : list[str] or list[(str, str, str)]
-        The edge types
+        The type names of the edges in the subgraph. The allowed type name
+        formats are:
+        * ``(str, str, str)`` for source node type, edge type and destination node type.
+        * or one ``str`` for the edge type name if the name can uniquely identify a
+          triplet format in the graph.
    Returns
    -------
@@ -524,6 +575,9 @@ def edge_type_subgraph(graph, etypes):
    --------
    The following example uses PyTorch backend.
+    >>> import dgl
+    >>> import torch
    Instantiate a heterograph.
    >>> g = dgl.heterograph({
@@ -536,7 +590,7 @@ def edge_type_subgraph(graph, etypes):
    Get subgraphs.
    >>> sub_g = g.edge_type_subgraph(['follows'])
-    >>> print(sub_g)
+    >>> sub_g
    Graph(num_nodes=3, num_edges=3,
          ndata_schemes={}
          edata_schemes={'h': Scheme(shape=(1,), dtype=torch.float32)})
@@ -547,11 +601,6 @@ def edge_type_subgraph(graph, etypes):
    tensor([[0.],
            [1.],
            [2.]])
-    >>> sub_g.edges['follows'].data['h'] += 1
-    >>> g.edges['follows'].data['h']          # Features are shared.
-    tensor([[1.],
-            [2.],
-            [3.]])
    See Also
    --------
@@ -579,7 +628,7 @@ def edge_type_subgraph(graph, etypes):
    hg = DGLHeteroGraph(hgidx, induced_ntypes, induced_etypes, node_frames, edge_frames)
    return hg
-DGLHeteroGraph.edge_type_subgraph = edge_type_subgraph
+DGLHeteroGraph.edge_type_subgraph = utils.alias_func(edge_type_subgraph)
 #################### Internal functions ####################

--- a/python/dgl/transform.py
+++ b/python/dgl/transform.py
@@ -59,30 +59,29 @@ def pairwise_squared_distance(x):
 #pylint: disable=invalid-name
 def knn_graph(x, k):
-    """Convert a tensor into k-nearest-neighbor (KNN) graph(s) according
+    """Construct a graph from a set of points according to k-nearest-neighbor (KNN)
-    to Euclidean distance.
+    and return.
    The function transforms the coordinates/features of a point set
-    into a directed homogeneous graph.  The coordinates of the point
+    into a directed homogeneous graph. The coordinates of the point
    set are specified as a matrix whose rows correspond to points and
    columns correspond to coordinate/feature dimensions.
-    The nodes of the returned graph correspond to the points.  An edge
+    The nodes of the returned graph correspond to the points, where the predecessors
-    exists if the source node is one of the k-nearest neighbors of the
+    of each point are its k-nearest neighbors measured by the Euclidean distance.
-    destination node.
-    If you give a 3D tensor, then each submatrix will be transformed
+    If :attr:`x` is a 3D tensor, then each submatrix will be transformed
-    into a separate graph.  DGL then composes the graphs into a large
+    into a separate graph. DGL then composes the graphs into a large
    graph of multiple connected components.
    Parameters
    ----------
-    x : 2D or 3D Tensor
+    x : Tensor
-        The input tensor.  It can be either on CPU or GPU.
+        The point coordinates. It can be either on CPU or GPU.
-        * If 2D, ``x[i]`` corresponds to the i-th node in the KNN graph.
+        * If it is 2D, ``x[i]`` corresponds to the i-th node in the KNN graph.
-        * If 3D, ``x[i]`` corresponds to the i-th KNN graph and
+        * If it is 3D, ``x[i]`` corresponds to the i-th KNN graph and
          ``x[i][j]`` corresponds to the j-th node in the i-th KNN graph.
    k : int
        The number of nearest neighbors per node.
@@ -90,7 +89,7 @@ def knn_graph(x, k):
    Returns
    -------
    DGLGraph
-        The graph. The node IDs are in the same order as :attr:`x`.
+        The constructed graph. The node IDs are in the same order as :attr:`x`.
        The returned graph is on CPU, regardless of the context of input :attr:`x`.
@@ -152,22 +151,25 @@ def knn_graph(x, k):
 #pylint: disable=invalid-name
 def segmented_knn_graph(x, k, segs):
-    """Convert a tensor into multiple k-nearest-neighbor (KNN) graph(s)
+    """Construct multiple graphs from multiple sets of points according to
-    with different number of nodes.
+    k-nearest-neighbor (KNN) and return.
-    Each chunk of :attr:`x` contains coordinates/features of a point set.
+    Compared with :func:`dgl.knn_graph`, this allows multiple point sets with
+    different sizes. The points from different sets are stored contiguously
+    in the :attr:`x` tensor.
    :attr:`segs` specifies the number of points in each point set. The
    function constructs a KNN graph for each point set, where the predecessors
-    of each point are its k-nearest neighbors. DGL then composes all KNN graphs
+    of each point are its k-nearest neighbors measured by the Euclidean distance.
+    DGL then composes all KNN graphs
    into a graph with multiple connected components.
    Parameters
    ----------
-    x : 2D Tensor
+    x : Tensor
-        Coordinates/features of points.  It can be either on CPU or GPU.
+        Coordinates/features of points. Must be 2D. It can be either on CPU or GPU.
    k : int
        The number of nearest neighbors per node.
-    segs : list of int
+    segs : list[int]
        Number of points in each point set. The numbers in :attr:`segs`
        must sum up to the number of rows in :attr:`x`.
@@ -222,37 +224,28 @@ def segmented_knn_graph(x, k, segs):
    return convert.from_scipy(adj)
-def to_bidirected(g, readonly=None, copy_ndata=False):
+def to_bidirected(g, copy_ndata=False, readonly=None):
-    r"""Convert the graph to a bidirectional simple graph, adding reverse edges and
+    r"""Convert the graph to a bi-directional simple graph and return.
-    removing parallel edges.
-    The function generates a new graph with no edge features.  In the new graph,
-    a single edge ``(u, v)`` exists if and only if there exists an edge connecting ``u``
-    to ``v`` or an edge connecting ``v`` to ``u`` in the original graph.
-    For a heterogeneous graph with multiple edge types, DGL treats edges corresponding
+    For an input graph :math:`G`, return a new graph :math:`G'` such that an edge
-    to each type as a separate graph and convert the graph to a bidirected one
+    :math:`(u, v)\in G'` if and only if there exists an edge :math:`(u, v)\in G` or
-    for each of them.
+    an edge :math:`(v, u)\in G`. The resulting graph :math:`G'` is a simple graph,
+    meaning there is no parallel edge.
-    Since :func:`to_bidirected` **is not well defined for unidirectional
+    The operation only works for edges whose two endpoints belong to the same node type.
-    bipartite graphs**, DGL will raise an error if an edge type whose source node type is
+    DGL will raise an error if the input graph is heterogeneous and contains edges
-    different from the destination node type exists.
+    with different types of endpoints.
    Parameters
    ----------
    g : DGLGraph
        The input graph.
-    readonly : bool
-        Deprecated. There will be no difference between readonly and non-readonly
-        (Default: True)
    copy_ndata: bool, optional
        If True, the node features of the bidirected graph are copied from the
-        original graph.
+        original graph. If False, the bidirected graph will not have any node features.
-        If False, the bidirected graph will not have any node features.
        (Default: False)
+    readonly : bool
+        **DEPRECATED**.
    Returns
    -------
@@ -261,10 +254,9 @@ def to_bidirected(g, readonly=None, copy_ndata=False):
    Notes
    -----
-    If :attr:`copy_ndata` is True, same tensors will be used for
+    If :attr:`copy_ndata` is True, the resulting graph will share the node feature
-    the features of the original graph and the returned graph to save memory cost.
+    tensors with the input graph. Hence, users should try to avoid in-place operations
-    As a result, users should avoid performing in-place operations on the features of
+    which will be visible to both graphs.
-    the returned graph, which will corrupt the features of the original graph as well.
    Examples
    --------
@@ -314,24 +306,21 @@ def to_bidirected(g, readonly=None, copy_ndata=False):
 def add_reverse_edges(g, readonly=None, copy_ndata=True,
                      copy_edata=False, ignore_bipartite=False):
-    r"""Add reverse edges to a graph.
+    r"""Add a reversed edge for each edge in the input graph and return a new graph.
    For a graph with edges :math:`(i_1, j_1), \cdots, (i_n, j_n)`, this
    function creates a new graph with edges
    :math:`(i_1, j_1), \cdots, (i_n, j_n), (j_1, i_1), \cdots, (j_n, i_n)`.
-    For a heterogeneous graph with multiple edge types, DGL treats the edges corresponding
+    The operation only works for edges whose two endpoints belong to the same node type.
-    to each type as a separate graph and add reverse edges for each of them.
+    DGL will raise an error if the input graph is heterogeneous and contains edges
+    with different types of endpoints. If :attr:`ignore_bipartite` is true, DGL will
-    Since :func:`add_reverse_edges` **is not well defined for unidirectional bipartite graphs**,
+    ignore those edges instead.
-    an error will be raised if an edge type of the input heterogeneous graph is for a
-    unidirectional bipartite graph.  DGL simply skips the edge types corresponding
-    to unidirectional bipartite graphs by specifying ``ignore_bipartite=True``.
    Parameters
    ----------
    g : DGLGraph
-        The input graph.  Can be on either CPU or GPU.
+        The input graph.
    readonly : bool, default to be True
        Deprecated. There will be no difference between readonly and non-readonly
    copy_ndata: bool, optional
@@ -360,13 +349,10 @@ def add_reverse_edges(g, readonly=None, copy_ndata=True,
    Notes
    -----
-    If :attr:`copy_ndata` is True, same tensors are used as
+    If :attr:`copy_ndata` is True, the resulting graph will share the node feature
-    the node features of the original graph and the new graph.
+    tensors with the input graph. Hence, users should try to avoid in-place operations
-    As a result, users should avoid performing in-place operations
+    which will be visible to both graphs. On the contrary, the two graphs do not share
-    on the node features of the new graph to avoid feature corruption.
+    the same edge feature storage.
-    On the contrary, edge features are concatenated,
-    and they are not shared due to concatenation.
    Examples
    --------
@@ -377,7 +363,7 @@ def add_reverse_edges(g, readonly=None, copy_ndata=True,
    >>> bg1.edges()
    (tensor([0, 0, 0, 1]), tensor([0, 1, 0, 0]))
-    **Heterogeneous graphs with Multiple Edge Types**
+    **Heterogeneous graphs**
    >>> g = dgl.heterograph({
    >>>     ('user', 'wins', 'user'): (th.tensor([0, 2, 0, 2, 2]), th.tensor([1, 1, 2, 1, 0])),
@@ -489,12 +475,11 @@ def line_graph(g, backtracking=True, shared=False):
    Notes
    -----
-    If :attr:`shared` is True, same tensors will be used for
+    * If :attr:`shared` is True, the node features of the resulting graph share the same
-    the features of the original graph and the returned graph to save memory cost.
+      storage with the edge features of the input graph. Hence, users should try to
-    As a result, users should avoid performing in-place operations on the features of
+      avoid in-place operations which will be visible to both graphs.
-    the returned graph, which will corrupt the features of the original graph as well.
-    The implementation is done on CPU, even if the input and output graphs are on GPU.
+    * The function supports input graph on GPU but copies it to CPU during computation.
    Examples
    --------
@@ -532,15 +517,13 @@ def line_graph(g, backtracking=True, shared=False):
    return lg
-DGLHeteroGraph.line_graph = line_graph
+DGLHeteroGraph.line_graph = utils.alias_func(line_graph)
 def khop_adj(g, k):
    """Return the matrix of :math:`A^k` where :math:`A` is the adjacency matrix of the graph
-    :math:`g`, where rows represent source nodes and columns represent destination nodes.
+    :math:`g`.
-    The returned matrix is a 32-bit float dense matrix on CPU.
-    The graph must be homogeneous.
+    The returned matrix is a 32-bit float dense matrix on CPU. The graph must be homogeneous.
    Parameters
    ----------
@@ -551,7 +534,7 @@ def khop_adj(g, k):
    Returns
    -------
-    tensor
+    Tensor
        The returned tensor.
    Examples
@@ -607,10 +590,9 @@ def khop_graph(g, k, copy_ndata=True):
    Notes
    -----
-    If :attr:`copy_ndata` is True, same tensors will be used for
+    If :attr:`copy_ndata` is True, the resulting graph will share the node feature
-    the features of the original graph and the returned graph to save memory cost.
+    tensors with the input graph. Hence, users should try to avoid in-place operations
-    As a result, users should avoid performing in-place operations on the features of
+    which will be visible to both graphs.
-    the returned graph, which will corrupt the features of the original graph as well.
    Examples
    --------
@@ -656,19 +638,11 @@ def khop_graph(g, k, copy_ndata=True):
    return new_g
 def reverse(g, copy_ndata=True, copy_edata=False, *, share_ndata=None, share_edata=None):
-    r"""Return the reverse of a graph.
+    r"""Return a new graph in which every edge is the reverse of an edge in the input graph.
    The reverse (also called converse, transpose) of a graph with edges
-    :math:`(i_1, j_1), (i_2, j_2), \cdots` is a new graph with edges
+    :math:`(i_1, j_1), (i_2, j_2), \cdots` of type ``(U, E, V)`` is a new graph with edges
-    :math:`(j_1, i_1), (j_2, i_2), \cdots`.
+    :math:`(j_1, i_1), (j_2, i_2), \cdots` of type ``(V, E, U)``.
-    For a heterogeneous graph with multiple edge types, DGL treats the edges corresponding
-    to each type as a separate graph and compute the reverse for each of them.
-    If the original edge type is ``(A, B, C)``, its reverse will have edge type
-    ``(C, B, A)``.
-    Given a :class:`DGLGraph` object, DGL returns another :class:`DGLGraph`
-    object representing its reverse.
    Parameters
    ----------
@@ -676,17 +650,11 @@ def reverse(g, copy_ndata=True, copy_edata=False, *, share_ndata=None, share_eda
        The input graph.
    copy_ndata: bool, optional
        If True, the node features of the reversed graph are copied from the
-        original graph.
+        original graph. If False, the reversed graph will not have any node features.
-        If False, the reversed graph will not have any node features.
        (Default: True)
    copy_edata: bool, optional
        If True, the edge features of the reversed graph are copied from the
-        original graph.
+        original graph. If False, the reversed graph will not have any edge features.
-        If False, the reversed graph will not have any edge features.
        (Default: False)
    Return
@@ -696,14 +664,14 @@ def reverse(g, copy_ndata=True, copy_edata=False, *, share_ndata=None, share_eda
    Notes
    -----
-    If :attr:`copy_ndata` or :attr:`copy_edata` is True, same tensors will be used for
+    If :attr:`copy_ndata` or :attr:`copy_edata` is True,
-    the features of the original graph and the reversed graph to save memory cost.
+    the resulting graph will share the node or edge feature
-    As a result, users should avoid performing in-place operations on the features of
+    tensors with the input graph. Hence, users should try to avoid in-place operations
-    the reversed graph, which will corrupt the features of the original graph as well.
+    which will be visible to both graphs.
    Examples
    --------
-    **Homogeneous graphs or Heterogeneous graphs with A Single Edge Type**
+    **Homogeneous graphs**
    Create a graph to reverse.
@@ -731,24 +699,7 @@ def reverse(g, copy_ndata=True, copy_edata=False, *, share_ndata=None, share_eda
            [4.],
            [5.]])
-    **In-place operations on features of one graph will be reflected on features of
+    **Heterogeneous graphs**
-    its reverse, which is dangerous. Out-place operations will not be reflected.**
-    >>> rg.ndata['h'] += 1
-    >>> g.ndata['h']
-    tensor([[1.],
-            [2.],
-            [3.]])
-    >>> g.ndata['h'] += 1
-    >>> rg.ndata['h']
-    tensor([[2.],
-            [3.],
-            [4.]])
-    >>> rg.ndata['h2'] = th.ones(3, 1)
-    >>> 'h2' in g.ndata
-    False
-    **Heterogenenous graphs with Multiple Edge Types**
    >>> g = dgl.heterograph({
    ...     ('user', 'follows', 'user'): (th.tensor([0, 2]), th.tensor([1, 2])),
@@ -758,7 +709,7 @@ def reverse(g, copy_ndata=True, copy_edata=False, *, share_ndata=None, share_eda
    >>> g.edges['plays'].data['he'] = th.zeros(3, 1)
    The resulting graph will have edge types
-    ``('user', 'follows', 'user)`` and ``('user', 'plays', 'game')``.
+    ``('user', 'follows', 'user')`` and ``('game', 'plays', 'user')``.
    >>> rg = dgl.reverse(g, copy_ndata=True)
    >>> rg
@@ -804,7 +755,7 @@ def reverse(g, copy_ndata=True, copy_edata=False, *, share_ndata=None, share_eda
    return new_g
-DGLHeteroGraph.reverse = reverse
+DGLHeteroGraph.reverse = utils.alias_func(reverse)
 def to_simple_graph(g):
    """Convert the graph to a simple graph with no multi-edge.
@@ -874,14 +825,15 @@ def to_bidirected_stale(g, readonly=True):
 def laplacian_lambda_max(g):
    """Return the largest eigenvalue of the normalized symmetric Laplacian of a graph.
    If the graph is batched from multiple graphs, return the list of the largest eigenvalue
    for each graph instead.
    Parameters
    ----------
    g : DGLGraph
-        The input graph, it should be an undirected graph.  It must be homogeneous.
+        The input graph. It must be a bi-directed homogeneous graph, i.e., every edge
+        should have an accompanying reverse edge in the graph.
        The graph can be batched from multiple graphs.
    Returns
@@ -938,7 +890,7 @@ def metapath_reachable_graph(g, metapath):
    Returns
    -------
    DGLGraph
-        A homogeneous or unidirectional bipartite graph.  It will be on CPU regardless of
+        A homogeneous or unidirectional bipartite graph. It will be on CPU regardless of
        whether the input graph is on CPU or GPU.
    Examples
@@ -970,21 +922,20 @@ def metapath_reachable_graph(g, metapath):
    return new_g
 def add_nodes(g, num, data=None, ntype=None):
-    r"""Append new nodes of the given node type.
+    r"""Add the given number of nodes to the graph and return a new graph.
-    The new nodes will have IDs starting from ``g.number_of_nodes(ntype)``.
+    The new nodes will have IDs starting from ``g.num_nodes(ntype)``.
-    A new graph with newly added nodes is returned.
    Parameters
    ----------
    num : int
-        Number of nodes to add.
+        The number of nodes to add.
-    data : dict, optional
+    data : dict[str, Tensor], optional
-        Feature data of the added nodes.
+        Feature data of the added nodes. The keys are feature names
+        while the values are feature data.
    ntype : str, optional
-        The type of the new nodes. Can be omitted if there is
+        The node type name. Can be omitted if there is
-        only one node type in the graph.
+        only one type of nodes in the graph.
    Return
    ------
@@ -993,11 +944,10 @@ def add_nodes(g, num, data=None, ntype=None):
    Notes
    -----
-    * If the key of :attr:`data` does not contain some existing feature fields,
+    * For features in :attr:`g` but not in :attr:`data`,
-    those features for the new nodes will be filled with zeros).
+      DGL assigns zero features for the newly added nodes.
+    * For features in :attr:`data` but not in :attr:`g`, DGL assigns zero features
-    * If the key of :attr:`data` contains new feature fields, those features for
+      for the existing nodes in the graph.
-    the old nodes will be filled zeros).
    Examples
    --------
@@ -1007,7 +957,7 @@ def add_nodes(g, num, data=None, ntype=None):
    >>> import dgl
    >>> import torch
-    **Homogeneous Graphs or Heterogeneous Graphs with A Single Node Type**
+    **Homogeneous Graphs**
    >>> g = dgl.graph((torch.tensor([0, 1]), torch.tensor([1, 2])))
    >>> g.num_nodes()
@@ -1017,26 +967,26 @@ def add_nodes(g, num, data=None, ntype=None):
    5
    If the graph has some node features and new nodes are added without
-    features, their features will be created with zeros.
+    features, their features will be filled with zeros.
    >>> g.ndata['h'] = torch.ones(5, 1)
    >>> g = dgl.add_nodes(g, 1)
    >>> g.ndata['h']
    tensor([[1.], [1.], [1.], [1.], [1.], [0.]])
-    You can also assign features for the new nodes in adding new nodes.
+    Assign features for the new nodes.
    >>> g = dgl.add_nodes(g, 1, {'h': torch.ones(1, 1), 'w': torch.ones(1, 1)})
    >>> g.ndata['h']
    tensor([[1.], [1.], [1.], [1.], [1.], [0.], [1.]])
-    Since :attr:`data` contains new feature fields, the features for old nodes
+    Since :attr:`data` contains new feature fields, the features for existing nodes
-    will be created with zeros.
+    will be filled with zeros.
    >>> g.ndata['w']
    tensor([[0.], [0.], [0.], [0.], [0.], [0.], [1.]])
-    **Heterogeneous Graphs with Multiple Node Types**
+    **Heterogeneous Graphs**
    >>> g = dgl.heterograph({
    ...     ('user', 'plays', 'game'): (torch.tensor([0, 1, 1, 2]),
@@ -1061,25 +1011,28 @@ def add_nodes(g, num, data=None, ntype=None):
    return g
 def add_edges(g, u, v, data=None, etype=None):
-    r"""Append multiple new edges for the specified edge type.
+    r"""Add the edges to the graph and return a new graph.
-    A new graph with newly added edges is returned.
    The i-th new edge will be from ``u[i]`` to ``v[i]``.  The IDs of the new
-    edges will start from ``g.number_of_edges(etype)``.
+    edges will start from ``g.num_edges(etype)``.
    Parameters
    ----------
-    u : int, tensor, numpy.ndarray, list
+    u : int, Tensor or iterable[int]
        Source node IDs, ``u[i]`` gives the source node for the i-th new edge.
-    v : int, tensor, numpy.ndarray, list
+    v : int, Tensor or iterable[int]
        Destination node IDs, ``v[i]`` gives the destination node for the i-th new edge.
-    data : dict, optional
+    data : dict[str, Tensor], optional
-        Feature data of the added edges. The i-th row of the feature data
+        Feature data of the added edges. The keys are feature names
-        corresponds to the i-th new edge.
+        while the values are feature data.
-    etype : str or tuple of str, optional
+    etype : str or (str, str, str), optional
-        The type of the new edges. Can be omitted if there is
+        The type names of the edges. The allowed type name formats are:
-        only one edge type in the graph.
+        * ``(str, str, str)`` for source node type, edge type and destination node type.
+        * or one ``str`` edge type name if the name can uniquely identify a
+          triplet format in the graph.
+        Can be omitted if the graph has only one type of edges.
    Return
    ------
@@ -1088,15 +1041,13 @@ def add_edges(g, u, v, data=None, etype=None):
    Notes
    -----
-    * If end nodes of adding edges does not exists, add_nodes is invoked
+    * If the end nodes of the given edges do not exist in :attr:`g`,
-    to add new nodes. The node features of the new nodes will be created
+      :func:`dgl.add_nodes` is invoked to add those nodes.
-    with zeros.
+      The node features of the new nodes will be filled with zeros.
+    * For features in :attr:`g` but not in :attr:`data`,
-    * If the key of :attr:`data` does not contain some existing feature fields,
+      DGL assigns zero features for the newly added edges.
-    those features for the new edges will be created with zeros.
+    * For features in :attr:`data` but not in :attr:`g`, DGL assigns zero features
+      for the existing edges in the graph.
-    * If the key of :attr:`data` contains new feature fields, those features for
-    the old edges will be created with zeros.
    Examples
    --------
@@ -1105,7 +1056,7 @@ def add_edges(g, u, v, data=None, etype=None):
    >>> import dgl
    >>> import torch
-    **Homogeneous Graphs or Heterogeneous Graphs with A Single Edge Type**
+    **Homogeneous Graphs**
    >>> g = dgl.graph((torch.tensor([0, 1]), torch.tensor([1, 2])))
    >>> g.num_edges()
@@ -1121,7 +1072,7 @@ def add_edges(g, u, v, data=None, etype=None):
    4
    If the graph has some edge features and new edges are added without
-    features, their features will be created with zeros.
+    features, their features will be filled with zeros.
    >>> g.edata['h'] = torch.ones(4, 1)
    >>> g = dgl.add_edges(g, torch.tensor([1]), torch.tensor([1]))
@@ -1136,12 +1087,12 @@ def add_edges(g, u, v, data=None, etype=None):
    tensor([[1.], [1.], [1.], [1.], [0.], [1.], [2.]])
    Since :attr:`data` contains new feature fields, the features for old edges
-    will be created with zeros.
+    will be filled with zeros.
    >>> g.edata['w']
    tensor([[0.], [0.], [0.], [0.], [0.], [1.], [1.]])
-    **Heterogeneous Graphs with Multiple Edge Types**
+    **Heterogeneous Graphs**
    >>> g = dgl.heterograph({
    ...     ('user', 'plays', 'game'): (torch.tensor([0, 1, 1, 2]),
@@ -1166,22 +1117,24 @@ def add_edges(g, u, v, data=None, etype=None):
    return g
 def remove_edges(g, eids, etype=None):
-    r"""Remove multiple edges with the specified edge type.
+    r"""Remove the specified edges and return a new graph.
-    A new graph with certain edges deleted is returned.
-    Nodes will not be removed. After removing edges, the rest
-    edges will be re-indexed using consecutive integers from 0,
-    with their relative order preserved.
-    The features for the removed edges will be removed accordingly.
+    Also delete the features of the edges. The edges must exist in the graph.
+    The resulting graph has the same number of nodes as the input one,
+    even if some nodes become isolated after the edge removal.
    Parameters
    ----------
-    eids : int, tensor, numpy.ndarray, list
+    eids : int, Tensor or iterable[int]
-        IDs for the edges to remove.
+        The IDs of the edges to remove.
-    etype : str or tuple of str, optional
+    etype : str or (str, str, str), optional
-        The type of the edges to remove. Can be omitted if there is
+        The type names of the edges. The allowed type name formats are:
-        only one edge type in the graph.
+        * ``(str, str, str)`` for source node type, edge type and destination node type.
+        * or one ``str`` edge type name if the name can uniquely identify a
+          triplet format in the graph.
+        Can be omitted if the graph has only one type of edges.
    Return
    ------
@@ -1193,7 +1146,7 @@ def remove_edges(g, eids, etype=None):
    >>> import dgl
    >>> import torch
-    **Homogeneous Graphs or Heterogeneous Graphs with A Single Edge Type**
+    **Homogeneous Graphs**
    >>> g = dgl.graph((torch.tensor([0, 0, 2]), torch.tensor([0, 1, 2])))
    >>> g.edata['he'] = torch.arange(3).float().reshape(-1, 1)
@@ -1207,7 +1160,7 @@ def remove_edges(g, eids, etype=None):
    >>> g.edata['he']
    tensor([[2.]])
-    **Heterogeneous Graphs with Multiple Edge Types**
+    **Heterogeneous Graphs**
    >>> g = dgl.heterograph({
    ...     ('user', 'plays', 'game'): (torch.tensor([0, 1, 1, 2]),
@@ -1231,19 +1184,16 @@ def remove_edges(g, eids, etype=None):
 def remove_nodes(g, nids, ntype=None):
-    r"""Remove multiple nodes with the specified node type.
+    r"""Remove the specified nodes and return a new graph.
-    A new graph with certain nodes deleted is returned.
-    Edges that connect to the nodes will be removed as well. After removing
-    nodes and edges, the rest nodes and edges will be re-indexed using
-    consecutive integers from 0, with their relative order preserved.
-    The features for the removed nodes/edges will be removed accordingly.
+    Also delete the features. Edges that connect from/to the nodes will be
+    removed as well. After the removal, DGL re-labels the remaining nodes and edges
+    with IDs from 0.
    Parameters
    ----------
-    nids : int, tensor, numpy.ndarray, list
+    nids : int, Tensor or iterable[int]
-        Nodes to remove.
+        The nodes to be removed.
    ntype : str, optional
        The type of the nodes to remove. Can be omitted if there is
        only one node type in the graph.
@@ -1259,7 +1209,7 @@ def remove_nodes(g, nids, ntype=None):
    >>> import dgl
    >>> import torch
-    **Homogeneous Graphs or Heterogeneous Graphs with A Single Node Type**
+    **Homogeneous Graphs**
    >>> g = dgl.graph((torch.tensor([0, 0, 2]), torch.tensor([0, 1, 2])))
    >>> g.ndata['hv'] = torch.arange(3).float().reshape(-1, 1)
@@ -1274,7 +1224,7 @@ def remove_nodes(g, nids, ntype=None):
    >>> g.edata['he']
    tensor([[2.]])
-    **Heterogeneous Graphs with Multiple Node Types**
+    **Heterogeneous Graphs**
    >>> g = dgl.heterograph({
    ...     ('user', 'plays', 'game'): (torch.tensor([0, 1, 1, 2]),
@@ -1301,42 +1251,41 @@ def remove_nodes(g, nids, ntype=None):
    return g
 def add_self_loop(g, etype=None):
-    r"""Add self-loop for each node in the graph for the given edge type.
+    r"""Add self-loops for each node in the graph and return a new graph.
-    A new graph with self-loop is returned.
-    If the graph is heterogeneous, the given edge type must have its source
-    node type the same as its destination node type.
    Parameters
    ----------
    g : DGLGraph
        The graph.
-    etype : str or tuple of str, optional
+    etype : str or (str, str, str), optional
-        The type of the edges to remove. Can be omitted if there is
+        The type names of the edges. The allowed type name formats are:
-        only one edge type in the graph.
+        * ``(str, str, str)`` for source node type, edge type and destination node type.
+        * or one ``str`` edge type name if the name can uniquely identify a
+          triplet format in the graph.
-        Its source node type must be the same as its destination node type.
+        Can be omitted if the graph has only one type of edges.
    Return
    ------
    DGLGraph
-        The graph with self-loop.
+        The graph with self-loops.
    Notes
    -----
-    * :func:`add_self_loop` adds self loops regardless of whether the self-loop already exists.
+    * The function only supports homogeneous graphs, or heterogeneous graphs where
+      the relation specified by :attr:`etype` has the same source and destination node type.
-      If you would like to have exactly one self-loop for every node, you would need to
+    * The function adds self-loops regardless of whether they already exist or not.
+      If one wishes to have exactly one self-loop for every node,
      call :func:`remove_self_loop` before invoking :func:`add_self_loop`.
+    * Features of the new edges (self-loop edges) will be filled with zeros.
-    * Features for the new edges (self-loop edges) will be created with zeros.
    Examples
    --------
    >>> import dgl
    >>> import torch
-    **Homogeneous Graphs or Heterogeneous Graphs with A Single Node Type**
+    **Homogeneous Graphs**
    >>> g = dgl.graph((torch.tensor([0, 0, 2]), torch.tensor([2, 1, 0])))
    >>> g.ndata['hv'] = torch.arange(3).float().reshape(-1, 1)
@@ -1354,7 +1303,7 @@ def add_self_loop(g, etype=None):
            [0.],
            [0.]])
-    **Heterogeneous Graphs with Multiple Node Types**
+    **Heterogeneous Graphs**
    >>> g = dgl.heterograph({
    ...     ('user', 'follows', 'user'): (torch.tensor([1, 2]),
@@ -1377,20 +1326,28 @@ def add_self_loop(g, etype=None):
    new_g = add_edges(g, nodes, nodes, etype=etype)
    return new_g
-DGLHeteroGraph.add_self_loop = add_self_loop
+DGLHeteroGraph.add_self_loop = utils.alias_func(add_self_loop)
 def remove_self_loop(g, etype=None):
-    r""" Remove self loops for each node in the graph.
+    r"""Remove self-loops for each node in the graph and return a new graph.
-    A new graph with self-loop removed is returned.
-    If there are multiple self loops for a certain node,
-    all of them will be removed.
    Parameters
    ----------
-    etype : str or tuple of str, optional
+    g : DGLGraph
-        The type of the edges to remove. Can be omitted if there is
+        The graph.
-        only one edge type in the graph.
+    etype : str or (str, str, str), optional
+        The type names of the edges. The allowed type name formats are:
+        * ``(str, str, str)`` for source node type, edge type and destination node type.
+        * or one ``str`` edge type name if the name can uniquely identify a
+          triplet format in the graph.
+        Can be omitted if the graph has only one type of edges.
+    Notes
+    -----
+    If a node has multiple self-loops, remove them all. Do nothing for nodes without
+    self-loops.
    Examples
    ---------
@@ -1398,7 +1355,7 @@ def remove_self_loop(g, etype=None):
    >>> import dgl
    >>> import torch
-    **Homogeneous Graphs or Heterogeneous Graphs with A Single Node Type**
+    **Homogeneous Graphs**
    >>> g = dgl.graph((torch.tensor([0, 0, 0, 1]), torch.tensor([1, 0, 0, 2])))
    >>> g.edata['he'] = torch.arange(4).float().reshape(-1, 1)
@@ -1409,7 +1366,7 @@ def remove_self_loop(g, etype=None):
    >>> g.edata['he']
    tensor([[0.],[3.]])
-    **Heterogeneous Graphs with Multiple Node Types**
+    **Heterogeneous Graphs**
    >>> g = dgl.heterograph({
    ...     ('user', 'follows', 'user'): (torch.tensor([0, 1, 1, 1, 2]),
@@ -1442,7 +1399,7 @@ def remove_self_loop(g, etype=None):
    new_g = remove_edges(g, self_loop_eids, etype=etype)
    return new_g
-DGLHeteroGraph.remove_self_loop = remove_self_loop
+DGLHeteroGraph.remove_self_loop = utils.alias_func(remove_self_loop)
 def compact_graphs(graphs, always_preserve=None, copy_ndata=True, copy_edata=True):
    """Given a list of graphs with the same set of nodes, find and eliminate the common
@@ -1502,10 +1459,9 @@ def compact_graphs(graphs, always_preserve=None, copy_ndata=True, copy_edata=Tru
    This function currently requires that the same node type of all graphs should have
    the same node type ID, i.e. the node types are *ordered* the same.
-    If :attr:`copy_edata` is True, same tensors will be used for
+    If :attr:`copy_edata` is True, the resulting graph will share the edge feature
-    the features of the original graphs and the returned graphs to save memory cost.
+    tensors with the input graph. Hence, users should try to avoid in-place operations
-    As a result, users should avoid performing in-place operations on the edge features of
+    which will be visible to both graphs.
-    the returned graph, which will corrupt the edge features of the original graph as well.
    Examples
    --------
@@ -1666,6 +1622,13 @@ def to_block(g, dst_nodes=None, include_dst_in_src=True):
        If :attr:`dst_nodes` is specified but it is not a superset of all the nodes that
        have at least one inbound edge.
+    Notes
+    -----
+    :func:`to_block` is most commonly used in customizing neighborhood sampling
+    for stochastic training on a large graph.  Please refer to the user guide
+    :ref:`guide-minibatch` for a more thorough discussion about the methodology
+    of stochastic training.
    Examples
    --------
    Converting a homogeneous graph to a block as described above:
@@ -1727,13 +1690,6 @@ def to_block(g, dst_nodes=None, include_dst_in_src=True):
    >>> block.srcnodes['A'].data[dgl.NID]
    tensor([2, 1])
-    Notes
-    -----
-    :func:`to_block` is most commonly used in customizing neighborhood sampling
-    for stochastic training on a large graph.  Please refer to User Guide Chapter 6
-    for a more thorough discussion driven by the methodology of stochastic training on a
-    large graph.
    """
    assert g.device == F.cpu(), 'the graph must be on CPU'
@@ -1781,14 +1737,19 @@ def to_block(g, dst_nodes=None, include_dst_in_src=True):
    return new_graph
-def to_simple(g, return_counts='count', writeback_mapping=False, copy_ndata=True, copy_edata=False):
+def to_simple(g,
-    r"""Convert a graph to a simple graph, removing the parallel edges.
+              return_counts='count',
+              writeback_mapping=False,
+              copy_ndata=True,
+              copy_edata=False):
+    r"""Convert a graph to a simple graph without parallel edges and return it.
-    For a heterogeneous graph with multiple edge types, DGL removes the parallel edges
+    For a heterogeneous graph with multiple edge types, DGL treats edges with the same
-    with the same edge type.
+    edge type and endpoints as parallel edges and removes them.
+    Optionally, one can get the number of parallel edges by specifying the
-    Optionally, the number of parallel edges and/or the mapping from the edges in the simple graph
+    :attr:`return_counts` argument. To get a mapping from the edge IDs in the
-    to the edges in the original graph is returned.
+    input graph to the edge IDs in the resulting graph, set :attr:`writeback_mapping`
+    to true.
    Parameters
    ----------
@@ -1801,10 +1762,10 @@ def to_simple(g, return_counts='count', writeback_mapping=False, copy_ndata=True
        (Default: "count")
    writeback_mapping: bool, optional
-        If True, a write-back mapping is returned for each edge
+        If True, return an extra write-back mapping for each edge
-        type subgraph.  The write-back mapping is a tensor recording
+        type.  The write-back mapping is a tensor recording
-        the mapping from the IDs of the edges in the new graph to
+        the mapping from the edge IDs in the input graph to
-        the IDs of the edges in the original graph.  If the graph is
+        the edge IDs in the result graph. If the graph is
        heterogeneous, DGL returns a dictionary of edge types and such
        tensors.
@@ -1833,21 +1794,17 @@ def to_simple(g, return_counts='count', writeback_mapping=False, copy_ndata=True
    DGLGraph
        The graph.
    tensor or dict of tensor
-        The writeback mapping.
+        The writeback mapping. Only returned when ``writeback_mapping`` is True.
-        Only returned if ``writeback_mapping`` is True.
    Notes
    -----
-    If ``copy_ndata`` is ``True``, same tensors will be used for
+    If :attr:`copy_ndata` is True, the resulting graph will share the node feature
-    the features of the original graph and the to_simpled graph. As a result, users
+    tensors with the input graph. Hence, users should try to avoid in-place operations
-    should avoid performing in-place operations on the features of the to_simpled
+    which will be visible to both graphs.
-    graph, which will corrupt the features of the original graph as well. For
-    concrete examples, refer to the ``Examples`` section below.
    Examples
    --------
-    **Homogeneous Graphs or Heterogeneous Graphs with A Single Edge Type**
+    **Homogeneous Graphs**
    Create a graph for demonstrating to_simple API.
    In the original graph, there are multiple edges between 1 and 2.
@@ -1881,24 +1838,7 @@ def to_simple(g, return_counts='count', writeback_mapping=False, copy_ndata=True
    >>> 'h' in g.edata
    False
-    **In-place operations on features of one graph will be reflected on features of
+    **Heterogeneous Graphs**
-    the simple graph, which is dangerous. Out-place operations will not be reflected.**
-    >>> sg.ndata['h'] += 1
-    >>> g.ndata['h']
-    tensor([[1.],
-            [2.],
-            [3.]])
-    >>> g.ndata['h'] += 1
-    >>> sg.ndata['h']
-    tensor([[2.],
-            [3.],
-            [4.]])
-    >>> sg.ndata['h2'] = th.ones(3, 1)
-    >>> 'h2' in g.ndata
-    False
-    **Heterogeneous Graphs with Multiple Edge Types**
    >>> g = dgl.heterograph({
    ...     ('user', 'wins', 'user'): (th.tensor([0, 2, 0, 2, 2]), th.tensor([1, 1, 2, 1, 0])),
@@ -1968,7 +1908,7 @@ def to_simple(g, return_counts='count', writeback_mapping=False, copy_ndata=True
    return simple_graph
-DGLHeteroGraph.to_simple = to_simple
+DGLHeteroGraph.to_simple = utils.alias_func(to_simple)
 def as_heterograph(g, ntype='_U', etype='_E'):  # pylint: disable=unused-argument
    """Convert a DGLGraph to a DGLHeteroGraph with one node and edge type.

--- a/python/dgl/utils/internal.py
+++ b/python/dgl/utils/internal.py
@@ -891,4 +891,12 @@ def set_num_threads(num_threads):
    """
    _CAPI_DGLSetOMPThreads(num_threads)
+def alias_func(func):
+    """Return an alias function with proper docstring."""
+    @wraps(func)
+    def _fn(*args, **kwargs):
+        return func(*args, **kwargs)
+    _fn.__doc__ = """Alias of :func:`dgl.{}`.""".format(func.__name__)
+    return _fn
 _init_api("dgl.utils.internal")