[Doc] Scan the API docs and make many changes (#2080)

* WIP: api * dgl.sampling, dgl.data * dgl.sampling; dgl.dataloading * sampling packages * convert * subgraph * deprecate * subgraph APIs * All docstrings for convert/subgraph/transform * almost all funcs under dgl namespace * WIP: DGLGraph * done graph query * message passing functions * lint * fix merge error * fix test * lint * fix Co-authored-by: Quan Gan <coin2028@hotmail.com>

[Doc] Scan the API docs and make many changes (#2080)
* WIP: api * dgl.sampling, dgl.data * dgl.sampling; dgl.dataloading * sampling packages * convert * subgraph * deprecate * subgraph APIs * All docstrings for convert/subgraph/transform * almost all funcs under dgl namespace * WIP: DGLGraph * done graph query * message passing functions * lint * fix merge error * fix test * lint * fix Co-authored-by: Quan Gan <coin2028@hotmail.com>
f13b9b62 · Minjie Wang · GitHub · 35e25914 · f13b9b62 · f13b9b62
Unverified Commit f13b9b62 authored Aug 20, 2020 by Minjie Wang Committed by GitHub Aug 20, 2020
11 changed files
--- a/python/dgl/data/graph_serialize.py
+++ b/python/dgl/data/graph_serialize.py
 """For Graph Serialization"""
 from __future__ import absolute_import
 import os
-from ..base import dgl_warning
+from ..base import dgl_warning, DGLError
 from ..heterograph import DGLHeteroGraph
 from .._ffi.object import ObjectBase, register_object
 from .._ffi.function import _init_api
@@ -66,16 +66,23 @@ class GraphData(ObjectBase):


 def save_graphs(filename, g_list, labels=None):
-    r"""
-    Save DGLGraphs and graph labels to file
+    r"""Save graphs and optionally their labels to file.
+
+    Besides saving to local files, DGL supports writing the graphs directly
+    to S3 (by providing a ``"s3://..."`` path) or to HDFS (by providing
+    a ``"hdfs://..."`` path).
+
+    The function saves both the graph structure and node/edge features to file
+    in DGL's own binary format. For graph-level features, pass them via
+    the :attr:`labels` argument.

    Parameters
    ----------
    filename : str
-        File name to store graphs.
+        The file name to store the graphs and labels.
    g_list: list
-        DGLGraph or list of DGLGraph/DGLHeteroGraph
-    labels: dict[str, tensor]
+        The graphs to be saved.
+    labels: dict[str, Tensor]
        labels should be dict of tensors, with str as keys

    Examples
@@ -83,7 +90,7 @@ def save_graphs(filename, g_list, labels=None):
    >>> import dgl
    >>> import torch as th

-    Create :code:`DGLGraph`/:code:`DGLHeteroGraph` objects and initialize node
+    Create :class:`DGLGraph` objects and initialize node
    and edge features.

    >>> g1 = dgl.graph(([0, 1, 2], [1, 2, 3]))
@@ -96,55 +103,66 @@ def save_graphs(filename, g_list, labels=None):
    >>> graph_labels = {"glabel": th.tensor([0, 1])}
    >>> save_graphs("./data.bin", [g1, g2], graph_labels)

+    See Also
+    --------
+    load_graphs
    """
    # if it is local file, do some sanity check
    if filename.startswith('s3://') is False:
-        assert not os.path.isdir(filename), "filename {} is an existing directory.".format(filename)
+        if os.path.isdir(filename):
+            raise DGLError("Filename {} is an existing directory.".format(filename))
        f_path, _ = os.path.split(filename)
        if not os.path.exists(f_path):
            os.makedirs(f_path)

    g_sample = g_list[0] if isinstance(g_list, list) else g_list
-    if type(g_sample) == DGLHeteroGraph: # Doesn't support DGLHeteroGraph's derived class
+    if type(g_sample) == DGLHeteroGraph:  # Doesn't support DGLHeteroGraph's derived class
        save_heterographs(filename, g_list, labels)
    else:
-        raise Exception(
-            "Invalid argument g_list. Must be a DGLGraph or a list of DGLGraphs/DGLHeteroGraphs")
+        raise DGLError(
+            "Invalid argument g_list. Must be a DGLGraph or a list of DGLGraphs.")



 def load_graphs(filename, idx_list=None):
-    """
-    Load DGLGraphs from file
+    """Load graphs and optionally their labels from file saved by :func:`save_graphs`.
+
+    Besides loading from local files, DGL supports loading the graphs directly
+    from S3 (by providing a ``"s3://..."`` path) or from HDFS (by providing
+    a ``"hdfs://..."`` path).

    Parameters
    ----------
    filename: str
-        filename to load graphs
-    idx_list: list of int
-        list of index of graph to be loaded. If not specified, will
-        load all graphs from file
+        The file name to load graphs from.
+    idx_list: list[int], optional
+        The indices of the graphs to be loaded if the file contains multiple graphs.
+        Default is loading all the graphs stored in the file.

    Returns
    --------
-    graph_list: list of DGLGraphs / DGLHeteroGraph
+    graph_list: list[DGLGraph]
        The loaded graphs.
    labels: dict[str, Tensor]
        The graph labels stored in file. If no label is stored, the dictionary is empty.
-        Regardless of whether the ``idx_list`` argument is given or not, the returned dictionary
-        always contains labels of all the graphs.
+        Regardless of whether the ``idx_list`` argument is given or not,
+        the returned dictionary always contains the labels of all the graphs.

    Examples
    ----------
-    Following the example in save_graphs.
+    Following the example in :func:`save_graphs`.

    >>> from dgl.data.utils import load_graphs
    >>> glist, label_dict = load_graphs("./data.bin") # glist will be [g1, g2]
    >>> glist, label_dict = load_graphs("./data.bin", [0]) # glist will be [g1]

+    See Also
+    --------
+    save_graphs
    """
    # if it is local file, do some sanity check
-    assert filename.startswith('s3://') or os.path.exists(filename), "file {} does not exist.".format(filename)
+    if not (filename.startswith('s3://') or os.path.exists(filename)):
+        raise DGLError("File {} does not exist.".format(filename))

    version = _CAPI_GetFileVersion(filename)
    if version == 1:
@@ -155,7 +173,7 @@ def load_graphs(filename, idx_list=None):
    elif version == 2:
        return load_graph_v2(filename, idx_list)
    else:
-        raise Exception("Invalid DGL Version Number")
+        raise DGLError("Invalid DGL Version Number.")


 def load_graph_v2(filename, idx_list=None):

--- a/python/dgl/dataloading/__init__.py
+++ b/python/dgl/dataloading/__init__.py
-"""Classes that involves iterating over nodes or edges in a graph and generates
-computation dependency of necessary nodes with neighborhood sampling methods.
+"""The ``dgl.dataloading`` package contains:

-This includes
-
-* :py:class:`~dgl.dataloading.pytorch.NodeDataLoader` for iterating over the nodes in
-  a graph in minibatches.
-
-* :py:class:`~dgl.dataloading.pytorch.EdgeDataLoader` for iterating over the edges in
-  a graph in minibatches.
+* Data loader classes for iterating over a set of nodes or edges in a graph and generating
+  computation dependency via neighborhood sampling methods.

 * Various sampler classes that perform neighborhood sampling for multi-layer GNNs.

 * Negative samplers for link prediction.

-NOTE: this module is experimental and the interfaces may be subject to changes in
-future releases.
+For a holistic explanation of how the different components work together,
+read the user guide :ref:`guide-minibatch`.
+
+.. note::
+    This package is experimental and the interfaces may be subject
+    to changes in future releases. It currently only has implementations in PyTorch.
 """
 from .neighbor import *
 from .dataloader import *

--- a/python/dgl/generators.py
+++ b/python/dgl/generators.py
 """Module for various graph generator functions."""
-# pylint: disable= dangerous-default-value

 from . import backend as F
 from . import convert
@@ -7,13 +6,14 @@ from . import random

 __all__ = ['rand_graph', 'rand_bipartite']

-def rand_graph(num_nodes, num_edges, idtype=F.int64, device=F.cpu(),
-               formats=['coo', 'csr', 'csc']):
-    """Generate a random graph of the given number of nodes/edges.
+def rand_graph(num_nodes, num_edges, idtype=F.int64, device=F.cpu()):
+    """Generate a random graph of the given number of nodes/edges and return.

-    It uniformly chooses ``num_edges`` from all pairs and form a graph.
+    It uniformly chooses ``num_edges`` from all possible node pairs and forms a graph.
+    The random choice is without replacement, which means there will be no multi-edge
+    in the resulting graph.

-    TODO(minjie): support RNG as one of the arguments.
+    To control the randomness, set the random seed via :func:`dgl.seed`.

    Parameters
    ----------
@@ -22,34 +22,51 @@ def rand_graph(num_nodes, num_edges, idtype=F.int64, device=F.cpu(),
    num_edges : int
        The number of edges
    idtype : int32, int64, optional
-        Integer ID type. Must be int32 or int64. Default: int64.
+        The data type for storing the structure-related graph information
+        such as node and edge IDs. It should be a framework-specific data type object
+        (e.g., torch.int32). By default, DGL uses int64.
    device : Device context, optional
-        Device on which the graph is created. Default: CPU.
-    formats : str or list of str
-        It can be ``'coo'``/``'csr'``/``'csc'`` or a sublist of them,
-        Force the storage formats.  Default: ``['coo', 'csr', 'csc']``.
+        The device of the resulting graph. It should be a framework-specific device
+        object (e.g., torch.device). By default, DGL stores the graph on CPU.

    Returns
    -------
-    DGLHeteroGraph
-        Generated random graph.
+    DGLGraph
+        The generated random graph.
+
+    See Also
+    --------
+    rand_bipartite
+
+    Examples
+    --------
+    >>> import dgl
+    >>> dgl.rand_graph(100, 10)
+    Graph(num_nodes=100, num_edges=10,
+          ndata_schemes={}
+          edata_schemes={})
    """
+    #TODO(minjie): support RNG as one of the arguments.
    eids = random.choice(num_nodes * num_nodes, num_edges, replace=False)
-    rows = F.copy_to(F.astype(eids / num_nodes, idtype), device)
-    cols = F.copy_to(F.astype(eids % num_nodes, idtype), device)
-    g = convert.graph((rows, cols),
-                      num_nodes=num_nodes,
-                      idtype=idtype, device=device)
-    return g.formats(formats)
+    eids = F.zerocopy_to_numpy(eids)
+    rows = F.zerocopy_from_numpy(eids // num_nodes)
+    cols = F.zerocopy_from_numpy(eids % num_nodes)
+    rows = F.copy_to(F.astype(rows, idtype), device)
+    cols = F.copy_to(F.astype(cols, idtype), device)
+    return convert.graph((rows, cols),
+                         num_nodes=num_nodes,
+                         idtype=idtype, device=device)

 def rand_bipartite(utype, etype, vtype,
                   num_src_nodes, num_dst_nodes, num_edges,
-                   idtype=F.int64, device=F.cpu(),
-                   formats=['csr', 'coo', 'csc']):
-    """Generate a random bipartite graph of the given number of src/dst nodes and
-    number of edges.
+                   idtype=F.int64, device=F.cpu()):
+    """Generate a random uni-directional bipartite graph and return.
+
+    It uniformly chooses ``num_edges`` from all possible node pairs and forms a graph.
+    The random choice is without replacement, which means there will be no multi-edge
+    in the resulting graph.

-    It uniformly chooses ``num_edges`` from all pairs and form a graph.
+    To control the randomness, set the random seed via :func:`dgl.seed`.

    Parameters
    ----------
@@ -60,28 +77,43 @@ def rand_bipartite(utype, etype, vtype,
    vtype : str, optional
        The name of the destination node type.
    num_src_nodes : int
-        The number of source nodes, the :math:`|U|` in :math:`G=(U,V,E)`.
+        The number of source nodes.
    num_dst_nodes : int
-        The number of destination nodes, the :math:`|V|` in :math:`G=(U,V,E)`.
+        The number of destination nodes.
    num_edges : int
        The number of edges
    idtype : int32, int64, optional
-        Integer ID type. Must be int32 or int64. Default: int64.
+        The data type for storing the structure-related graph information
+        such as node and edge IDs. It should be a framework-specific data type object
+        (e.g., torch.int32). By default, DGL uses int64.
    device : Device context, optional
-        Device on which the graph is created. Default: CPU.
-    formats : str or list of str
-        It can be ``'coo'``/``'csr'``/``'csc'`` or a sublist of them,
-        Force the storage formats.  Default: ``['coo', 'csr', 'csc']``.
+        The device of the resulting graph. It should be a framework-specific device
+        object (e.g., torch.device). By default, DGL stores the graph on CPU.

    Returns
    -------
-    DGLHeteroGraph
-        Generated random bipartite graph.
+    DGLGraph
+        The generated random bipartite graph.
+
+    See Also
+    --------
+    rand_graph
+
+    Examples
+    --------
+    >>> import dgl
+    >>> dgl.rand_bipartite('user', 'buys', 'game', 50, 100, 10)
+    Graph(num_nodes={'game': 100, 'user': 50},
+          num_edges={('user', 'buys', 'game'): 10},
+          metagraph=[('user', 'game', 'buys')])
    """
+    #TODO(minjie): support RNG as one of the arguments.
    eids = random.choice(num_src_nodes * num_dst_nodes, num_edges, replace=False)
-    rows = F.copy_to(F.astype(eids / num_dst_nodes, idtype), device)
-    cols = F.copy_to(F.astype(eids % num_dst_nodes, idtype), device)
-    g = convert.heterograph({(utype, etype, vtype): (rows, cols)},
-                            {utype: num_src_nodes, vtype: num_dst_nodes},
-                            idtype=idtype, device=device)
-    return g.formats(formats)
+    eids = F.zerocopy_to_numpy(eids)
+    rows = F.zerocopy_from_numpy(eids // num_dst_nodes)
+    cols = F.zerocopy_from_numpy(eids % num_dst_nodes)
+    rows = F.copy_to(F.astype(rows, idtype), device)
+    cols = F.copy_to(F.astype(cols, idtype), device)
+    return convert.heterograph({(utype, etype, vtype): (rows, cols)},
+                               {utype: num_src_nodes, vtype: num_dst_nodes},
+                               idtype=idtype, device=device)
--- a/python/dgl/heterograph.py
+++ b/python/dgl/heterograph.py
 """Classes for heterogeneous graphs."""
 #pylint: disable= too-many-lines
-from collections import defaultdict, Iterable
-from collections.abc import Mapping
+from collections import defaultdict
+from collections.abc import Mapping, Iterable
 from contextlib import contextmanager
 import copy
 import numbers
@@ -21,158 +21,17 @@ from .view import HeteroNodeView, HeteroNodeDataView, HeteroEdgeView, HeteroEdge
 __all__ = ['DGLHeteroGraph', 'combine_names']

 class DGLHeteroGraph(object):
-    """Base heterogeneous graph class.
-
-    **Do NOT instantiate from this class directly; use** :mod:`conversion methods
-    <dgl.convert>` **instead.**
-
-    A Heterogeneous graph is defined as a graph with node types and edge
-    types.
-
-    If two edges share the same edge type, then their source nodes, as well
-    as their destination nodes, also have the same type (the source node
-    types don't have to be the same as the destination node types).
-
-    Examples
-    --------
-    Suppose that we want to construct the following heterogeneous graph:
-
-    .. graphviz::
-
-       digraph G {
-           Alice -> Bob [label=follows]
-           Bob -> Carol [label=follows]
-           Alice -> Tetris [label=plays]
-           Bob -> Tetris [label=plays]
-           Bob -> Minecraft [label=plays]
-           Carol -> Minecraft [label=plays]
-           Nintendo -> Tetris [label=develops]
-           Mojang -> Minecraft [label=develops]
-           {rank=source; Alice; Bob; Carol}
-           {rank=sink; Nintendo; Mojang}
-       }
-
-    And suppose that one maps the users, games and developers to the following
-    IDs:
-
-    =========  =====  ===  =====
-    User name  Alice  Bob  Carol
-    =========  =====  ===  =====
-    User ID    0      1    2
-    =========  =====  ===  =====
-
-    =========  ======  =========
-    Game name  Tetris  Minecraft
-    =========  ======  =========
-    Game ID    0       1
-    =========  ======  =========
-
-    ==============  ========  ======
-    Developer name  Nintendo  Mojang
-    ==============  ========  ======
-    Developer ID    0         1
-    ==============  ========  ======
-
-    One can construct the graph as follows:
-
-    >>> g = dgl.heterograph({
-    ...     ('user', 'follows', 'user'): ([0, 1], [1, 2]),
-    ...     ('user', 'plays', 'game'): ([0, 1, 1, 2], [0, 0, 1, 1]),
-    ...     ('developer', 'develops', 'game'): ([0, 1], [0, 1]),
-    ...     })
-
-    Then one can query the graph structure by specifying the ``ntype`` or ``etype`` arguments:
-
-    >>> g.number_of_nodes('user')
-    3
-    >>> g.number_of_edges('plays')
-    4
-    >>> g.out_degrees(etype='develops')  # out-degrees of source nodes of 'develops' edge type
-    tensor([1, 1])
-    >>> g.in_edges(0, etype='develops')  # in-edges of destination node 0 of 'develops' edge type
-    (tensor([0]), tensor([0]))
-
-    Or on the sliced graph for an edge type:
-
-    >>> g['plays'].number_of_edges()
-    4
-    >>> g['develops'].out_degrees()
-    tensor([1, 1])
-    >>> g['develops'].in_edges(0)
-    (tensor([0]), tensor([0]))
-
-    Node type names must be distinct (no two types have the same name). Edge types could
-    have the same name but they must be distinguishable by the ``(src_type, edge_type, dst_type)``
-    triplet (called *canonical edge type*).
-
-    For example, suppose a graph that has two types of relation "user-watches-movie"
-    and "user-watches-TV" as follows:
-
-    >>> GG = dgl.heterograph({
-    ...     ('user', 'watches', 'movie'): ([0, 1, 1], [1, 0, 1]),
-    ...     ('user', 'watches', 'TV'): ([0, 1], [0, 1])
-    ... })
-
-    To distinguish between the two "watches" edge type, one must specify a full triplet:
-
-    >>> GG.number_of_edges(('user', 'watches', 'movie'))
-    3
-    >>> GG.number_of_edges(('user', 'watches', 'TV'))
-    2
-    >>> GG['user', 'watches', 'movie'].out_degrees()
-    tensor([1, 2])
-
-    Using only one single edge type string "watches" is ambiguous and will cause error:
-
-    >>> GG.number_of_edges('watches')  # AMBIGUOUS!!
-
-    In many cases, there is only one type of nodes or one type of edges, and the ``ntype``
-    and ``etype`` argument could be omitted. This is very common when using the sliced
-    graph, which usually contains only one edge type, and sometimes only one node type:
-
-    >>> g['follows'].number_of_nodes()  # OK!! because g['follows'] only has one node type 'user'
-    3
-    >>> g['plays'].number_of_nodes()  # ERROR!! There are two types 'user' and 'game'.
-    >>> g['plays'].number_of_edges()  # OK!! because there is only one edge type 'plays'
-
-    TODO(minjie): docstring about uni-directional bipartite graph
-
-    Metagraph
-    ---------
-    For each heterogeneous graph, one can often infer the *metagraph*, the template of
-    edge connections showing how many types of nodes and edges exist in the graph, and
-    how each edge type could connect between node types.
-
-    One can analyze the example gameplay graph above and figure out the metagraph as
-    follows:
-
-    .. graphviz::
-
-       digraph G {
-           User -> User [label=follows]
-           User -> Game [label=plays]
-           Developer -> Game [label=develops]
-       }
+    """Class for storing graph structure and node/edge feature data.

+    There are a few ways to create a DGLGraph:

-    Parameters
-    ----------
-    gidx : HeteroGraphIndex
-        Graph index object.
-    ntypes : list of str, pair of list of str
-        Node type list. ``ntypes[i]`` stores the name of node type i.
-        If a pair is given, the graph created is a uni-directional bipartite graph,
-        and its SRC node types and DST node types are given as in the pair.
-    etypes : list of str
-        Edge type list. ``etypes[i]`` stores the name of edge type i.
-    node_frames : list[Frame], optional
-        Node feature storage. If None, empty frame is created.
-        Otherwise, ``node_frames[i]`` stores the node features
-        of node type i. (default: None)
-    edge_frames : list[Frame], optional
-        Edge feature storage. If None, empty frame is created.
-        Otherwise, ``edge_frames[i]`` stores the edge features
-        of edge type i. (default: None)
+    * To create a homogeneous graph from Tensor data, use :func:`dgl.graph`.
+    * To create a heterogeneous graph from Tensor data, use :func:`dgl.heterograph`.
+    * To create a graph from other data sources, use ``dgl.*`` create ops. See
+      :ref:`api-graph-create-ops`.
+
+    Read the user guide chapter :ref:`guide-graph` for an in-depth explanation about its
+    usage.
    """
    is_block = False

@@ -184,6 +43,27 @@ class DGLHeteroGraph(object):
                 node_frames=None,
                 edge_frames=None,
                 **deprecate_kwargs):
+        """Internal constructor for creating a DGLGraph.
+
+        Parameters
+        ----------
+        gidx : HeteroGraphIndex
+            Graph index object.
+        ntypes : list of str, pair of list of str
+            Node type list. ``ntypes[i]`` stores the name of node type i.
+            If a pair is given, the graph created is a uni-directional bipartite graph,
+            and its SRC node types and DST node types are given as in the pair.
+        etypes : list of str
+            Edge type list. ``etypes[i]`` stores the name of edge type i.
+        node_frames : list[Frame], optional
+            Node feature storage. If None, empty frame is created.
+            Otherwise, ``node_frames[i]`` stores the node features
+            of node type i. (default: None)
+        edge_frames : list[Frame], optional
+            Edge feature storage. If None, empty frame is created.
+            Otherwise, ``edge_frames[i]`` stores the edge features
+            of edge type i. (default: None)
+        """
        if isinstance(gidx, DGLHeteroGraph):
            raise DGLError('The input is already a DGLGraph. No need to create it again.')
        if not isinstance(gidx, heterograph_index.HeteroGraphIndex):
@@ -851,12 +731,17 @@ class DGLHeteroGraph(object):

    @property
    def ntypes(self):
-        """Return the node types of the graph.
+        """Return all the node type names in the graph.

        Returns
        -------
-        list of str
-            Each ``str`` is a node type.
+        list[str]
+            All the node type names in a list.
+
+        Notes
+        -----
+        DGL internally assigns an integer ID for each node type. The returned
+        node type names are sorted according to their IDs.

        Examples
        --------
@@ -877,19 +762,27 @@ class DGLHeteroGraph(object):

    @property
    def etypes(self):
-        """Return the edge types of the graph.
+        """Return all the edge type names in the graph.

        Returns
        -------
-        list of str
-            Each ``str`` is an edge type.
+        list[str]
+            All the edge type names in a list.

        Notes
        -----
-        An edge type can appear in multiple canonical edge types. For example, ``'interacts'``
-        can appear in two canonical edge types ``('drug', 'interacts', 'drug')`` and
-        ``('protein', 'interacts', 'protein')``. It is recommended to use
-        :func:`~dgl.DGLGraph.canonical_etypes` in this case.
+        DGL internally assigns an integer ID for each edge type. The returned
+        edge type names are sorted according to their IDs.
+
+        The complete format to specify a relation is a string triplet ``(str, str, str)``
+        for source node type, edge type and destination node type. DGL calls this
+        format *canonical edge type*. An edge type can appear in multiple canonical edge types.
+        For example, ``'interacts'`` can appear in two canonical edge types
+        ``('drug', 'interacts', 'drug')`` and ``('protein', 'interacts', 'protein')``.
+
+        See Also
+        --------
+        canonical_etypes

        Examples
        --------
@@ -910,16 +803,24 @@ class DGLHeteroGraph(object):

    @property
    def canonical_etypes(self):
-        """Return the canonical edge types of the graph.
+        """Return all the canonical edge types in the graph.

-        A canonical edge type is a 3-tuple of str ``src_type, edge_type, dst_type``, where
-        ``src_type``, ``edge_type``, ``dst_type`` are the type of the source nodes, edges
-        and destination nodes respectively.
+        A canonical edge type is a string triplet ``(str, str, str)``
+        for source node type, edge type and destination node type.

        Returns
        -------
-        list of 3-tuple of str
-            Each 3-tuple of str is a canonical edge type.
+        list[(str, str, str)]
+            All the canonical edge type triplets in a list.
+
+        Notes
+        -----
+        DGL internally assigns an integer ID for each edge type. The returned
+        edge type names are sorted according to their IDs.
+
+        See Also
+        --------
+        etypes

        Examples
        --------
@@ -942,15 +843,24 @@ class DGLHeteroGraph(object):

    @property
    def srctypes(self):
-        """Return the source node types.
+        """Return all the source node type names in this graph.
+
+        If the graph can further divide its node types into two subsets A and B where
+        all the edges are from nodes of types in A to nodes of types in B, we call
+        this graph a *uni-bipartite* graph and the nodes in A being the *source*
+        nodes and the ones in B being the *destination* nodes. If the graph is not
+        uni-bipartite, the source and destination nodes are just the entire set of
+        nodes in the graph.

        Returns
        -------
-        list of str
+        list[str]
+            All the source node type names in a list.

-            * If the graph is a uni-bipartite graph, it returns the source node types.
-              For a definition of uni-bipartite, see :func:`is_unibipartite`.
-            * Otherwise, it returns all node types in the graph.
+        See Also
+        --------
+        dsttypes
+        is_unibipartite

        Examples
        --------
@@ -984,16 +894,24 @@ class DGLHeteroGraph(object):

    @property
    def dsttypes(self):
-        """Return the destination node types.
+        """Return all the destination node type names in this graph.
+
+        If the graph can further divide its node types into two subsets A and B where
+        all the edges are from nodes of types in A to nodes of types in B, we call
+        this graph a *uni-bipartite* graph and the nodes in A being the *source*
+        nodes and the ones in B being the *destination* nodes. If the graph is not
+        uni-bipartite, the source and destination nodes are just the entire set of
+        nodes in the graph.

        Returns
        -------
-        list of str
-            Each str is a node type.
+        list[str]
+            All the destination node type names in a list.

-            * If the graph is a uni-bipartite graph, it returns the destination node types.
-              For a definition of uni-bipartite, see :func:`is_unibipartite`.
-            * Otherwise, it returns all node types in the graph.
+        See Also
+        --------
+        srctypes
+        is_unibipartite

        Examples
        --------
@@ -1065,29 +983,24 @@ class DGLHeteroGraph(object):
    def to_canonical_etype(self, etype):
        """Convert an edge type to the corresponding canonical edge type in the graph.

-        A canonical edge type is a 3-tuple of strings ``src_type, edge_type, dst_type``, where
-        ``src_type``, ``edge_type``, ``dst_type`` are separately the type of source
-        nodes, edges and destination nodes.
+        A canonical edge type is a string triplet ``(str, str, str)``
+        for source node type, edge type and destination node type.
+
+        The function expects that the given edge type name uniquely identifies a canonical
+        edge type. DGL will raise an error if this is not the case.

        Parameters
        ----------
-        etype : str or 3-tuple of str
+        etype : str or (str, str, str)
            If :attr:`etype` is an edge type (str), it returns the corresponding canonical edge
-            type in the graph. If :attr:`etype` is already a canonical edge type
-            (3-tuple of str), it simply returns :attr:`etype`.
+            type in the graph. If :attr:`etype` is already a canonical edge type,
+            it directly returns the input unchanged.

        Returns
        -------
-        3-tuple of str
+        (str, str, str)
            The canonical edge type corresponding to the edge type.

-        Notes
-        -----
-        If :attr:`etype` is an edge type, the API expects it to appear only once in the graph. For
-        example, in a graph with canonical edge types ``('A', 'follows', 'B')``,
-        ``('A', 'follows', 'C')`` and ``('B', 'watches', 'D')``, ``'follows'`` is an invalid value
-        for :attr:`etype` while ``'watches'`` is a valid one.
-
        Examples
        --------
        The following example uses PyTorch backend.
@@ -1131,7 +1044,7 @@ class DGLHeteroGraph(object):
            return ret

    def get_ntype_id(self, ntype):
-        """Return the id of the given node type.
+        """Return the ID of the given node type.

        ntype can also be None. If so, there should be only one node type in the
        graph.
@@ -1165,7 +1078,7 @@ class DGLHeteroGraph(object):
        return ntid

    def get_ntype_id_from_src(self, ntype):
-        """Return the id of the given SRC node type.
+        """Internal function to return the ID of the given SRC node type.

        ntype can also be None. If so, there should be only one node type in the
        SRC category. Callable even when the self graph is not uni-bipartite.
@@ -1190,7 +1103,7 @@ class DGLHeteroGraph(object):
        return ntid

    def get_ntype_id_from_dst(self, ntype):
-        """Return the id of the given DST node type.
+        """Internal function to return the ID of the given DST node type.

        ntype can also be None. If so, there should be only one node type in the
        DST category. Callable even when the self graph is not uni-bipartite.
@@ -2057,16 +1970,16 @@ class DGLHeteroGraph(object):
    #################################################################

    def number_of_nodes(self, ntype=None):
-        """Alias of :func:`num_nodes`"""
+        """Alias of :meth:`num_nodes`"""
        return self.num_nodes(ntype)

    def num_nodes(self, ntype=None):
-        """Return the number of nodes.
+        """Return the number of nodes in the graph.

        Parameters
        ----------
        ntype : str, optional
-            The node type for query. If given, it returns the number of nodes for a particular
+            The node type name. If given, it returns the number of nodes of the
            type. If not given (default), it returns the total number of nodes of all types.

        Returns
@@ -2104,17 +2017,24 @@ class DGLHeteroGraph(object):
            return self._graph.number_of_nodes(self.get_ntype_id(ntype))

    def number_of_src_nodes(self, ntype=None):
-        """Alias of :func:`num_src_nodes`"""
+        """Alias of :meth:`num_src_nodes`"""
        return self.num_src_nodes(ntype)

    def num_src_nodes(self, ntype=None):
-        """Return the number of nodes of the given source node type.
+        """Return the number of source nodes in the graph.
+
+        If the graph can further divide its node types into two subsets A and B where
+        all the edges are from nodes of types in A to nodes of types in B, we call
+        this graph a *uni-bipartite* graph and the nodes in A being the *source*
+        nodes and the ones in B being the *destination* nodes. If the graph is not
+        uni-bipartite, the source and destination nodes are just the entire set of
+        nodes in the graph.

        Parameters
        ----------
        ntype : str, optional
-            The source node type for query. If given, it returns the number of nodes for a
-            particular source node type. If not given (default), it returns the number of
+            The source node type name. If given, it returns the number of nodes for
+            the source node type. If not given (default), it returns the number of
            nodes summed over all source node types.

        Returns
@@ -2122,6 +2042,11 @@ class DGLHeteroGraph(object):
        int
            The number of nodes

+        See Also
+        --------
+        num_dst_nodes
+        is_unibipartite
+
        Examples
        --------
        The following example uses PyTorch backend.
@@ -2162,20 +2087,32 @@ class DGLHeteroGraph(object):
        return self.num_dst_nodes(ntype)

    def num_dst_nodes(self, ntype=None):
-        """Return the number of nodes of the given destination node type.
+        """Return the number of destination nodes in the graph.
+
+        If the graph can further divide its node types into two subsets A and B where
+        all the edges are from nodes of types in A to nodes of types in B, we call
+        this graph a *uni-bipartite* graph and the nodes in A being the *source*
+        nodes and the ones in B being the *destination* nodes. If the graph is not
+        uni-bipartite, the source and destination nodes are just the entire set of
+        nodes in the graph.

        Parameters
        ----------
        ntype : str, optional
-            The destination node type for query. If given, it returns the number of nodes for a
-            particular destination node type. If not given (default), it returns the number of
-            nodes summed over all destination node types.
+            The destination node type name. If given, it returns the number of nodes of
+            the destination node type. If not given (default), it returns the number of
+            nodes summed over all the destination node types.

        Returns
        -------
        int
            The number of nodes

+        See Also
+        --------
+        num_src_nodes
+        is_unibipartite
+
        Examples
        --------
        The following example uses PyTorch backend.
@@ -2216,16 +2153,19 @@ class DGLHeteroGraph(object):
        return self.num_edges(etype)

    def num_edges(self, etype=None):
-        """Return the number of edges.
+        """Return the number of edges in the graph.

        Parameters
        ----------
-        etype : str or tuple of str, optional
-            The edge type for query, which can be an edge type (str) or a canonical edge type
-            (3-tuple of str). When an edge type appears in multiple canonical edge types, one
-            must use a canonical edge type. If given, it returns the number of edges for a
-            particular edge type. If not given (default), it returns the total number of edges
-            of all types.
+        etype : str or (str, str, str), optional
+            The type name of the edges. The allowed type name formats are:
+
+            * ``(str, str, str)`` for source node type, edge type and destination node type.
+            * or one ``str`` edge type name if the name can uniquely identify a
+              triplet format in the graph.
+
+            If not provided, return the total number of edges regardless of the types
+            in the graph.

        Returns
        -------
@@ -2277,22 +2217,21 @@ class DGLHeteroGraph(object):

    @property
    def is_multigraph(self):
-        """Whether the graph is a multigraph
-
-        In a multigraph, there can be multiple edges from a node ``u`` to a node ``v``.
+        """Return whether the graph is a multigraph with parallel edges.

-        For a heterogeneous graph of multiple canonical edge types, we consider it as a
-        multigraph if there are multiple edges from a node ``u`` to a node ``v`` for any
-        canonical edge type.
+        A multigraph has more than one edge between the same pair of nodes, called
+        *parallel edges*.  For heterogeneous graphs, parallel edges further require
+        the canonical edge type to be the same (see :meth:`canonical_etypes` for the
+        definition).

        Returns
        -------
        bool
-            Whether the graph is a multigraph.
+            True if the graph is a multigraph.

        Notes
        -----
-        Checking whether the graph is a multigraph can be expensive for a large one.
+        Checking whether the graph is a multigraph could be expensive for a large one.

        Examples
        --------
@@ -2330,14 +2269,14 @@ class DGLHeteroGraph(object):

    @property
    def is_homogeneous(self):
-        """Whether the graph is a homogeneous graph.
+        """Return whether the graph is a homogeneous graph.

        A homogeneous graph only has one node type and one edge type.

        Returns
        -------
        bool
-            Whether the graph is a homogeneous graph.
+            True if the graph is a homogeneous graph.

        Examples
        --------
@@ -2366,7 +2305,7 @@ class DGLHeteroGraph(object):

    @property
    def is_readonly(self):
-        """Deprecated: DGLGraph will always be mutable.
+        """**DEPRECATED**: DGLGraph will always be mutable.

        Returns
        -------
@@ -2424,38 +2363,33 @@ class DGLHeteroGraph(object):
        return self._graph.dtype

    def __contains__(self, vid):
-        """Deprecated: please directly call :func:`has_nodes`.
-        """
+        """**DEPRECATED**: please directly call :func:`has_nodes`."""
        dgl_warning('DGLGraph.__contains__ is deprecated.'
                    ' Please directly call has_nodes.')
        return self.has_nodes(vid)

    def has_nodes(self, vid, ntype=None):
-        """Whether the graph has some particular node(s) of a given type.
+        """Return whether the graph contains the given nodes.

        Parameters
        ----------
        vid : node ID(s)
-            The node ID(s) for query. The allowed formats are:
+            The node IDs. The allowed node ID formats are:
+
+            * ``int``: The ID of a single node.
+            * Int Tensor: Each element is a node ID. The tensor must have the same device type
+              and ID data type as the graph's.
+            * iterable[int]: Each element is a node ID.

-            - ``int``: The ID of a single node.
-            - ``Tensor``: A 1D tensor that contains the IDs of multiple nodes, whose data type and
-              device should be the same as the :py:attr:`idtype` and device of the graph.
-            - ``iterable[int]``: A sequence (e.g. list, tuple, numpy.ndarray)
-              of integers that contains the IDs of multiple nodes.
        ntype : str, optional
-            The node type for query. It is required if the graph has
-            multiple node types.
+            The node type name. Can be omitted if there is
+            only one type of nodes in the graph.

        Returns
        -------
        bool or bool Tensor
-
-            - If :attr:`vid` is an ``int``, the result will be a ``bool`` indicating
-              whether the graph has the particular node.
-            - If :attr:`vid` is a 1D ``Tensor`` or ``iterable[int]`` of node IDs,
-              the result will be a bool Tensor whose i-th element indicates whether
-              the graph has node :attr:`vid[i]` of the given type.
+            A tensor of bool flags where each element is True if the node is in the graph.
+            If the input is a single node, return one bool value.

        Examples
        --------
@@ -2494,50 +2428,47 @@ class DGLHeteroGraph(object):
    def has_node(self, vid, ntype=None):
        """Whether the graph has a particular node of a given type.

-        DEPRECATED: see :func:`~DGLGraph.has_nodes`
+        **DEPRECATED**: see :func:`~DGLGraph.has_nodes`
        """
        dgl_warning("DGLGraph.has_node is deprecated. Please use DGLGraph.has_nodes")
        return self.has_nodes(vid, ntype)

    def has_edges_between(self, u, v, etype=None):
-        """Whether the graph has some particular edge(s) of a given type.
+        """Return whether the graph contains the given edges.

        Parameters
        ----------
-        u : source node ID(s)
-            The source node(s) of the edges for query. The allowed formats are:
-
-            - ``int``: The source node of an edge for query.
-            - ``Tensor``: A 1D tensor that contains the source node(s) of edge(s) for query.
-              The data type and device of the tensor must be the same as the :py:attr:`idtype` and
-              device of the graph. Its i-th element represents the source node ID of the
-              i-th edge for query.
-            - ``iterable[int]`` : Similar to the tensor, but stores node IDs in a sequence
-              (e.g. list, tuple, numpy.ndarray).
-        v : destination node ID(s)
-            The destination node(s) of the edges for query. It's a counterpart of :attr:`u`
-            for destination nodes and should have the same format as :attr:`u`. If :attr:`u`
-            and :attr:`v` are not int, they should have the same length.
-        etype : str or tuple of str, optional
-            The edge type for query, which can be an edge type (str) or a canonical edge type
-            (3-tuple of str). When an edge type appears in multiple canonical edge types, one
-            must use a canonical edge type. If the graph has multiple edge types, one must
-            specify the argument. Otherwise, it can be omitted.
+        u : node IDs
+            The source node IDs of the edges. The allowed formats are:
+
+            * ``int``: A single node.
+            * Int Tensor: Each element is a node ID. The tensor must have the same device type
+              and ID data type as the graph's.
+            * iterable[int]: Each element is a node ID.
+
+        v : node IDs
+            The destination node IDs of the edges. The allowed formats are:
+
+            * ``int``: A single node.
+            * Int Tensor: Each element is a node ID. The tensor must have the same device type
+              and ID data type as the graph's.
+            * iterable[int]: Each element is a node ID.
+
+        etype : str or (str, str, str), optional
+            The type names of the edges. The allowed type name formats are:
+
+            * ``(str, str, str)`` for source node type, edge type and destination node type.
+            * or one ``str`` edge type name if the name can uniquely identify a
+              triplet format in the graph.
+
+            Can be omitted if the graph has only one type of edges.
+

        Returns
        -------
        bool or bool Tensor
-
-            - If :attr:`u` and :attr:`v` are ``int`` objects, the result will be a ``bool``
-              indicating whether there is an edge from ``u`` to ``v`` of the given edge type.
-            - If :attr:`u` and :attr:`v` are ``Tensor`` or ``iterable[int]`` objects, the
-              result will be a bool Tensor whose i-th element indicates whether there is an
-              edge from ``u[i]`` to ``v[i]`` of the given edge type.
-
-        Notes
-        -----
-        The value(s) of :attr:`u` and :attr:`v` need to be separately smaller than the
-        number of nodes of the source and destination type.
+            A tensor of bool flags where each element is True if the edge is in the graph.
+            If the input is a single edge, return one bool value.

        Examples
        --------
@@ -2595,7 +2526,7 @@ class DGLHeteroGraph(object):
    def has_edge_between(self, u, v, etype=None):
        """Whether the graph has edges of type ``etype``.

-        DEPRECATED: please use :func:`~DGLGraph.has_edge_between`.
+        **DEPRECATED**: please use :func:`~DGLGraph.has_edges_between`.
        """
        dgl_warning("DGLGraph.has_edge_between is deprecated. "
                    "Please use DGLGraph.has_edges_between")
@@ -2610,12 +2541,16 @@ class DGLHeteroGraph(object):
        Parameters
        ----------
        v : int
-            The destination node for query.
-        etype : str or tuple of str, optional
-            The edge type for query, which can be an edge type (str) or a canonical edge type
-            (3-tuple of str). When an edge type appears in multiple canonical edge types, one
-            must use a canonical edge type. If the graph has multiple edge types, one must
-            specify the argument. Otherwise, it can be omitted.
+            The node ID.
+        etype : str or (str, str, str), optional
+            The type names of the edges. The allowed type name formats are:
+
+            * ``(str, str, str)`` for source node type, edge type and destination node type.
+            * or one ``str`` edge type name if the name can uniquely identify a
+              triplet format in the graph.
+
+            Can be omitted if the graph has only one type of edges.
+

        Returns
        -------
@@ -2664,12 +2599,15 @@ class DGLHeteroGraph(object):
        Parameters
        ----------
        v : int
-            The source node for query.
-        etype : str or tuple of str, optional
-            The edge type for query, which can be an edge type (str) or a canonical edge type
-            (3-tuple of str). When an edge type appears in multiple canonical edge types, one
-            must use a canonical edge type. If the graph has multiple edge types, one must
-            specify the argument. Otherwise, it can be omitted.
+            The node ID.
+        etype : str or (str, str, str), optional
+            The type names of the edges. The allowed type name formats are:
+
+            * ``(str, str, str)`` for source node type, edge type and destination node type.
+            * or one ``str`` edge type name if the name can uniquely identify a
+              triplet format in the graph.
+
+            Can be omitted if the graph has only one type of edges.

        Returns
        -------
@@ -2720,48 +2658,51 @@ class DGLHeteroGraph(object):
                             return_uv=return_uv, etype=etype)

    def edge_ids(self, u, v, force_multi=None, return_uv=False, etype=None):
-        """Return the ID(s) of edge(s) from the given source node(s) to the given destination
-        node(s) with the specified edge type.
+        """Return the edge ID(s) given the two endpoints of the edge(s).

        Parameters
        ----------
-        u : source node ID(s)
-            The source node(s) of the edges for query. The allowed formats are:
-
-            - ``int``: The source node of an edge for query.
-            - ``Tensor``: A 1D tensor that contains the source node(s) of edge(s) for query, whose
-              data type an device should be the same as the :py:attr:`idtype` and device of
-              the graph. Its i-th element is the source node of the i-th edge for query.
-            - ``iterable[int]``: Similar to the tensor, but stores node IDs in a sequence
-              (e.g. list, tuple, numpy.ndarray).
-        v : destination node ID(s)
-            The destination node(s) of the edges for query. It's a counterpart of :attr:`u`
-            for destination nodes and should have the same format as :attr:`u`. If :attr:`u`
-            and :attr:`v` are not int, they should have the same length.
+        u : node IDs
+            The source node IDs of the edges. The allowed formats are:
+
+            * ``int``: A single node.
+            * Int Tensor: Each element is a node ID. The tensor must have the same device type
+              and ID data type as the graph's.
+            * iterable[int]: Each element is a node ID.
+
+        v : node IDs
+            The destination node IDs of the edges. The allowed formats are:
+
+            * ``int``: A single node.
+            * Int Tensor: Each element is a node ID. The tensor must have the same device type
+              and ID data type as the graph's.
+            * iterable[int]: Each element is a node ID.
        force_multi : bool, optional
-            Deprecated, use :attr:`return_uv` instead. Whether to allow the graph to be a
+            **DEPRECATED**, use :attr:`return_uv` instead. Whether to allow the graph to be a
            multigraph, i.e. there can be multiple edges from one node to another.
        return_uv : bool, optional
            Whether to return the source and destination node IDs along with the edges. If
            False (default), it assumes that the graph is a simple graph and there is only
            one edge from one node to another. If True, there can be multiple edges found
            from one node to another.
-        etype : str or tuple of str, optional
-            The edge type for query, which can be an edge type (str) or a canonical edge type
-            (3-tuple of str). When an edge type appears in multiple canonical edge types, one
-            must use a canonical edge type. If the graph has multiple edge types, one must
-            specify the argument. Otherwise, it can be omitted.
+        etype : str or (str, str, str), optional
+            The type names of the edges. The allowed type name formats are:
+
+            * ``(str, str, str)`` for source node type, edge type and destination node type.
+            * or one ``str`` edge type name if the name can uniquely identify a
+              triplet format in the graph.
+
+            Can be omitted if the graph has only one type of edges.

        Returns
        -------
-        tensor, or (tensor, tensor, tensor)
+        Tensor, or (Tensor, Tensor, Tensor)

-            * If ``return_uv=False``, it returns a 1D tensor that contains the IDs of the edges.
-              If :attr:`u` and :attr:`v` are int, the tensor has length 1. Otherwise, the i-th
-              element of the tensor is the ID of the edge ``(u[i], v[i])``.
+            * If ``return_uv=False``, it returns the edge IDs in a tensor, where the i-th
+              element is the ID of the edge ``(u[i], v[i])``.
            * If ``return_uv=True``, it returns a tuple of three 1D tensors ``(eu, ev, e)``.
              ``e[i]`` is the ID of an edge from ``eu[i]`` to ``ev[i]``. It returns all edges
-              from ``eu[i]`` to ``ev[i]`` in this case.
+              (including parallel edges) from ``eu[i]`` to ``ev[i]`` in this case.

        Notes
        -----
@@ -2840,34 +2781,35 @@ class DGLHeteroGraph(object):
            return F.as_scalar(eid) if is_int else eid

    def find_edges(self, eid, etype=None):
-        """Return the source and destination node(s) of some particular edge(s)
-        with the specified edge type.
+        """Return the source and destination node ID(s) given the edge ID(s).

        Parameters
        ----------
        eid : edge ID(s)
-            The IDs of the edges for query. The function expects that :attr:`eid` contains
-            valid edge IDs only, i.e. among consecutive integers :math:`0, 1, ... E - 1`, where
-            :math:`E` is the number of edges with the specified edge type.
-
-            - ``int``: An edge ID for query.
-            - ``Tensor``: A 1D tensor that contains the edge IDs for query, whose data
-              type and device should be the same as the :py:attr:`idtype` and device of the graph.
-            - ``iterable[int]``: Similar to the tensor, but stores edge IDs in a sequence
-              (e.g. list, tuple, numpy.ndarray).
-        etype : str or tuple of str, optional
-            The edge type for query, which can be an edge type (str) or a canonical edge type
-            (3-tuple of str). When an edge type appears in multiple canonical edge types, one
-            must use a canonical edge type.
+            The edge IDs. The allowed formats are:
+
+            * ``int``: A single ID.
+            * Int Tensor: Each element is an ID. The tensor must have the same device type
+              and ID data type as the graph's.
+            * iterable[int]: Each element is an ID.
+
+        etype : str or (str, str, str), optional
+            The type names of the edges. The allowed type name formats are:
+
+            * ``(str, str, str)`` for source node type, edge type and destination node type.
+            * or one ``str`` edge type name if the name can uniquely identify a
+              triplet format in the graph.
+
+            Can be omitted if the graph has only one type of edges.

        Returns
        -------
        Tensor
-            The source node IDs of the edges, whose i-th element is the source node of the edge
-            with ID ``eid[i]``.
+            The source node IDs of the edges. The i-th element is the source node ID of
+            the i-th edge.
        Tensor
-            The destination node IDs of the edges, whose i-th element is the destination node of
-            the edge with ID ``eid[i]``.
+            The destination node IDs of the edges. The i-th element is the destination node
+            ID of the i-th edge.

        Examples
        --------
@@ -2910,20 +2852,19 @@ class DGLHeteroGraph(object):
        return src, dst

    def in_edges(self, v, form='uv', etype=None):
-        """Return the incoming edges of some particular node(s) with the specified edge type.
+        """Return the incoming edges of the given nodes.

        Parameters
        ----------
-        v : destination node ID(s)
-            The destination node(s) for query. The allowed formats are:
+        v : node ID(s)
+            The node IDs. The allowed formats are:

-            - ``int``: The destination node for query.
-            - ``Tensor``: A 1D tensor that contains the destination node(s) for query, whose data
-              type and device should be the same as the :py:attr:`idtype` and device of the graph.
-            - ``iterable[int]``: Similar to the tensor, but stores node IDs in a sequence
-              (e.g. list, tuple, numpy.ndarray).
+            * ``int``: A single node.
+            * Int Tensor: Each element is a node ID. The tensor must have the same device type
+              and ID data type as the graph's.
+            * iterable[int]: Each element is a node ID.
        form : str, optional
-            The return form, which can be one of the following:
+            The result format, which can be one of the following:

            - ``'eid'``: The returned result is a 1D tensor :math:`EID`, representing
              the IDs of all edges.
@@ -2933,11 +2874,14 @@ class DGLHeteroGraph(object):
            - ``'all'``: The returned result is a 3-tuple of 1D tensors :math:`(U, V, EID)`,
              representing the source nodes, destination nodes and IDs of all edges.
              For each :math:`i`, :math:`(U[i], V[i])` forms an edge with ID :math:`EID[i]`.
-        etype : str or tuple of str, optional
-            The edge type for query, which can be an edge type (str) or a canonical edge type
-            (3-tuple of str). When an edge type appears in multiple canonical edge types, one
-            must use a canonical edge type. If the graph has multiple edge types, one must
-            specify the argument. Otherwise, it can be omitted.
+        etype : str or (str, str, str), optional
+            The type names of the edges. The allowed type name formats are:
+
+            * ``(str, str, str)`` for source node type, edge type and destination node type.
+            * or one ``str`` edge type name if the name can uniquely identify a
+              triplet format in the graph.
+
+            Can be omitted if the graph has only one type of edges.

        Returns
        -------
@@ -2992,18 +2936,17 @@ class DGLHeteroGraph(object):
            raise DGLError('Invalid form: {}. Must be "all", "uv" or "eid".'.format(form))

    def out_edges(self, u, form='uv', etype=None):
-        """Return the outgoing edges of some particular node(s) with the specified edge type.
+        """Return the outgoing edges of the given nodes.

        Parameters
        ----------
-        u : source node ID(s)
-            The source node(s) for query. The allowed formats are:
+        u : node ID(s)
+            The node IDs. The allowed formats are:

-            - ``int``: The source node for query.
-            - ``Tensor``: A 1D tensor that contains the source node(s) for query, whose data
-              type and device should be the same as the :py:attr:`idtype` and device of the graph.
-            - ``iterable[int]``: Similar to the tensor, but stores node IDs in a sequence
-              (e.g. list, tuple, numpy.ndarray).
+            * ``int``: A single node.
+            * Int Tensor: Each element is a node ID. The tensor must have the same device type
+              and ID data type as the graph's.
+            * iterable[int]: Each element is a node ID.
        form : str, optional
            The return form, which can be one of the following:

@@ -3015,11 +2958,14 @@ class DGLHeteroGraph(object):
            - ``'all'``: The returned result is a 3-tuple of 1D tensors :math:`(U, V, EID)`,
              representing the source nodes, destination nodes and IDs of all edges.
              For each :math:`i`, :math:`(U[i], V[i])` forms an edge with ID :math:`EID[i]`.
-        etype : str or tuple of str, optional
-            The edge type for query, which can be an edge type (str) or a canonical edge type
-            (3-tuple of str). When an edge type appears in multiple canonical edge types, one
-            must use a canonical edge type. If the graph has multiple edge types, one must
-            specify the argument. Otherwise, it can be omitted.
+        etype : str or (str, str, str), optional
+            The type names of the edges. The allowed type name formats are:
+
+            * ``(str, str, str)`` for source node type, edge type and destination node type.
+            * or one ``str`` edge type name if the name can uniquely identify a
+              triplet format in the graph.
+
+            Can be omitted if the graph has only one type of edges.

        Returns
        -------
@@ -3159,43 +3105,41 @@ class DGLHeteroGraph(object):
    def in_degree(self, v, etype=None):
        """Return the in-degree of node ``v`` with edges of type ``etype``.

-        DEPRECATED: Please use in_degrees
+        **DEPRECATED**: Please use in_degrees
        """
        dgl_warning("DGLGraph.in_degree is deprecated. Please use DGLGraph.in_degrees")
        return self.in_degrees(v, etype)

    def in_degrees(self, v=ALL, etype=None):
-        """Return the in-degree(s) of some particular node(s) with the specified edge type.
+        """Return the in-degree(s) of the given nodes.
+
+        It computes the in-degree(s) w.r.t. the edges of the given edge type.

        Parameters
        ----------
-        v : destination node ID(s), optional
-            The destination node(s) for query. The allowed formats are:
+        v : node IDs
+            The node IDs. The allowed formats are:

-            - ``int``: The destination node for query.
-            - ``Tensor``: A 1D tensor that contains the destination node(s) for query, whose data
-              type and device should be the same as the :py:attr:`idtype` and device of the graph.
-            - ``iterable[int]``: Similar to the tensor, but stores node IDs in a sequence
-              (e.g. list, tuple, numpy.ndarray).
+            * ``int``: A single node.
+            * Int Tensor: Each element is a node ID. The tensor must have the same device type
+              and ID data type as the graph's.
+            * iterable[int]: Each element is a node ID.

-            By default, it considers all nodes.
-        etype : str or tuple of str, optional
-            The edge type for query, which can be an edge type (str) or a canonical edge type
-            (3-tuple of str). When an edge type appears in multiple canonical edge types, one
-            must use a canonical edge type. If the graph has multiple edge types, one must
-            specify the argument. Otherwise, it can be omitted.
+            If not given, return the in-degrees of all the nodes.
+        etype : str or (str, str, str), optional
+            The type name of the edges. The allowed type name formats are:
+
+            * ``(str, str, str)`` for source node type, edge type and destination node type.
+            * or one ``str`` edge type name if the name can uniquely identify a
+              triplet format in the graph.
+
+            Can be omitted if the graph has only one type of edges.

        Returns
        -------
-        tensor or int
-            The in-degree(s) of the node(s).
-
-            - If :attr:`v` is an ``int`` object, the return result will be an ``int``
-              object as well.
-            - If :attr:`v` is a ``Tensor`` or ``iterable[int]`` object, the return result
-              will be a 1D ``Tensor``. The data type of the result will be the same as the
-              idtype of the graph. The i-th element of the tensor is the in-degree of the
-              node ``v[i]``.
+        int or Tensor
+            The in-degree(s) of the node(s) in a Tensor. The i-th element is the in-degree
+            of the i-th input node. If :attr:`v` is an ``int``, return an ``int`` too.

        Examples
        --------
@@ -3251,36 +3195,35 @@ class DGLHeteroGraph(object):
        return self.out_degrees(u, etype)

    def out_degrees(self, u=ALL, etype=None):
-        """Return the out-degree(s) of some particular node(s) with the specified edge type.
+        """Return the out-degree(s) of the given nodes.
+
+        It computes the out-degree(s) w.r.t. the edges of the given edge type.

        Parameters
        ----------
-        u : source node ID(s), optional
+        u : node IDs
+            The node IDs. The allowed formats are:

-            - ``int``: The source node for query.
-            - ``Tensor``: A 1D tensor that contains the source node(s) for query, whose data
-              type and device should be the same as the :py:attr:`idtype` and device of the graph.
-            - ``iterable[int]``: Similar to the tensor, but stores node IDs in a sequence
-              (e.g. list, tuple, numpy.ndarray).
+            * ``int``: A single node.
+            * Int Tensor: Each element is a node ID. The tensor must have the same device type
+              and ID data type as the graph's.
+            * iterable[int]: Each element is a node ID.

-            By default, it considers all nodes.
-        etype : str or tuple of str, optional
-            The edge type for query, which can be an edge type (str) or a canonical edge type
-            (3-tuple of str). When an edge type appears in multiple canonical edge types, one
-            must use a canonical edge type. If the graph has multiple edge types, one must
-            specify the argument. Otherwise, it can be omitted.
+            If not given, return the out-degrees of all the nodes.
+        etype : str or (str, str, str), optional
+            The type names of the edges. The allowed type name formats are:
+
+            * ``(str, str, str)`` for source node type, edge type and destination node type.
+            * or one ``str`` edge type name if the name can uniquely identify a
+              triplet format in the graph.
+
+            Can be omitted if the graph has only one type of edges.

        Returns
        -------
-        tensor or int
-            The out-degree(s) of the node(s).
-
-            - If :attr:`u` is an ``int`` object, the return result will be an ``int``
-              object as well.
-            - If :attr:`u` is a ``Tensor`` or ``iterable[int]`` object, the return result
-              will be a 1D ``Tensor``. The data type of the result will be the same as the
-              idtype of the graph. The i-th element of the tensor is the out-degree of the
-              node ``v[i]``.
+        int or Tensor
+            The out-degree(s) of the node(s) in a Tensor. The i-th element is the out-degree
+            of the i-th input node. If :attr:`u` is an ``int``, return an ``int`` too.

        Examples
        --------
@@ -3330,7 +3273,7 @@ class DGLHeteroGraph(object):
            return deg

    def adjacency_matrix(self, transpose=True, ctx=F.cpu(), scipy_fmt=None, etype=None):
-        """Alias of :func:`adj`"""
+        """Alias of :meth:`adj`"""
        return self.adj(transpose, ctx, scipy_fmt, etype)

    def adj(self, transpose=True, ctx=F.cpu(), scipy_fmt=None, etype=None):
@@ -3351,12 +3294,15 @@ class DGLHeteroGraph(object):
        scipy_fmt : str, optional
            If specified, return a scipy sparse matrix in the given format.
            Otherwise, return a backend dependent sparse tensor. (Default: None)
-        etype : str or tuple of str, optional
-            The edge type for query, which can be an edge type (str) or a canonical edge type
-            (3-tuple of str). When an edge type appears in multiple canonical edge types, one
-            must use a canonical edge type. If given, it returns the number of edges for a
-            particular edge type. If not given (default), it returns the total number of edges
-            of all types.
+        etype : str or (str, str, str), optional
+            The type names of the edges. The allowed type name formats are:
+
+            * ``(str, str, str)`` for source node type, edge type and destination node type.
+            * or one ``str`` edge type name if the name can uniquely identify a
+              triplet format in the graph.
+
+            Can be omitted if the graph has only one type of edges.
+

        Returns
        -------
@@ -3398,6 +3344,7 @@ class DGLHeteroGraph(object):
        else:
            return self._graph.adjacency_matrix_scipy(etid, transpose, scipy_fmt, False)

+
    def adjacency_matrix_scipy(self, transpose=True, fmt='csr', return_edge_ids=None):
        """DEPRECATED: please use ``dgl.adjacency_matrix(transpose, scipy_fmt=fmt)``.
        """
@@ -3407,10 +3354,6 @@ class DGLHeteroGraph(object):

        return self.adjacency_matrix(transpose=transpose, scipy_fmt=fmt)

-    def incidence_matrix(self, typestr, ctx=F.cpu(), etype=None):
-        """Alias of :func:`inc`"""
-        return self.inc(typestr, ctx, etype)
-
    def inc(self, typestr, ctx=F.cpu(), etype=None):
        """Return the incidence matrix representation of edges with the given
        edge type.
@@ -3446,12 +3389,14 @@ class DGLHeteroGraph(object):
            Can be either ``in``, ``out`` or ``both``
        ctx : context, optional
            The context of returned incidence matrix. (Default: cpu)
-        etype : str or tuple of str, optional
-            The edge type for query, which can be an edge type (str) or a canonical edge type
-            (3-tuple of str). When an edge type appears in multiple canonical edge types, one
-            must use a canonical edge type. If given, it returns the number of edges for a
-            particular edge type. If not given (default), it returns the total number of edges
-            of all types.
+        etype : str or (str, str, str), optional
+            The type names of the edges. The allowed type name formats are:
+
+            * ``(str, str, str)`` for source node type, edge type and destination node type.
+            * or one ``str`` edge type name if the name can uniquely identify a
+              triplet format in the graph.
+
+            Can be omitted if the graph has only one type of edges.

        Returns
        -------
@@ -3485,6 +3430,8 @@ class DGLHeteroGraph(object):
        etid = self.get_etype_id(etype)
        return self._graph.incidence_matrix(etid, typestr, ctx)[0]

+    incidence_matrix = inc
+
    #################################################################
    # Features
    #################################################################
@@ -3497,8 +3444,8 @@ class DGLHeteroGraph(object):
        Parameters
        ----------
        ntype : str, optional
-            The node type for query. If the graph has multiple node types, one must
-            specify the argument. Otherwise, it can be omitted.
+            The node type name. Can be omitted if there is only one type of nodes
+            in the graph.

        Returns
        -------
@@ -3544,11 +3491,15 @@ class DGLHeteroGraph(object):

        Parameters
        ----------
-        etype : str or tuple of str, optional
-            The edge type for query, which can be an edge type (str) or a canonical edge type
-            (3-tuple of str). When an edge type appears in multiple canonical edge types, one
-            must use a canonical edge type. If the graph has multiple edge types, one must
-            specify the argument. Otherwise, it can be omitted.
+        etype : str or (str, str, str), optional
+            The type names of the edges. The allowed type name formats are:
+
+            * ``(str, str, str)`` for source node type, edge type and destination node type.
+            * or one ``str`` edge type name if the name can uniquely identify a
+              triplet format in the graph.
+
+            Can be omitted if the graph has only one type of edges.
+

        Returns
        -------
@@ -3614,9 +3565,7 @@ class DGLHeteroGraph(object):
            The name of the feature that the initializer applies. If not given, the
            initializer applies to all features.
        ntype : str, optional
-            The type of the nodes that the initializer applies. If the graph has
-            multiple node types, one must specify the argument. Otherwise, it can
-            be omitted.
+            The type name of the nodes. Can be omitted if the graph has only one type of nodes.

        Notes
        -----
@@ -3703,11 +3652,15 @@ class DGLHeteroGraph(object):
        field : str, optional
            The name of the feature that the initializer applies. If not given, the
            initializer applies to all features.
-        etype : str or tuple of str, optional
-            The edge type for query, which can be an edge type (str) or a canonical edge type
-            (3-tuple of str). When an edge type appears in multiple canonical edge types, one
-            must use a canonical edge type. If the graph has multiple edge types, one must
-            specify the argument. Otherwise, it can be omitted.
+        etype : str or (str, str, str), optional
+            The type names of the edges. The allowed type name formats are:
+
+            * ``(str, str, str)`` for source node type, edge type and destination node type.
+            * or one ``str`` edge type name if the name can uniquely identify a
+              triplet format in the graph.
+
+            Can be omitted if the graph has only one type of edges.
+

        Notes
        -----
@@ -3945,27 +3898,50 @@ class DGLHeteroGraph(object):
    #################################################################

    def apply_nodes(self, func, v=ALL, ntype=None, inplace=False):
-        """Apply the function on the nodes with the same type to update their
-        features.
-
-        If None is provided for ``func``, nothing will happen.
+        """Update the features of the specified nodes by the provided function.

        Parameters
        ----------
-        func : callable or None
-            Apply function on the nodes. The function should be
-            a :mod:`Node UDF <dgl.udf>`.
-        v : int or iterable of int or tensor, optional
-            The (type-specific) node (ids) on which to apply ``func``. (Default: ALL)
+        func : callable
+            The function to update node features. It must be
+            a :ref:`apiudf`.
+        v : node IDs
+            The node IDs. The allowed formats are:
+
+            * ``int``: A single node.
+            * Int Tensor: Each element is a node ID. The tensor must have the same device type
+              and ID data type as the graph's.
+            * iterable[int]: Each element is a node ID.
+
+            If not given (default), use all the nodes in the graph.
        ntype : str, optional
-            The node type. Can be omitted if there is only one node type
-            in the graph. (Default: None)
+            The node type name. Can be omitted if there is
+            only one type of nodes in the graph.
        inplace : bool, optional
-            **DEPRECATED**. If True, update will be done in place, but autograd will break.
-            (Default: False)
+            **DEPRECATED**.

        Examples
        --------
+
+        The following example uses PyTorch backend.
+
+        >>> import dgl
+        >>> import torch
+
+        **Homogeneous graph**
+
+        >>> g = dgl.graph(([0, 1, 2, 3], [1, 2, 3, 4]))
+        >>> g.ndata['h'] = torch.ones(5, 2)
+        >>> g.apply_nodes(lambda nodes: {'x' : nodes.data['h'] * 2})
+        >>> g.ndata['x']
+        tensor([[2., 2.],
+                [2., 2.],
+                [2., 2.],
+                [2., 2.],
+                [2., 2.]])
+
+        **Heterogeneous graph**
+
        >>> g = dgl.heterograph({('user', 'follows', 'user'): ([0, 1], [1, 2])})
        >>> g.nodes['user'].data['h'] = torch.ones(3, 5)
        >>> g.apply_nodes(lambda nodes: {'h': nodes.data['h'] * 2}, ntype='user')
@@ -3990,27 +3966,76 @@ class DGLHeteroGraph(object):
        self._set_n_repr(ntid, v, ndata)

    def apply_edges(self, func, edges=ALL, etype=None, inplace=False):
-        """Apply the function on the edges with the same type to update their
-        features.
-
-        If None is provided for ``func``, nothing will happen.
+        """Update the features of the specified edges by the provided function.

        Parameters
        ----------
-        func : callable
-            Apply function on the edge. The function should be
-            an :mod:`Edge UDF <dgl.udf>`.
-        edges : optional
-            Edges on which to apply ``func``. See :func:`send` for valid
-            edge specification. (Default: ALL)
-        etype : str or tuple of str, optional
-            The edge type. Can be omitted if there is only one edge type
-            in the graph. (Default: None)
+        func : dgl.function.BuiltinFunction or callable
+            The function to generate new edge features. It must be either
+            a :ref:`api-built-in` or a :ref:`apiudf`.
+        edges : edges
+            The edges to update features on. The allowed input formats are:
+
+            * ``int``: A single edge ID.
+            * Int Tensor: Each element is an edge ID.  The tensor must have the same device type
+              and ID data type as the graph's.
+            * iterable[int]: Each element is an edge ID.
+            * (Tensor, Tensor): The node-tensors format where the i-th elements
+              of the two tensors specify an edge.
+            * (iterable[int], iterable[int]): Similar to the node-tensors format but
+              stores edge endpoints in python iterables.
+
+            Default value specifies all the edges in the graph.
+
+        etype : str or (str, str, str), optional
+            The type name of the edges. The allowed type name formats are:
+
+            * ``(str, str, str)`` for source node type, edge type and destination node type.
+            * or one ``str`` edge type name if the name can uniquely identify a
+              triplet format in the graph.
+
+            Can be omitted if the graph has only one type of edges.
+
        inplace: bool, optional
-            **DEPRECATED**. Must be False.
+            **DEPRECATED**.
+
+        Notes
+        -----
+        DGL recommends using DGL's built-in functions for the :attr:`func` argument,
+        because DGL will invoke efficient kernels that avoid copying node features to
+        edge features in this case.

        Examples
        --------
+
+        The following example uses PyTorch backend.
+
+        >>> import dgl
+        >>> import torch
+
+        **Homogeneous graph**
+
+        >>> g = dgl.graph(([0, 1, 2, 3], [1, 2, 3, 4]))
+        >>> g.ndata['h'] = torch.ones(5, 2)
+        >>> g.apply_edges(lambda edges: {'x' : edges.src['h'] + edges.dst['h']})
+        >>> g.edata['x']
+        tensor([[2., 2.],
+                [2., 2.],
+                [2., 2.],
+                [2., 2.]])
+
+        Use built-in function
+
+        >>> import dgl.function as fn
+        >>> g.apply_edges(fn.u_add_v('h', 'h', 'x'))
+        >>> g.edata['x']
+        tensor([[2., 2.],
+                [2., 2.],
+                [2., 2.],
+                [2., 2.]])
+
+        **Heterogeneous graph**
+
        >>> g = dgl.heterograph({('user', 'plays', 'game'): ([0, 1, 1, 2], [0, 0, 2, 1])})
        >>> g.edges[('user', 'plays', 'game')].data['h'] = torch.ones(4, 5)
        >>> g.apply_edges(lambda edges: {'h': edges.data['h'] * 2})
@@ -4048,40 +4073,50 @@ class DGLHeteroGraph(object):
                      apply_node_func=None,
                      etype=None,
                      inplace=False):
-        """Send messages along edges of the specified type, and let destinations
-        receive them.
-
-        Optionally, apply a function to update the node features after "receive".
-
-        This is a convenient combination for performing
-        :mod:`send <dgl.DGLHeteroGraph.send>` along the ``edges`` and
-        :mod:`recv <dgl.DGLHeteroGraph.recv>` for the destinations of the ``edges``.
-
-        **Only works if the graph has one edge type.**  For multiple types, use
-
-        .. code::
-
-           g['edgetype'].send_and_recv(edges, message_func, reduce_func,
-                                       apply_node_func, inplace=inplace)
+        """Send messages along the specified edges and reduce them on
+        the destination nodes to update their features.

        Parameters
        ----------
-        edges : See :func:`send` for valid edge specification.
-            Edges on which to apply ``func``.
-        message_func : callable
-            Message function on the edges. The function should be
-            an :mod:`Edge UDF <dgl.udf>`.
-        reduce_func : callable
-            Reduce function on the node. The function should be
-            a :mod:`Node UDF <dgl.udf>`.
+        edges : edges
+            The edges to send and receive messages on. The allowed input formats are:
+
+            * ``int``: A single edge ID.
+            * Int Tensor: Each element is an edge ID.  The tensor must have the same device type
+              and ID data type as the graph's.
+            * iterable[int]: Each element is an edge ID.
+            * (Tensor, Tensor): The node-tensors format where the i-th elements
+              of the two tensors specify an edge.
+            * (iterable[int], iterable[int]): Similar to the node-tensors format but
+              stores edge endpoints in python iterables.
+
+        message_func : dgl.function.BuiltinFunction or callable
+            The message function to generate messages along the edges.
+            It must be either a :ref:`api-built-in` or a :ref:`apiudf`.
+        reduce_func : dgl.function.BuiltinFunction or callable
+            The reduce function to aggregate the messages.
+            It must be either a :ref:`api-built-in` or a :ref:`apiudf`.
        apply_node_func : callable, optional
-            Apply function on the nodes. The function should be
-            a :mod:`Node UDF <dgl.udf>`. (Default: None)
-        etype : str or tuple of str, optional
-            The edge type. Can be omitted if there is only one edge type
-            in the graph. (Default: None)
+            An optional apply function to further update the node features
+            after the message reduction. It must be a :ref:`apiudf`.
+        etype : str or (str, str, str), optional
+            The type name of the edges. The allowed type name formats are:
+
+            * ``(str, str, str)`` for source node type, edge type and destination node type.
+            * or one ``str`` edge type name if the name can uniquely identify a
+              triplet format in the graph.
+
+            Can be omitted if the graph has only one type of edges.
+
        inplace: bool, optional
-            **DEPRECATED**. Must be False.
+            **DEPRECATED**.
+
+        Notes
+        -----
+        DGL recommends using DGL's built-in functions for the :attr:`message_func`
+        and the :attr:`reduce_func` arguments,
+        because DGL will invoke efficient kernels that avoid copying node features to
+        edge features in this case.

        Examples
        --------
@@ -4090,6 +4125,29 @@ class DGLHeteroGraph(object):
        >>> import dgl.function as fn
        >>> import torch

+        **Homogeneous graph**
+
+        >>> g = dgl.graph(([0, 1, 2, 3], [1, 2, 3, 4]))
+        >>> g.ndata['x'] = torch.ones(5, 2)
+        >>> # Specify edges using (iterable[int], iterable[int]) of source and destination node IDs.
+        >>> g.send_and_recv(([1, 2], [2, 3]), fn.copy_u('x', 'm'), fn.sum('m', 'h'))
+        >>> g.ndata['h']
+        tensor([[0., 0.],
+                [0., 0.],
+                [1., 1.],
+                [1., 1.],
+                [0., 0.]])
+        >>> # Specify edges using IDs.
+        >>> g.send_and_recv([0, 2, 3], fn.copy_u('x', 'm'), fn.sum('m', 'h'))
+        >>> g.ndata['h']
+        tensor([[0., 0.],
+                [1., 1.],
+                [0., 0.],
+                [1., 1.],
+                [1., 1.]])
+
+        **Heterogeneous graph**
+
        >>> g = dgl.heterograph({
        ...     ('user', 'follows', 'user'): ([0, 1], [1, 2]),
        ...     ('user', 'plays', 'game'): ([0, 1, 1, 2], [0, 0, 1, 1])
@@ -4128,45 +4186,50 @@ class DGLHeteroGraph(object):
             apply_node_func=None,
             etype=None,
             inplace=False):
-        """Pull messages from the node(s)' predecessors and then update their features.
-
-        Optionally, apply a function to update the node features after receive.
-
-        This is equivalent to :mod:`send_and_recv <dgl.DGLHeteroGraph.send_and_recv>`
-        on the incoming edges of ``v`` with the specified type.
-
-        Other notes:
-
-        * `reduce_func` will be skipped for nodes with no incoming messages.
-        * If all ``v`` have no incoming message, this will downgrade to an :func:`apply_nodes`.
-        * If some ``v`` have no incoming message, their new feature value will be calculated
-          by the column initializer (see :func:`set_n_initializer`). The feature shapes and
-          dtypes will be inferred.
-
-        **Only works if the graph has one edge type.** For multiple types, use
-
-        .. code::
-
-           g['edgetype'].pull(v, message_func, reduce_func, apply_node_func, inplace=inplace)
+        """Pull messages from the specified node(s)' predecessors along the
+        specified edge type, aggregate them to update the node features.

        Parameters
        ----------
-        v : int, container or tensor, optional
-            The node(s) to be updated.
-        message_func : callable
-            Message function on the edges. The function should be
-            an :mod:`Edge UDF <dgl.udf>`.
-        reduce_func : callable
-            Reduce function on the node. The function should be
-            a :mod:`Node UDF <dgl.udf>`.
+        v : node IDs
+            The node IDs. The allowed formats are:
+
+            * ``int``: A single node.
+            * Int Tensor: Each element is a node ID. The tensor must have the same device type
+              and ID data type as the graph's.
+            * iterable[int]: Each element is a node ID.
+
+        message_func : dgl.function.BuiltinFunction or callable
+            The message function to generate messages along the edges.
+            It must be either a :ref:`api-built-in` or a :ref:`apiudf`.
+        reduce_func : dgl.function.BuiltinFunction or callable
+            The reduce function to aggregate the messages.
+            It must be either a :ref:`api-built-in` or a :ref:`apiudf`.
        apply_node_func : callable, optional
-            Apply function on the nodes. The function should be
-            a :mod:`Node UDF <dgl.udf>`. (Default: None)
-        etype : str or tuple of str, optional
-            The edge type. Can be omitted if there is only one edge type
-            in the graph. (Default: None)
+            An optional apply function to further update the node features
+            after the message reduction. It must be a :ref:`apiudf`.
+        etype : str or (str, str, str), optional
+            The type name of the edges. The allowed type name formats are:
+
+            * ``(str, str, str)`` for source node type, edge type and destination node type.
+            * or one ``str`` edge type name if the name can uniquely identify a
+              triplet format in the graph.
+
+            Can be omitted if the graph has only one type of edges.
+
        inplace: bool, optional
-            **DEPRECATED**. Must be False.
+            **DEPRECATED**.
+
+        Notes
+        -----
+        * If some of the given nodes :attr:`v` have no in-edges, DGL does not invoke
+          message and reduce functions for these nodes and fills their aggregated messages
+          with zero. Users can control the filled values via :meth:`set_n_initializer`.
+          DGL still invokes :attr:`apply_node_func` if provided.
+        * DGL recommends using DGL's built-in functions for the :attr:`message_func`
+          and the :attr:`reduce_func` arguments,
+          because DGL will invoke efficient kernels that avoid copying node features to
+          edge features in this case.

        Examples
        --------
@@ -4175,7 +4238,19 @@ class DGLHeteroGraph(object):
        >>> import dgl.function as fn
        >>> import torch

-        Instantiate a heterograph.
+        **Homogeneous graph**
+
+        >>> g = dgl.graph(([0, 1, 2, 3], [1, 2, 3, 4]))
+        >>> g.ndata['x'] = torch.ones(5, 2)
+        >>> g.pull([0, 3, 4], fn.copy_u('x', 'm'), fn.sum('m', 'h'))
+        >>> g.ndata['h']
+        tensor([[0., 0.],
+                [0., 0.],
+                [0., 0.],
+                [1., 1.],
+                [1., 1.]])
+
+        **Heterogeneous graph**

        >>> g = dgl.heterograph({
        ...     ('user', 'follows', 'user'): ([0, 1], [1, 2]),
@@ -4214,36 +4289,46 @@ class DGLHeteroGraph(object):
             apply_node_func=None,
             etype=None,
             inplace=False):
-        """Send message from the node(s) to their successors and update them.
-
-        This is equivalent to performing
-        :mod:`send_and_recv <DGLHeteroGraph.send_and_recv>` along the outbound
-        edges from ``u``.
-
-        **Only works if the graph has one edge type.** For multiple types, use
-
-        .. code::
-
-           g['edgetype'].push(u, message_func, reduce_func, apply_node_func, inplace=inplace)
+        """Send message from the specified node(s) to their successors
+        along the specified edge type and update their node features.

        Parameters
        ----------
-        u : int, container or tensor
-            The node(s) to push out messages.
-        message_func : callable
-            Message function on the edges. The function should be
-            an :mod:`Edge UDF <dgl.udf>`.
-        reduce_func : callable
-            Reduce function on the node. The function should be
-            a :mod:`Node UDF <dgl.udf>`.
+        u : node IDs
+            The node IDs. The allowed formats are:
+
+            * ``int``: A single node.
+            * Int Tensor: Each element is a node ID. The tensor must have the same device type
+              and ID data type as the graph's.
+            * iterable[int]: Each element is a node ID.
+
+        message_func : dgl.function.BuiltinFunction or callable
+            The message function to generate messages along the edges.
+            It must be either a :ref:`api-built-in` or a :ref:`apiudf`.
+        reduce_func : dgl.function.BuiltinFunction or callable
+            The reduce function to aggregate the messages.
+            It must be either a :ref:`api-built-in` or a :ref:`apiudf`.
        apply_node_func : callable, optional
-            Apply function on the nodes. The function should be
-            a :mod:`Node UDF <dgl.udf>`. (Default: None)
-        etype : str, optional
-            The edge type. Can be omitted if there is only one edge type
-            in the graph. (Default: None)
+            An optional apply function to further update the node features
+            after the message reduction. It must be a :ref:`apiudf`.
+        etype : str or (str, str, str), optional
+            The type name of the edges. The allowed type name formats are:
+
+            * ``(str, str, str)`` for source node type, edge type and destination node type.
+            * or one ``str`` edge type name if the name can uniquely identify a
+              triplet format in the graph.
+
+            Can be omitted if the graph has only one type of edges.
+
        inplace: bool, optional
-            **DEPRECATED**. Must be False.
+            **DEPRECATED**.
+
+        Notes
+        -----
+        DGL recommends using DGL's built-in functions for the :attr:`message_func`
+        and the :attr:`reduce_func` arguments,
+        because DGL will invoke efficient kernels that avoid copying node features to
+        edge features in this case.

        Examples
        --------
@@ -4252,7 +4337,19 @@ class DGLHeteroGraph(object):
        >>> import dgl.function as fn
        >>> import torch

-        Instantiate a heterograph.
+        **Homogeneous graph**
+
+        >>> g = dgl.graph(([0, 1, 2, 3], [1, 2, 3, 4]))
+        >>> g.ndata['x'] = torch.ones(5, 2)
+        >>> g.push([0, 1], fn.copy_u('x', 'm'), fn.sum('m', 'h'))
+        >>> g.ndata['h']
+        tensor([[0., 0.],
+                [1., 1.],
+                [1., 1.],
+                [0., 0.],
+                [0., 0.]])
+
+        **Heterogeneous graph**

        >>> g = dgl.heterograph({('user', 'follows', 'user'): ([0, 0], [1, 2])})
        >>> g.nodes['user'].data['h'] = torch.tensor([[0.], [1.], [2.]])
@@ -4275,42 +4372,59 @@ class DGLHeteroGraph(object):
                   reduce_func,
                   apply_node_func=None,
                   etype=None):
-        """Send messages through all edges and update all nodes.
-
-        Optionally, apply a function to update the node features after receive.
-
-        This is equivalent to
-        :mod:`send_and_recv <dgl.DGLHeteroGraph.send_and_recv>` over all edges
-        of the specified type.
-
-        **Only works if the graph has one edge type.** For multiple types, use
-
-        .. code::
-
-           g['edgetype'].update_all(message_func, reduce_func, apply_node_func)
+        """Send messages along all the edges of the specified type
+        and update all the nodes of the corresponding destination type.

        Parameters
        ----------
-        message_func : callable
-            Message function on the edges. The function should be
-            an :mod:`Edge UDF <dgl.udf>`.
-        reduce_func : callable
-            Reduce function on the node. The function should be
-            a :mod:`Node UDF <dgl.udf>`.
+        message_func : dgl.function.BuiltinFunction or callable
+            The message function to generate messages along the edges.
+            It must be either a :ref:`api-built-in` or a :ref:`apiudf`.
+        reduce_func : dgl.function.BuiltinFunction or callable
+            The reduce function to aggregate the messages.
+            It must be either a :ref:`api-built-in` or a :ref:`apiudf`.
        apply_node_func : callable, optional
-            Apply function on the nodes. The function should be
-            a :mod:`Node UDF <dgl.udf>`. (Default: None)
-        etype : str, optional
-            The edge type. Can be omitted if there is only one edge type
-            in the graph. (Default: None)
+            An optional apply function to further update the node features
+            after the message reduction. It must be a :ref:`apiudf`.
+        etype : str or (str, str, str), optional
+            The type name of the edges. The allowed type name formats are:
+
+            * ``(str, str, str)`` for source node type, edge type and destination node type.
+            * or one ``str`` edge type name if the name can uniquely identify a
+              triplet format in the graph.
+
+            Can be omitted if the graph has only one type of edges.
+
+        Notes
+        -----
+        * If some of the nodes in the graph have no in-edges, DGL does not invoke
+          message and reduce functions for these nodes and fills their aggregated messages
+          with zero. Users can control the filled values via :meth:`set_n_initializer`.
+          DGL still invokes :attr:`apply_node_func` if provided.
+        * DGL recommends using DGL's built-in functions for the :attr:`message_func`
+          and the :attr:`reduce_func` arguments,
+          because DGL will invoke efficient kernels that avoid copying node features to
+          edge features in this case.

        Examples
        --------
-        >>> import torch
        >>> import dgl
        >>> import dgl.function as fn
+        >>> import torch

-        Instantiate a heterograph.
+        **Homogeneous graph**
+
+        >>> g = dgl.graph(([0, 1, 2, 3], [1, 2, 3, 4]))
+        >>> g.ndata['x'] = torch.ones(5, 2)
+        >>> g.update_all(fn.copy_u('x', 'm'), fn.sum('m', 'h'))
+        >>> g.ndata['h']
+        tensor([[0., 0.],
+                [1., 1.],
+                [1., 1.],
+                [1., 1.],
+                [1., 1.]])
+
+        **Heterogeneous graph**

        >>> g = dgl.heterograph({('user', 'follows', 'user'): ([0, 1, 2], [1, 2, 2])})

@@ -4335,35 +4449,48 @@ class DGLHeteroGraph(object):
    #################################################################

    def multi_update_all(self, etype_dict, cross_reducer, apply_node_func=None):
-        r"""Send and receive messages along all edges.
-
-        This is equivalent to
-        :mod:`multi_send_and_recv <dgl.DGLHeteroGraph.multi_send_and_recv>`
-        over all edges.
+        r"""Send messages along all the edges, reduce them first within each
+        edge type and then across different edge types, and finally update the
+        node features of all the nodes.

        Parameters
        ----------
        etype_dict : dict
-            Mapping an edge type (str or tuple of str) to the type specific
-            configuration (3-tuples). Each 3-tuple represents
-            (msg_func, reduce_func, apply_node_func):
-
-            * msg_func: callable
-                  Message function on the edges. The function should be
-                  an :mod:`Edge UDF <dgl.udf>`.
-            * reduce_func: callable
-                  Reduce function on the nodes. The function should be
-                  a :mod:`Node UDF <dgl.udf>`.
+            Arguments for edge-type-wise message passing. The keys are edge types
+            while the values are message passing arguments.
+
+            The allowed key formats are:
+
+            * ``(str, str, str)`` for source node type, edge type and destination node type.
+            * or one ``str`` edge type name if the name can uniquely identify a
+              triplet format in the graph.
+
+            The value must be a tuple ``(message_func, reduce_func, [apply_node_func])``, where
+
+            * message_func : dgl.function.BuiltinFunction or callable
+                The message function to generate messages along the edges.
+                It must be either a :ref:`api-built-in` or a :ref:`apiudf`.
+            * reduce_func : dgl.function.BuiltinFunction or callable
+                The reduce function to aggregate the messages.
+                It must be either a :ref:`api-built-in` or a :ref:`apiudf`.
            * apply_node_func : callable, optional
-                  Apply function on the nodes. The function should be
-                  a :mod:`Node UDF <dgl.udf>`. (Default: None)
+                An optional apply function to further update the node features
+                after the message reduction. It must be a :ref:`apiudf`.
+
        cross_reducer : str
            Cross type reducer. One of ``"sum"``, ``"min"``, ``"max"``, ``"mean"``, ``"stack"``.
-        apply_node_func : callable
-            Apply function on the nodes. The function should be
-            a :mod:`Node UDF <dgl.udf>`. (Default: None)
-        inplace: bool, optional
-            **DEPRECATED**. Must be False.
+        apply_node_func : callable, optional
+            An optional apply function after the messages are reduced both
+            type-wisely and across different types.
+            It must be a :ref:`apiudf`.
+
+        Notes
+        -----
+        DGL recommends using DGL's built-in functions for the message_func
+        and the reduce_func in the type-wise message passing arguments,
+        because DGL will invoke efficient kernels that avoid copying node features to
+        edge features in this case.
+

        Examples
        --------
@@ -4431,21 +4558,27 @@ class DGLHeteroGraph(object):

        Parameters
        ----------
-        nodes_generator : iterable, each element is a list or a tensor of node ids
-            The generator of node frontiers. It specifies which nodes perform
-            :func:`pull` at each timestep.
-        message_func : callable
-            Message function on the edges. The function should be
-            an :mod:`Edge UDF <dgl.udf>`.
-        reduce_func : callable
-            Reduce function on the node. The function should be
-            a :mod:`Node UDF <dgl.udf>`.
+        nodes_generator : iterable[node IDs]
+            The generator of node frontiers. Each frontier is a set of node IDs
+            stored in Tensor or python iterables.
+            It specifies which nodes perform :func:`pull` at each step.
+        message_func : dgl.function.BuiltinFunction or callable
+            The message function to generate messages along the edges.
+            It must be either a :ref:`api-built-in` or a :ref:`apiudf`.
+        reduce_func : dgl.function.BuiltinFunction or callable
+            The reduce function to aggregate the messages.
+            It must be either a :ref:`api-built-in` or a :ref:`apiudf`.
        apply_node_func : callable, optional
-            Apply function on the nodes. The function should be
-            a :mod:`Node UDF <dgl.udf>`. (Default: None)
-        etype : str, optional
-            The edge type. Can be omitted if there is only one edge type
-            in the graph. (Default: None)
+            An optional apply function to further update the node features
+            after the message reduction. It must be a :ref:`apiudf`.
+        etype : str or (str, str, str), optional
+            The type name of the edges. The allowed type name formats are:
+
+            * ``(str, str, str)`` for source node type, edge type and destination node type.
+            * or one ``str`` edge type name if the name can uniquely identify a
+              triplet format in the graph.
+
+            Can be omitted if the graph has only one type of edges.

        Examples
        --------
@@ -4492,18 +4625,23 @@ class DGLHeteroGraph(object):
        ----------
        edges_generator : generator
            The generator of edge frontiers.
-        message_func : callable
-            Message function on the edges. The function should be
-            an :mod:`Edge UDF <dgl.udf>`.
-        reduce_func : callable
-            Reduce function on the node. The function should be
-            a :mod:`Node UDF <dgl.udf>`.
+        message_func : dgl.function.BuiltinFunction or callable
+            The message function to generate messages along the edges.
+            It must be either a :ref:`api-built-in` or a :ref:`apiudf`.
+        reduce_func : dgl.function.BuiltinFunction or callable
+            The reduce function to aggregate the messages.
+            It must be either a :ref:`api-built-in` or a :ref:`apiudf`.
        apply_node_func : callable, optional
-            Apply function on the nodes. The function should be
-            a :mod:`Node UDF <dgl.udf>`. (Default: None)
-        etype : str, optional
-            The edge type. Can be omitted if there is only one edge type
-            in the graph. (Default: None)
+            An optional apply function to further update the node features
+            after the message reduction. It must be a :ref:`apiudf`.
+        etype : str or (str, str, str), optional
+            The type name of the edges. The allowed type name formats are:
+
+            * ``(str, str, str)`` for source node type, edge type and destination node type.
+            * or one ``str`` edge type name if the name can uniquely identify a
+              triplet format in the graph.
+
+            Can be omitted if the graph has only one type of edges.

        Examples
        --------
@@ -4563,7 +4701,7 @@ class DGLHeteroGraph(object):

        Returns
        -------
-        tensor
+        Tensor
            A 1D tensor that contains the ID(s) of the node(s) that satisfy the predicate.

        Examples
@@ -4630,30 +4768,31 @@ class DGLHeteroGraph(object):
            Its output tensor should be a 1D boolean tensor with
            each element indicating whether the corresponding edge in
            the batch satisfies the predicate.
-        edges : edge ID(s) or edge end nodes, optional
-            The edge(s) for query. The allowed formats are:
+        edges : edges, optional
+            The edges for query. The allowed input formats are:

-            - Tensor: A 1D tensor that contains the IDs of the edge(s) for query, whose data
-              type and device should be the same as the :py:attr:`idtype` and device of the graph.
-            - iterable[int]: Similar to the tensor, but stores edge IDs in a sequence
-              (e.g. list, tuple, numpy.ndarray).
-            - (Tensor, Tensor): A 2-tuple of the source and destination nodes of multiple
-              edges for query. Each tensor is a 1D tensor containing node IDs. DGL calls this
-              format "tuple of node-tensors". The data type and device of the tensors should
-              be the same as the :py:attr:`idtype` and device of the graph.
-            - (iterable[int], iterable[int]): Similar to the tuple of node-tensors format,
-              but stores node IDs in two sequences (e.g. list, tuple, numpy.ndarray).
-
-            By default, it considers all edges.
-        etype : str or tuple of str, optional
-            The edge type for query, which can be an edge type (str) or a canonical edge type
-            (3-tuple of str). When an edge type appears in multiple canonical edge types, one
-            must use a canonical edge type. If the graph has multiple edge types, one must
-            specify the argument. Otherwise, it can be omitted.
+            * ``int``: A single edge ID.
+            * Int Tensor: Each element is an edge ID.  The tensor must have the same device type
+              and ID data type as the graph's.
+            * iterable[int]: Each element is an edge ID.
+            * (Tensor, Tensor): The node-tensors format where the i-th elements
+              of the two tensors specify an edge.
+            * (iterable[int], iterable[int]): Similar to the node-tensors format but
+              stores edge endpoints in python iterables.
+
+            By default, it considers all the edges.
+        etype : str or (str, str, str), optional
+            The type name of the edges. The allowed type name formats are:
+
+            * ``(str, str, str)`` for source node type, edge type and destination node type.
+            * or one ``str`` edge type name if the name can uniquely identify a
+              triplet format in the graph.
+
+            Can be omitted if the graph has only one type of edges.

        Returns
        -------
-        tensor
+        Tensor
            A 1D tensor that contains the ID(s) of the edge(s) that satisfy the predicate.

        Examples

--- a/python/dgl/nn/__init__.py
+++ b/python/dgl/nn/__init__.py
-"""Package for neural network common components."""
+"""The ``dgl.nn`` package contains framework-specific implementations for
+common Graph Neural Network layers (or module in PyTorch, Block in MXNet).
+Users can directly import ``dgl.nn.<layer_name>`` (e.g., ``dgl.nn.GraphConv``),
+and the package will dispatch the layer name to the actual implementation
+according to the backend framework currently in use.
+
+Note that there are coverage differences among frameworks. If you encounter
+an ``ImportError: cannot import name 'XXX'`` error, that means the layer is
+not available to the current backend. If you wish a module to appear in DGL,
+please `create an issue <https://github.com/dmlc/dgl/issues>`_ starting with
+"[Feature Request] NN Module XXXModel". If you want to contribute an NN module,
+please `create a pull request <https://github.com/dmlc/dgl/pulls>`_ starting
+with "[NN] XXX module".
+"""
+
 import importlib
 import sys
 import os

--- a/python/dgl/random.py
+++ b/python/dgl/random.py
@@ -8,14 +8,12 @@ from . import ndarray as nd
 __all__ = ['seed']

 def seed(val):
-    """Set the seed of randomized methods in DGL.
-
-    The randomized methods include various samplers and random walk routines.
+    """Set the random seed of DGL.

    Parameters
    ----------
    val : int
-        The seed
+        The seed.
    """
    _CAPI_SetSeed(val)

@@ -41,8 +39,6 @@ def choice(a, size, replace=True, prob=None):  # pylint: disable=invalid-name

    It out-performs numpy for non-uniform sampling in general cases.

-    TODO(minjie): support RNG as one of the arguments.
-
    Parameters
    ----------
    a : 1-D tensor or int
@@ -61,6 +57,7 @@ def choice(a, size, replace=True, prob=None):  # pylint: disable=invalid-name
    samples : 1-D tensor
        The generated random samples
    """
+    #TODO(minjie): support RNG as one of the arguments.
    if isinstance(size, tuple):
        num = np.prod(size)
    else:

--- a/python/dgl/readout.py
+++ b/python/dgl/readout.py
@@ -28,9 +28,9 @@ def readout_nodes(graph, feat, weight=None, *, op='sum', ntype=None):
    feat : str
        Node feature name.
    weight : str, optional
-        Node weight name. If None, no weighting will be performed,
-        otherwise, weight each node feature with field :attr:`feat`.
-        for aggregation. The weight feature shape must be compatible with
+        Node weight name. None means aggregating without weights.
+        Otherwise, multiply each node feature by node feature :attr:`weight`
+        before aggregation. The weight feature shape must be compatible with
        an element-wise multiplication with the feature tensor.
    op : str, optional
        Readout operator. Can be 'sum', 'max', 'min', 'mean'.
@@ -39,7 +39,7 @@ def readout_nodes(graph, feat, weight=None, *, op='sum', ntype=None):

    Returns
    -------
-    tensor
+    Tensor
        Result tensor.

    Examples
@@ -101,22 +101,28 @@ def readout_edges(graph, feat, weight=None, *, op='sum', etype=None):
    Parameters
    ----------
    graph : DGLGraph.
-        Input graph.
+        The input graph.
    feat : str
-        Edge feature name.
+        The edge feature name.
    weight : str, optional
-        Edge weight name. If None, no weighting will be performed,
+        The edge weight feature name. If None, no weighting will be performed,
        otherwise, weight each edge feature with field :attr:`feat`.
        for summation. The weight feature shape must be compatible with
        an element-wise multiplication with the feature tensor.
    op : str, optional
        Readout operator. Can be 'sum', 'max', 'min', 'mean'.
-    etype : str, tuple of str, optional
-        Edge type. Can be omitted if there is only one edge type in the graph.
+    etype : str or (str, str, str), optional
+        The type names of the edges. The allowed type name formats are:
+
+        * ``(str, str, str)`` for source node type, edge type and destination node type.
+        * or one ``str`` edge type name if the name can uniquely identify a
+          triplet format in the graph.
+
+        Can be omitted if the graph has only one type of edges.

    Returns
    -------
-    tensor
+    Tensor
        Result tensor.

    Examples
@@ -166,31 +172,55 @@ def readout_edges(graph, feat, weight=None, *, op='sum', etype=None):

 def sum_nodes(graph, feat, weight=None, *, ntype=None):
    """Syntax sugar for ``dgl.readout_nodes(graph, feat, weight, ntype=ntype, op='sum')``.
+
+    See Also
+    --------
+    readout_nodes
    """
    return readout_nodes(graph, feat, weight, ntype=ntype, op='sum')

 def sum_edges(graph, feat, weight=None, *, etype=None):
    """Syntax sugar for ``dgl.readout_edges(graph, feat, weight, etype=etype, op='sum')``.
+
+    See Also
+    --------
+    readout_edges
    """
    return readout_edges(graph, feat, weight, etype=etype, op='sum')

 def mean_nodes(graph, feat, weight=None, *, ntype=None):
    """Syntax sugar for ``dgl.readout_nodes(graph, feat, weight, ntype=ntype, op='mean')``.
+
+    See Also
+    --------
+    readout_nodes
    """
    return readout_nodes(graph, feat, weight, ntype=ntype, op='mean')

 def mean_edges(graph, feat, weight=None, *, etype=None):
    """Syntax sugar for ``dgl.readout_edges(graph, feat, weight, etype=etype, op='mean')``.
+
+    See Also
+    --------
+    readout_edges
    """
    return readout_edges(graph, feat, weight, etype=etype, op='mean')

 def max_nodes(graph, feat, weight=None, *, ntype=None):
    """Syntax sugar for ``dgl.readout_nodes(graph, feat, weight, ntype=ntype, op='max')``.
+
+    See Also
+    --------
+    readout_nodes
    """
    return readout_nodes(graph, feat, weight, ntype=ntype, op='max')

 def max_edges(graph, feat, weight=None, *, etype=None):
    """Syntax sugar for ``dgl.readout_edges(graph, feat, weight, etype=etype, op='max')``.
+
+    See Also
+    --------
+    readout_edges
    """
    return readout_edges(graph, feat, weight, etype=etype, op='max')

@@ -210,15 +240,15 @@ def softmax_nodes(graph, feat, *, ntype=None):
    Parameters
    ----------
    graph : DGLGraph.
-        Input graph.
+        The input graph.
    feat : str
-        Node feature name.
+        The node feature name.
    ntype : str, optional
-        Node type. Can be omitted if there is only one node type in the graph.
+        The node type name. Can be omitted if there is only one node type in the graph.

    Returns
    -------
-    tensor
+    Tensor
        Result tensor.

    Examples
@@ -269,15 +299,21 @@ def softmax_edges(graph, feat, *, etype=None):
    Parameters
    ----------
    graph : DGLGraph.
-        Input graph.
+        The input graph.
    feat : str
-        Edge feature name.
-    etype : str, typle of str, optional
-        Edge type. Can be omitted if there is only one edge type in the graph.
+        The edge feature name.
+    etype : str or (str, str, str), optional
+        The type names of the edges. The allowed type name formats are:
+
+        * ``(str, str, str)`` for source node type, edge type and destination node type.
+        * or one ``str`` edge type name if the name can uniquely identify a
+          triplet format in the graph.
+
+        Can be omitted if the graph has only one type of edges.

    Returns
    -------
-    tensor
+    Tensor
        Result tensor.

    Examples
@@ -535,9 +571,10 @@ def _topk_on(graph, typestr, feat, k, descending, sortby, ntype_or_etype):
           topk_indices

 def topk_nodes(graph, feat, k, *, descending=True, sortby=None, ntype=None):
-    """Perform a graph-wise top-k on node features :attr:`feat` in
-    :attr:`graph` by feature at index :attr:`sortby`. If :attr:
-    `descending` is set to False, return the k smallest elements instead.
+    """Return a graph-level representation by a graph-wise top-k on
+    node features :attr:`feat` in :attr:`graph` by feature at index :attr:`sortby`.
+
+    If :attr:`descending` is set to False, return the k smallest elements instead.

    If :attr:`sortby` is set to None, the function would perform top-k on
    all dimensions independently, equivalent to calling
@@ -569,6 +606,11 @@ def topk_nodes(graph, feat, k, *, descending=True, sortby=None, ntype=None):
        :math:`B` is the batch size of the input graph, :math:`D`
        is the feature size.

+    Notes
+    -----
+    If an example has :math:`n` nodes and :math:`n<k`, the ``sorted_feat``
+    tensor will pad the :math:`n+1` to :math:`k` th rows with zero.
+
    Examples
    --------

@@ -631,20 +673,16 @@ def topk_nodes(graph, feat, k, *, descending=True, sortby=None, ntype=None):
              [0.0880, 0.6379, 0.4451, 0.6893, 0.5197]]]), tensor([[[1, 0, 1, 3, 1],
              [3, 2, 0, 2, 2],
              [2, 3, 2, 1, 3]]]))
-
-    Notes
-    -----
-    If an example has :math:`n` nodes and :math:`n<k`, the ``sorted_feat``
-    tensor will pad the :math:`n+1` to :math:`k`th rows with zero;
    """
    return _topk_on(graph, 'nodes', feat, k,
                    descending=descending, sortby=sortby,
                    ntype_or_etype=ntype)

 def topk_edges(graph, feat, k, *, descending=True, sortby=None, etype=None):
-    """Perform a graph-wise top-k on node features :attr:`feat` in
-    :attr:`graph` by feature at index :attr:`sortby`. If :attr:
-    `descending` is set to False, return the k smallest elements instead.
+    """Return a graph-level representation by a graph-wise top-k
+    on edge features :attr:`feat` in :attr:`graph` by feature at index :attr:`sortby`.
+
+    If :attr:`descending` is set to False, return the k smallest elements instead.

    If :attr:`sortby` is set to None, the function would perform top-k on
    all dimensions independently, equivalent to calling
@@ -676,6 +714,11 @@ def topk_edges(graph, feat, k, *, descending=True, sortby=None, etype=None):
        :math:`B` is the batch size of the input graph, :math:`D`
        is the feature size.

+    Notes
+    -----
+    If an example has :math:`n` edges and :math:`n<k`, the ``sorted_feat``
+    tensor will pad the :math:`n+1` to :math:`k` th rows with zero.
+
    Examples
    --------

@@ -738,11 +781,6 @@ def topk_edges(graph, feat, k, *, descending=True, sortby=None, etype=None):
              [0.0880, 0.6379, 0.4451, 0.6893, 0.5197]]]), tensor([[[1, 0, 1, 3, 1],
              [3, 2, 0, 2, 2],
              [2, 3, 2, 1, 3]]]))
-
-    Notes
-    -----
-    If an example has :math:`n` nodes and :math:`n<k`, the ``sorted_feat``
-    tensor will pad the :math:`n+1` to :math:`k`th rows with zero;
    """
    return _topk_on(graph, 'edges', feat, k,
                    descending=descending, sortby=sortby,

--- a/python/dgl/sampling/__init__.py
+++ b/python/dgl/sampling/__init__.py
-"""This module contains the implementations of various sampling operators.
+"""The ``dgl.sampling`` package contains operators and utilities for
+sampling from a graph via random walks, neighbor sampling, etc. They
+are typically used together with the ``DataLoader`` s in the
+``dgl.dataloading`` package. The user guide :ref:`guide-minibatch`
+gives a holistic explanation on how different components work together.
 """
+
 from .randomwalks import *
 from .pinsage import *
 from .neighbor import *
--- a/python/dgl/subgraph.py
+++ b/python/dgl/subgraph.py
@@ -18,92 +18,102 @@ __all__ = ['node_subgraph', 'edge_subgraph', 'node_type_subgraph', 'edge_type_su
           'in_subgraph', 'out_subgraph']

 def node_subgraph(graph, nodes):
-    """Return the subgraph induced on given nodes.
+    """Return a subgraph induced on the given nodes.

-    The metagraph of the returned subgraph is the same as the parent graph.
-    Features are copied from the original graph.
+    A node-induced subgraph is a subset of the nodes of a graph together with
+    any edges whose endpoints are both in this subset. In addition to extracting
+    the subgraph, DGL conducts the following:
+
+    * Relabel the extracted nodes to IDs starting from zero.
+
+    * Copy the features of the extracted nodes and edges to the resulting graph.
+      The copy is *lazy* and incurs data movement only when needed.
+
+    * Store the IDs of the extracted nodes and edges in the ``ndata`` and ``edata``
+      of the resulting graph under name ``dgl.NID`` and ``dgl.EID``, respectively.
+
+    If the graph is heterogeneous, DGL extracts a subgraph per relation and composes
+    them as the resulting graph. Thus, the resulting graph has the same set of relations
+    as the input one.

    Parameters
    ----------
    graph : DGLGraph
        The graph to extract subgraphs from.
-    nodes : list or dict[str->list or iterable]
-        A dictionary mapping node types to node ID array for constructing
-        subgraph. All nodes must exist in the graph.
+    nodes : nodes or dict[str, nodes]
+        The nodes to form the subgraph. The allowed nodes formats are:

-        If the graph only has one node type, one can just specify a list,
-        tensor, or any iterable of node IDs intead.
+        * Int Tensor: Each element is a node ID. The tensor must have the same device type
+          and ID data type as the graph's.
+        * iterable[int]: Each element is a node ID.
+        * Bool Tensor: Each :math:`i^{th}` element is a bool flag indicating whether
+          node :math:`i` is in the subgraph.

-        The node ID array can be either an interger tensor or a bool tensor.
-        When a bool tensor is used, it is automatically converted to
-        an interger tensor using the semantic of np.where(nodes_idx == True).
-
-        Note: When using bool tensor, only backend (torch, tensorflow, mxnet)
-        tensors are supported.
+        If the graph is homogeneous, one can directly pass the above formats.
+        Otherwise, the argument must be a dictionary with keys being node types
+        and values being the nodes.

    Returns
    -------
    G : DGLGraph
        The subgraph.

-        The nodes and edges in the subgraph are relabeled using consecutive
-        integers from 0.
-
-        One can retrieve the mapping from subgraph node/edge ID to parent
-        node/edge ID via ``dgl.NID`` and ``dgl.EID`` node/edge features of the
-        subgraph.
-
    Examples
    --------
    The following example uses PyTorch backend.

-    Instantiate a heterograph.
+    >>> import dgl
+    >>> import torch
+
+    Extract a subgraph from a homogeneous graph.
+
+    >>> g = dgl.graph(([0, 1, 2, 3, 4], [1, 2, 3, 4, 0]))  # 5-node cycle
+    >>> sg = dgl.node_subgraph(g, [0, 1, 4])
+    >>> sg
+    Graph(num_nodes=3, num_edges=2,
+          ndata_schemes={'_ID': Scheme(shape=(), dtype=torch.int64)}
+          edata_schemes={'_ID': Scheme(shape=(), dtype=torch.int64)})
+    >>> sg.edges()
+    (tensor([0, 2]), tensor([1, 0]))
+    >>> sg.ndata[dgl.NID]  # original node IDs
+    tensor([0, 1, 4])
+    >>> sg.edata[dgl.EID]  # original edge IDs
+    tensor([0, 4])
+
+    Specify nodes using a boolean mask.
+
+    >>> nodes = torch.tensor([True, True, False, False, True])  # choose nodes [0, 1, 4]
+    >>> dgl.node_subgraph(g, nodes)
+    Graph(num_nodes=3, num_edges=2,
+          ndata_schemes={'_ID': Scheme(shape=(), dtype=torch.int64)}
+          edata_schemes={'_ID': Scheme(shape=(), dtype=torch.int64)})
+
+    The resulting subgraph also copies features from the parent graph.
+
+    >>> g.ndata['x'] = torch.arange(10).view(5, 2)
+    >>> sg = dgl.node_subgraph(g, [0, 1, 4])
+    >>> sg
+    Graph(num_nodes=3, num_edges=2,
+          ndata_schemes={'x': Scheme(shape=(2,), dtype=torch.int64),
+                         '_ID': Scheme(shape=(), dtype=torch.int64)}
+          edata_schemes={'_ID': Scheme(shape=(), dtype=torch.int64)})
+    >>> sg.ndata['x']
+    tensor([[0, 1],
+            [2, 3],
+            [8, 9]])
+
+    Extract a subgraph from a heterogeneous graph.

    >>> g = dgl.heterograph({
    >>>     ('user', 'plays', 'game'): ([0, 1, 1, 2], [0, 0, 2, 1]),
    >>>     ('user', 'follows', 'user'): ([0, 1, 1], [1, 2, 2])
    >>> })
-    >>> # Set node features
-    >>> g.nodes['user'].data['h'] = torch.tensor([[0.], [1.], [2.]])
-
-    Get subgraphs.
-
-    >>> g.subgraph({'user': [4, 5]})
-    Traceback (most recent call last):
-        ...
-    dgl._ffi.base.DGLError: ...
-    >>> sub_g = g.subgraph({'user': [1, 2]})
-    >>> print(sub_g)
-    Graph(num_nodes={'user': 2, 'game': 0},
-          num_edges={('user', 'plays', 'game'): 0, ('user', 'follows', 'user'): 2},
-          metagraph=[('user', 'game'), ('user', 'user')])
-
-    Get subgraphs using boolean mask tensor.
-
-    >>> sub_g = g.subgraph({'user': th.tensor([False, True, True])})
-    >>> print(sub_g)
+    >>> sub_g = dgl.node_subgraph(g, {'user': [1, 2]})
+    >>> sub_g
    Graph(num_nodes={'user': 2, 'game': 0},
          num_edges={('user', 'plays', 'game'): 0, ('user', 'follows', 'user'): 2},
          metagraph=[('user', 'game'), ('user', 'user')])

-    Get the original node/edge indices.
-
-    >>> sub_g['follows'].ndata[dgl.NID] # Get the node indices in the raw graph
-    tensor([1, 2])
-    >>> sub_g['follows'].edata[dgl.EID] # Get the edge indices in the raw graph
-    tensor([1, 2])
-
-    Get the copied node features.
-
-    >>> sub_g.nodes['user'].data['h']
-    tensor([[1.],
-            [2.]])
-    >>> sub_g.nodes['user'].data['h'] += 1
-    >>> g.nodes['user'].data['h']          # Features are not shared.
-    tensor([[0.],
-            [1.],
-            [2.]])
-
    See Also
    --------
    edge_subgraph
@@ -129,106 +139,124 @@ def node_subgraph(graph, nodes):
    induced_edges = sgi.induced_edges
    return _create_hetero_subgraph(graph, sgi, induced_nodes, induced_edges)

-DGLHeteroGraph.subgraph = node_subgraph
+DGLHeteroGraph.subgraph = utils.alias_func(node_subgraph)

 def edge_subgraph(graph, edges, preserve_nodes=False):
-    """Return the subgraph induced on given edges.
-
-    The metagraph of the returned subgraph is the same as the parent graph.
+    """Return a subgraph induced on the given edges.

-    Features are copied from the original graph.
-
-    Parameters
-    ----------
-    graph : DGLGraph
-        The graph to extract subgraphs from.
-    edges : dict[(str, str, str), Tensor]
-        A dictionary mapping edge types to edge ID array for constructing
-        subgraph. All edges must exist in the subgraph.
+    An edge-induced subgraph is equivalent to creating a new graph
+    with the same number of nodes using the given edges.  In addition to extracting
+    the subgraph, DGL conducts the following:

-        The edge types are characterized by triplets of
-        ``(src type, etype, dst type)``.
+    * Relabel the incident nodes to IDs starting from zero. Isolated nodes are removed.

-        If the graph only has one edge type, one can just specify a list,
-        tensor, or any iterable of edge IDs intead.
+    * Copy the features of the extracted nodes and edges to the resulting graph.
+      The copy is *lazy* and incurs data movement only when needed.

-        The edge ID array can be either an interger tensor or a bool tensor.
-        When a bool tensor is used, it is automatically converted to
-        an interger tensor using the semantic of np.where(edges_idx == True).
+    * Store the IDs of the extracted nodes and edges in the ``ndata`` and ``edata``
+      of the resulting graph under name ``dgl.NID`` and ``dgl.EID``, respectively.

-        Note: When using bool tensor, only backend (torch, tensorflow, mxnet)
-        tensors are supported.
+    If the graph is heterogeneous, DGL extracts a subgraph per relation and composes
+    them as the resulting graph. Thus, the resulting graph has the same set of relations
+    as the input one.

-    preserve_nodes : bool
-        Whether to preserve all nodes or not. If false, all nodes
-        without edges will be removed. (Default: False)
+    Parameters
+    ----------
+    graph : DGLGraph
+        The graph to extract the subgraph from.
+    edges : edges or dict[(str, str, str), edges]
+        The edges to form the subgraph. The allowed edges formats are:
+
+        * Int Tensor: Each element is an edge ID. The tensor must have the same device type
+          and ID data type as the graph's.
+        * iterable[int]: Each element is an edge ID.
+        * Bool Tensor: Each :math:`i^{th}` element is a bool flag indicating whether
+          edge :math:`i` is in the subgraph.
+
+        If the graph is homogeneous, one can directly pass the above formats.
+        Otherwise, the argument must be a dictionary with keys being edge types
+        and values being the edges.
+    preserve_nodes : bool, optional
+        If True, neither relabel the incident nodes nor remove the isolated nodes
+        in the extracted subgraph, i.e., keep all the nodes. (Default: False)

    Returns
    -------
    G : DGLGraph
        The subgraph.

-        The nodes and edges are relabeled using consecutive integers from 0.
-
-        One can retrieve the mapping from subgraph node/edge ID to parent
-        node/edge ID via ``dgl.NID`` and ``dgl.EID`` node/edge features of the
-        subgraph.
-
    Examples
    --------
    The following example uses PyTorch backend.

-    Instantiate a heterograph.
+    >>> import dgl
+    >>> import torch
+
+    Extract a subgraph from a homogeneous graph.
+
+    >>> g = dgl.graph(([0, 1, 2, 3, 4], [1, 2, 3, 4, 0]))  # 5-node cycle
+    >>> sg = dgl.edge_subgraph(g, [0, 4])
+    >>> sg
+    Graph(num_nodes=3, num_edges=2,
+          ndata_schemes={'_ID': Scheme(shape=(), dtype=torch.int64)}
+          edata_schemes={'_ID': Scheme(shape=(), dtype=torch.int64)})
+    >>> sg.edges()
+    (tensor([0, 1]), tensor([2, 0]))
+    >>> sg.ndata[dgl.NID]  # original node IDs
+    tensor([0, 4, 1])
+    >>> sg.edata[dgl.EID]  # original edge IDs
+    tensor([0, 4])
+
+    Extract a subgraph without node relabeling.
+
+    >>> sg = dgl.edge_subgraph(g, [0, 4], preserve_nodes=True)
+    >>> sg
+    Graph(num_nodes=5, num_edges=2,
+          ndata_schemes={'_ID': Scheme(shape=(), dtype=torch.int64)}
+          edata_schemes={'_ID': Scheme(shape=(), dtype=torch.int64)})
+    >>> sg.edges()
+    (tensor([0, 4]), tensor([1, 0]))
+
+    Specify edges using a boolean mask.
+
+    >>> edges = torch.tensor([True, False, False, False, True])  # choose edges [0, 4]
+    >>> dgl.edge_subgraph(g, edges)
+    Graph(num_nodes=3, num_edges=2,
+          ndata_schemes={'_ID': Scheme(shape=(), dtype=torch.int64)}
+          edata_schemes={'_ID': Scheme(shape=(), dtype=torch.int64)})
+
+    The resulting subgraph also copies features from the parent graph.
+
+    >>> g.ndata['x'] = torch.arange(10).view(5, 2)
+    >>> sg = dgl.edge_subgraph(g, [0, 4])
+    >>> sg
+    Graph(num_nodes=3, num_edges=2,
+          ndata_schemes={'x': Scheme(shape=(2,), dtype=torch.int64),
+                         '_ID': Scheme(shape=(), dtype=torch.int64)}
+          edata_schemes={'_ID': Scheme(shape=(), dtype=torch.int64)})
+    >>> sg.ndata[dgl.NID]
+    tensor([0, 4, 1])
+    >>> sg.ndata['x']
+    tensor([[0, 1],
+            [8, 9],
+            [2, 3]])
+
+    Extract a subgraph from a heterogeneous graph.

    >>> g = dgl.heterograph({
    >>>     ('user', 'plays', 'game'): ([0, 1, 1, 2], [0, 0, 2, 1]),
    >>>     ('user', 'follows', 'user'): ([0, 1, 1], [1, 2, 2])
    >>> })
-    >>> # Set edge features
-    >>> g.edges['follows'].data['h'] = torch.tensor([[0.], [1.], [2.]])
-
-    Get subgraphs.
-
-    >>> g.edge_subgraph({('user', 'follows', 'user'): [5, 6]})
-    Traceback (most recent call last):
-        ...
-    dgl._ffi.base.DGLError: ...
-    >>> sub_g = g.edge_subgraph({('user', 'follows', 'user'): [1, 2],
-    >>>                          ('user', 'plays', 'game'): [2]})
+    >>> sub_g = dgl.edge_subgraph(g, {('user', 'follows', 'user'): [1, 2],
+    ...                               ('user', 'plays', 'game'): [2]})
    >>> print(sub_g)
    Graph(num_nodes={'user': 2, 'game': 1},
          num_edges={('user', 'plays', 'game'): 1, ('user', 'follows', 'user'): 2},
          metagraph=[('user', 'game'), ('user', 'user')])

-    Get subgraphs using boolean mask tensor.
-    >>> sub_g = g.edge_subgraph({('user', 'follows', 'user'): th.tensor([False, True, True]),
-    >>>                   ('user', 'plays', 'game'): th.tensor([False, False, True, False])})
-    >>> sub_g
-    Graph(num_nodes={'user': 2, 'game': 1},
-        num_edges={('user', 'plays', 'game'): 1, ('user', 'follows', 'user'): 2},
-        metagraph=[('user', 'game'), ('user', 'user')])
-
-    Get the original node/edge indices.
-
-    >>> sub_g['follows'].ndata[dgl.NID] # Get the node indices in the raw graph
-    tensor([1, 2])
-    >>> sub_g['plays'].edata[dgl.EID]   # Get the edge indices in the raw graph
-    tensor([2])
-
-    Get the copied node features.
-
-    >>> sub_g.edges['follows'].data['h']
-    tensor([[1.],
-            [2.]])
-    >>> sub_g.edges['follows'].data['h'] += 1
-    >>> g.edges['follows'].data['h']          # Features are not shared.
-    tensor([[0.],
-            [1.],
-            [2.]])
-
    See Also
    --------
-    subgraph
+    node_subgraph
    """
    if graph.is_block:
        raise DGLError('Extracting subgraph from a block graph is not allowed.')
@@ -252,73 +280,82 @@ def edge_subgraph(graph, edges, preserve_nodes=False):
    induced_nodes = sgi.induced_nodes
    return _create_hetero_subgraph(graph, sgi, induced_nodes, induced_edges)

-DGLHeteroGraph.edge_subgraph = edge_subgraph
+DGLHeteroGraph.edge_subgraph = utils.alias_func(edge_subgraph)

 def in_subgraph(g, nodes):
-    """Return the subgraph induced on the inbound edges of all edge types of the
+    """Return the subgraph induced on the inbound edges of all the edge types of the
    given nodes.

-    All the nodes are preserved regardless of whether they have an edge or not.
+    An edge-induced subgraph is equivalent to creating a new graph
+    with the same number of nodes using the given edges.  In addition to extracting
+    the subgraph, DGL conducts the following:

-    The metagraph of the returned subgraph is the same as the parent graph.
+    * Copy the features of the extracted nodes and edges to the resulting graph.
+      The copy is *lazy* and incurs data movement only when needed.

-    Features are copied from the original graph.
+    * Store the IDs of the extracted edges in the ``edata``
+      of the resulting graph under name ``dgl.EID``.
+
+    If the graph is heterogeneous, DGL extracts a subgraph per relation and composes
+    them as the resulting graph. Thus, the resulting graph has the same set of relations
+    as the input one.

    Parameters
    ----------
    g : DGLGraph
-        Full graph structure.
-    nodes : tensor or dict
-        Node ids to sample neighbors from. The allowed types
-        are dictionary of node types to node id tensors, or simply node id tensor if
-        the given graph g has only one type of nodes.
+        The input graph.
+    nodes : nodes or dict[str, nodes]
+        The nodes to form the subgraph. The allowed nodes formats are:
+
+        * Int Tensor: Each element is a node ID. The tensor must have the same device type
+          and ID data type as the graph's.
+        * iterable[int]: Each element is a node ID.
+
+        If the graph is homogeneous, one can directly pass the above formats.
+        Otherwise, the argument must be a dictionary with keys being node types
+        and values being the nodes.

    Returns
    -------
    DGLGraph
        The subgraph.

-        One can retrieve the mapping from subgraph edge ID to parent
-        edge ID via ``dgl.EID`` edge features of the subgraph.
-
    Examples
    --------
    The following example uses PyTorch backend.

-    Instantiate a heterograph.
+    >>> import dgl
+    >>> import torch
+
+    Extract a subgraph from a homogeneous graph.
+
+    >>> g = dgl.graph(([0, 1, 2, 3, 4], [1, 2, 3, 4, 0]))  # 5-node cycle
+    >>> g.edata['w'] = torch.arange(10).view(5, 2)
+    >>> sg = dgl.in_subgraph(g, [2, 0])
+    >>> sg
+    Graph(num_nodes=5, num_edges=2,
+          ndata_schemes={}
+          edata_schemes={'w': Scheme(shape=(2,), dtype=torch.int64),
+                         '_ID': Scheme(shape=(), dtype=torch.int64)})
+    >>> sg.edges()
+    (tensor([1, 4]), tensor([2, 0]))
+    >>> sg.edata[dgl.EID]  # original edge IDs
+    tensor([1, 4])
+    >>> sg.edata['w']  # also extract the features
+    tensor([[2, 3],
+            [8, 9]])
+
+    Extract a subgraph from a heterogeneous graph.

    >>> g = dgl.heterograph({
    ...     ('user', 'plays', 'game'): ([0, 1, 1, 2], [0, 0, 2, 1]),
    ...     ('user', 'follows', 'user'): ([0, 1, 1], [1, 2, 2])})
-    >>> # Set edge features
-    >>> g.edges['follows'].data['h'] = torch.tensor([[0.], [1.], [2.]])
-
-    Get subgraphs.
-
    >>> sub_g = g.in_subgraph({'user': [2], 'game': [2]})
-    >>> print(sub_g)
+    >>> sub_g
    Graph(num_nodes={'game': 3, 'user': 3},
          num_edges={('user', 'plays', 'game'): 1, ('user', 'follows', 'user'): 2},
          metagraph=[('user', 'game', 'plays'), ('user', 'user', 'follows')])

-    Get the original node/edge indices.
-
-    >>> sub_g.edges['plays'].data[dgl.EID]
-    tensor([2])
-    >>> sub_g.edges['follows'].data[dgl.EID]
-    tensor([1, 2])
-
-    Get the copied edge features.
-
-    >>> sub_g.edges['follows'].data['h']
-    tensor([[1.],
-            [2.]])
-    >>> sub_g.edges['follows'].data['h'] += 1
-    >>> g.edges['follows'].data['h']          # Features are not shared.
-    tensor([[0.],
-            [1.],
-            [2.]])
-
    See also
    --------
    out_subgraph
@@ -341,73 +378,82 @@ def in_subgraph(g, nodes):
    induced_edges = sgi.induced_edges
    return _create_hetero_subgraph(g, sgi, None, induced_edges)

-DGLHeteroGraph.in_subgraph = in_subgraph
+DGLHeteroGraph.in_subgraph = utils.alias_func(in_subgraph)

 def out_subgraph(g, nodes):
-    """Return the subgraph induced on the outbound edges of all edge types of the
+    """Return the subgraph induced on the out-bound edges of all the edge types of the
    given nodes.

-    All the nodes are preserved regardless of whether they have an edge or not.
+    An edge-induced subgraph is equivalent to creating a new graph
+    with the same number of nodes using the given edges.  In addition to extracting
+    the subgraph, DGL conducts the following:

-    The metagraph of the returned subgraph is the same as the parent graph.
+    * Copy the features of the extracted nodes and edges to the resulting graph.
+      The copy is *lazy* and incurs data movement only when needed.

-    Features are copied from the original graph.
+    * Store the IDs of the extracted edges in the ``edata``
+      of the resulting graph under name ``dgl.EID``.
+
+    If the graph is heterogeneous, DGL extracts a subgraph per relation and composes
+    them as the resulting graph. Thus, the resulting graph has the same set of relations
+    as the input one.

    Parameters
    ----------
    g : DGLGraph
-        Full graph structure.
-    nodes : tensor or dict
-        Node ids to sample neighbors from. The allowed types
-        are dictionary of node types to node id tensors, or simply node id tensor if
-        the given graph g has only one type of nodes.
+        The input graph.
+    nodes : nodes or dict[str, nodes]
+        The nodes to form the subgraph. The allowed nodes formats are:
+
+        * Int Tensor: Each element is a node ID. The tensor must have the same device type
+          and ID data type as the graph's.
+        * iterable[int]: Each element is a node ID.
+
+        If the graph is homogeneous, one can directly pass the above formats.
+        Otherwise, the argument must be a dictionary with keys being node types
+        and values being the nodes.

    Returns
    -------
    DGLGraph
        The subgraph.

-        One can retrieve the mapping from subgraph edge ID to parent
-        edge ID via ``dgl.EID`` edge features of the subgraph.
-
    Examples
    --------
    The following example uses PyTorch backend.

-    Instantiate a heterograph.
+    >>> import dgl
+    >>> import torch
+
+    Extract a subgraph from a homogeneous graph.
+
+    >>> g = dgl.graph(([0, 1, 2, 3, 4], [1, 2, 3, 4, 0]))  # 5-node cycle
+    >>> g.edata['w'] = torch.arange(10).view(5, 2)
+    >>> sg = dgl.out_subgraph(g, [2, 0])
+    >>> sg
+    Graph(num_nodes=5, num_edges=2,
+          ndata_schemes={}
+          edata_schemes={'w': Scheme(shape=(2,), dtype=torch.int64),
+                         '_ID': Scheme(shape=(), dtype=torch.int64)})
+    >>> sg.edges()
+    (tensor([2, 0]), tensor([3, 1]))
+    >>> sg.edata[dgl.EID]  # original edge IDs
+    tensor([2, 0])
+    >>> sg.edata['w']  # also extract the features
+    tensor([[4, 5],
+            [0, 1]])
+
+    Extract a subgraph from a heterogeneous graph.

    >>> g = dgl.heterograph({
    ...     ('user', 'plays', 'game'): ([0, 1, 1, 2], [0, 0, 2, 1]),
    ...     ('user', 'follows', 'user'): ([0, 1, 1], [1, 2, 2])})
-    >>> # Set edge features
-    >>> g.edges['follows'].data['h'] = torch.tensor([[0.], [1.], [2.]])
-
-    Get subgraphs.
-
    >>> sub_g = g.out_subgraph({'user': [1]})
-    >>> print(sub_g)
+    >>> sub_g
    Graph(num_nodes={'game': 3, 'user': 3},
          num_edges={('user', 'plays', 'game'): 2, ('user', 'follows', 'user'): 2},
          metagraph=[('user', 'game', 'plays'), ('user', 'user', 'follows')])

-    Get the original node/edge indices.
-
-    >>> sub_g.edges['plays'].data[dgl.EID]
-    tensor([1, 2])
-    >>> sub_g.edges['follows'].data[dgl.EID]
-    tensor([1, 2])
-
-    Get the copied edge features.
-
-    >>> sub_g.edges['follows'].data['h']
-    tensor([[1.],
-            [2.]])
-    >>> sub_g.edges['follows'].data['h'] += 1
-    >>> g.edges['follows'].data['h']          # Features are not shared.
-    tensor([[0.],
-            [1.],
-            [2.]])
-
    See also
    --------
    in_subgraph
@@ -430,22 +476,23 @@ def out_subgraph(g, nodes):
    induced_edges = sgi.induced_edges
    return _create_hetero_subgraph(g, sgi, None, induced_edges)

-DGLHeteroGraph.out_subgraph = out_subgraph
+DGLHeteroGraph.out_subgraph = utils.alias_func(out_subgraph)

 def node_type_subgraph(graph, ntypes):
    """Return the subgraph induced on given node types.

-    The metagraph of the returned subgraph is the subgraph of the original
-    metagraph induced from the node types.
-
-    Features are shared with the original graph.
+    A node-type-induced subgraph contains all the nodes of the given subset of
+    the node types of a graph and any edges whose endpoints are both in this subset.
+    In addition to extracting the subgraph, DGL also copies the features of the
+    extracted nodes and edges to the resulting graph.
+    The copy is *lazy* and incurs data movement only when needed.

    Parameters
    ----------
    graph : DGLGraph
        The graph to extract subgraphs from.
    ntypes : list[str]
-        The node types
+        The type names of the nodes in the subgraph.

    Returns
    -------
@@ -456,6 +503,9 @@ def node_type_subgraph(graph, ntypes):
    --------
    The following example uses PyTorch backend.

+    >>> import dgl
+    >>> import torch
+
    Instantiate a heterograph.

    >>> g = dgl.heterograph({
@@ -473,17 +523,12 @@ def node_type_subgraph(graph, ntypes):
          ndata_schemes={'h': Scheme(shape=(1,), dtype=torch.float32)}
          edata_schemes={})

-    Get the shared node features.
+    Get the extracted node features.

    >>> sub_g.nodes['user'].data['h']
    tensor([[0.],
            [1.],
            [2.]])
-    >>> sub_g.nodes['user'].data['h'] += 1
-    >>> g.nodes['user'].data['h']          # Features are shared.
-    tensor([[1.],
-            [2.],
-            [3.]])

    See Also
    --------
@@ -498,22 +543,28 @@ def node_type_subgraph(graph, ntypes):
            etypes.append(graph.canonical_etypes[etid])
    return edge_type_subgraph(graph, etypes)

-DGLHeteroGraph.node_type_subgraph = node_type_subgraph
+DGLHeteroGraph.node_type_subgraph = utils.alias_func(node_type_subgraph)

 def edge_type_subgraph(graph, etypes):
    """Return the subgraph induced on given edge types.

-    The metagraph of the returned subgraph is the subgraph of the original metagraph
-    induced from the edge types.
-
-    Features are shared with the original graph.
+    An edge-type-induced subgraph contains all the edges of the given subset of
+    the edge types of a graph and the nodes incident to those edges.
+    In addition to extracting the subgraph, DGL also copies the features of the
+    extracted nodes and edges to the resulting graph.
+    The copy is *lazy* and incurs data movement only when needed.

    Parameters
    ----------
    graph : DGLGraph
        The graph to extract subgraphs from.
-    etypes : list[str or tuple]
-        The edge types
+    etypes : list[str] or list[(str, str, str)]
+        The type names of the edges in the subgraph. The allowed type name
+        formats are:
+
+        * ``(str, str, str)`` for source node type, edge type and destination node type.
+        * or one ``str`` for the edge type name  if the name can uniquely identify a
+          triplet format in the graph.

    Returns
    -------
@@ -524,6 +575,9 @@ def edge_type_subgraph(graph, etypes):
    --------
    The following example uses PyTorch backend.

+    >>> import dgl
+    >>> import torch
+
    Instantiate a heterograph.

    >>> g = dgl.heterograph({
@@ -536,7 +590,7 @@ def edge_type_subgraph(graph, etypes):
    Get subgraphs.

    >>> sub_g = g.edge_type_subgraph(['follows'])
-    >>> print(sub_g)
+    >>> sub_g
    Graph(num_nodes=3, num_edges=3,
          ndata_schemes={}
          edata_schemes={'h': Scheme(shape=(1,), dtype=torch.float32)})
@@ -547,11 +601,6 @@ def edge_type_subgraph(graph, etypes):
    tensor([[0.],
            [1.],
            [2.]])
-    >>> sub_g.edges['follows'].data['h'] += 1
-    >>> g.edges['follows'].data['h']          # Features are shared.
-    tensor([[1.],
-            [2.],
-            [3.]])

    See Also
    --------
@@ -579,7 +628,7 @@ def edge_type_subgraph(graph, etypes):
    hg = DGLHeteroGraph(hgidx, induced_ntypes, induced_etypes, node_frames, edge_frames)
    return hg

-DGLHeteroGraph.edge_type_subgraph = edge_type_subgraph
+DGLHeteroGraph.edge_type_subgraph = utils.alias_func(edge_type_subgraph)

 #################### Internal functions ####################


--- a/python/dgl/transform.py
+++ b/python/dgl/transform.py
@@ -59,30 +59,29 @@ def pairwise_squared_distance(x):

 #pylint: disable=invalid-name
 def knn_graph(x, k):
-    """Convert a tensor into k-nearest-neighbor (KNN) graph(s) according
-    to Euclidean distance.
+    """Construct a graph from a set of points according to k-nearest-neighbor (KNN)
+    and return.

    The function transforms the coordinates/features of a point set
-    into a directed homogeneous graph.  The coordinates of the point
+    into a directed homogeneous graph. The coordinates of the point
    set is specified as a matrix whose rows correspond to points and
    columns correspond to coordinate/feature dimensions.

-    The nodes of the returned graph correspond to the points.  An edge
-    exists if the source node is one of the k-nearest neighbors of the
-    destination node.
+    The nodes of the returned graph correspond to the points, where the predecessors
+    of each point are its k-nearest neighbors measured by the Euclidean distance.

-    If you give a 3D tensor, then each submatrix will be transformed
-    into a separate graph.  DGL then composes the graphs into a large
+    If :attr:`x` is a 3D tensor, then each submatrix will be transformed
+    into a separate graph. DGL then composes the graphs into a large
    graph of multiple connected components.

    Parameters
    ----------
-    x : 2D or 3D Tensor
-        The input tensor.  It can be either on CPU or GPU.
+    x : Tensor
+        The point coordinates. It can be either on CPU or GPU.

-        * If 2D, ``x[i]`` corresponds to the i-th node in the KNN graph.
+        * If it is 2D, ``x[i]`` corresponds to the i-th node in the KNN graph.

-        * If 3D, ``x[i]`` corresponds to the i-th KNN graph and
+        * If it is 3D, ``x[i]`` corresponds to the i-th KNN graph and
          ``x[i][j]`` corresponds to the j-th node in the i-th KNN graph.
    k : int
        The number of nearest neighbors per node.
@@ -90,7 +89,7 @@ def knn_graph(x, k):
    Returns
    -------
    DGLGraph
-        The graph. The node IDs are in the same order as :attr:`x`.
+        The constructed graph. The node IDs are in the same order as :attr:`x`.

        The returned graph is on CPU, regardless of the context of input :attr:`x`.

@@ -152,22 +151,25 @@ def knn_graph(x, k):

 #pylint: disable=invalid-name
 def segmented_knn_graph(x, k, segs):
-    """Convert a tensor into multiple k-nearest-neighbor (KNN) graph(s)
-    with different number of nodes.
+    """Construct multiple graphs from multiple sets of points according to
+    k-nearest-neighbor (KNN) and return.

-    Each chunk of :attr:`x` contains coordinates/features of a point set.
+    Compared with :func:`dgl.knn_graph`, this allows multiple point sets with
+    different capacity. The points from different sets are stored contiguously
+    in the :attr:`x` tensor.
    :attr:`segs` specifies the number of points in each point set. The
    function constructs a KNN graph for each point set, where the predecessors
-    of each point are its k-nearest neighbors. DGL then composes all KNN graphs
+    of each point are its k-nearest neighbors measured by the Euclidean distance.
+    DGL then composes all KNN graphs
    into a graph with multiple connected components.

    Parameters
    ----------
-    x : 2D Tensor
-        Coordinates/features of points.  It can be either on CPU or GPU.
+    x : Tensor
+        Coordinates/features of points. Must be 2D. It can be either on CPU or GPU.
    k : int
        The number of nearest neighbors per node.
-    segs : list of int
+    segs : list[int]
        Number of points in each point set. The numbers in :attr:`segs`
        must sum up to the number of rows in :attr:`x`.

@@ -222,37 +224,28 @@ def segmented_knn_graph(x, k, segs):

    return convert.from_scipy(adj)

-def to_bidirected(g, readonly=None, copy_ndata=False):
-    r"""Convert the graph to a bidirectional simple graph, adding reverse edges and
-    removing parallel edges.
-
-    The function generates a new graph with no edge features.  In the new graph,
-    a single edge ``(u, v)`` exists if and only if there exists an edge connecting ``u``
-    to ``v`` or an edge connecting ``v`` to ``u`` in the original graph.
+def to_bidirected(g, copy_ndata=False, readonly=None):
+    r"""Convert the graph to a bi-directional simple graph and return.

-    For a heterogeneous graph with multiple edge types, DGL treats edges corresponding
-    to each type as a separate graph and convert the graph to a bidirected one
-    for each of them.
+    For an input graph :math:`G`, return a new graph :math:`G'` such that an edge
+    :math:`(u, v)\in G'` if and only if there exists an edge :math:`(u, v)\in G` or
+    an edge :math:`(v, u)\in G`. The resulting graph :math:`G'` is a simple graph,
+    meaning there is no parallel edge.

-    Since :func:`to_bidirected` **is not well defined for unidirectional
-    bipartite graphs**, DGL will raise an error if an edge type whose source node type is
-    different from the destination node type exists.
+    The operation only works for edges whose two endpoints belong to the same node type.
+    DGL will raise an error if the input graph is heterogeneous and contains edges
+    with different types of endpoints.

    Parameters
    ----------
    g : DGLGraph
        The input graph.
-    readonly : bool
-        Deprecated. There will be no difference between readonly and non-readonly
-
-        (Default: True)
    copy_ndata: bool, optional
        If True, the node features of the bidirected graph are copied from the
-        original graph.
-
-        If False, the bidirected graph will not have any node features.
-
+        original graph. If False, the bidirected graph will not have any node features.
        (Default: False)
+    readonly : bool
+        **DEPRECATED**.

    Returns
    -------
@@ -261,10 +254,9 @@ def to_bidirected(g, readonly=None, copy_ndata=False):

    Notes
    -----
-    If :attr:`copy_ndata` is True, same tensors will be used for
-    the features of the original graph and the returned graph to save memory cost.
-    As a result, users should avoid performing in-place operations on the features of
-    the returned graph, which will corrupt the features of the original graph as well.
+    If :attr:`copy_ndata` is True, the resulting graph will share the node feature
+    tensors with the input graph. Hence, users should try to avoid in-place operations
+    which will be visible to both graphs.

    Examples
    --------
@@ -314,24 +306,21 @@ def to_bidirected(g, readonly=None, copy_ndata=False):

 def add_reverse_edges(g, readonly=None, copy_ndata=True,
                      copy_edata=False, ignore_bipartite=False):
-    r"""Add reverse edges to a graph.
+    r"""Add a reversed edge for each edge in the input graph and return a new graph.

    For a graph with edges :math:`(i_1, j_1), \cdots, (i_n, j_n)`, this
    function creates a new graph with edges
    :math:`(i_1, j_1), \cdots, (i_n, j_n), (j_1, i_1), \cdots, (j_n, i_n)`.

-    For a heterogeneous graph with multiple edge types, DGL treats the edges corresponding
-    to each type as a separate graph and add reverse edges for each of them.
-
-    Since :func:`add_reverse_edges` **is not well defined for unidirectional bipartite graphs**,
-    an error will be raised if an edge type of the input heterogeneous graph is for a
-    unidirectional bipartite graph.  DGL simply skips the edge types corresponding
-    to unidirectional bipartite graphs by specifying ``ignore_bipartite=True``.
+    The operation only works for edges whose two endpoints belong to the same node type.
+    DGL will raise an error if the input graph is heterogeneous and contains edges
+    with different types of endpoints. If :attr:`ignore_bipartite` is true, DGL will
+    ignore those edges instead.

    Parameters
    ----------
    g : DGLGraph
-        The input graph.  Can be on either CPU or GPU.
+        The input graph.
    readonly : bool, default to be True
        Deprecated. There will be no difference between readonly and non-readonly
    copy_ndata: bool, optional
@@ -360,13 +349,10 @@ def add_reverse_edges(g, readonly=None, copy_ndata=True,

    Notes
    -----
-    If :attr:`copy_ndata` is True, same tensors are used as
-    the node features of the original graph and the new graph.
-    As a result, users should avoid performing in-place operations
-    on the node features of the new graph to avoid feature corruption.
-
-    On the contrary, edge features are concatenated,
-    and they are not shared due to concatenation.
+    If :attr:`copy_ndata` is True, the resulting graph will share the node feature
+    tensors with the input graph. Hence, users should try to avoid in-place operations
+    which will be visible to both graphs. On the contrary, the two graphs do not share
+    the same edge feature storage.

    Examples
    --------
@@ -377,7 +363,7 @@ def add_reverse_edges(g, readonly=None, copy_ndata=True,
    >>> bg1.edges()
    (tensor([0, 0, 0, 1]), tensor([0, 1, 0, 0]))

-    **Heterogeneous graphs with Multiple Edge Types**
+    **Heterogeneous graphs**

    >>> g = dgl.heterograph({
    >>>     ('user', 'wins', 'user'): (th.tensor([0, 2, 0, 2, 2]), th.tensor([1, 1, 2, 1, 0])),
@@ -489,12 +475,11 @@ def line_graph(g, backtracking=True, shared=False):

    Notes
    -----
-    If :attr:`shared` is True, same tensors will be used for
-    the features of the original graph and the returned graph to save memory cost.
-    As a result, users should avoid performing in-place operations on the features of
-    the returned graph, which will corrupt the features of the original graph as well.
+    * If :attr:`shared` is True, the node features of the resulting graph share the same
+      storage with the edge features of the input graph. Hence, users should try to
+      avoid in-place operations which will be visible to both graphs.

-    The implementation is done on CPU, even if the input and output graphs are on GPU.
+    * The function supports input graph on GPU but copies it to CPU during computation.

    Examples
    --------
@@ -532,15 +517,13 @@ def line_graph(g, backtracking=True, shared=False):

    return lg

-DGLHeteroGraph.line_graph = line_graph
+DGLHeteroGraph.line_graph = utils.alias_func(line_graph)

 def khop_adj(g, k):
    """Return the matrix of :math:`A^k` where :math:`A` is the adjacency matrix of the graph
-    :math:`g`, where rows represent source nodes and columns represent destination nodes.
-
-    The returned matrix is a 32-bit float dense matrix on CPU.
+    :math:`g`.

-    The graph must be homogeneous.
+    The returned matrix is a 32-bit float dense matrix on CPU. The graph must be homogeneous.

    Parameters
    ----------
@@ -551,7 +534,7 @@ def khop_adj(g, k):

    Returns
    -------
-    tensor
+    Tensor
        The returned tensor.

    Examples
@@ -607,10 +590,9 @@ def khop_graph(g, k, copy_ndata=True):

    Notes
    -----
-    If :attr:`copy_ndata` is True, same tensors will be used for
-    the features of the original graph and the returned graph to save memory cost.
-    As a result, users should avoid performing in-place operations on the features of
-    the returned graph, which will corrupt the features of the original graph as well.
+    If :attr:`copy_ndata` is True, the resulting graph will share the node feature
+    tensors with the input graph. Hence, users should try to avoid in-place operations
+    which will be visible to both graphs.

    Examples
    --------
@@ -656,19 +638,11 @@ def khop_graph(g, k, copy_ndata=True):
    return new_g

 def reverse(g, copy_ndata=True, copy_edata=False, *, share_ndata=None, share_edata=None):
-    r"""Return the reverse of a graph.
+    r"""Return a new graph whose edges are the reverse of those in the input graph.

    The reverse (also called converse, transpose) of a graph with edges
-    :math:`(i_1, j_1), (i_2, j_2), \cdots` is a new graph with edges
-    :math:`(j_1, i_1), (j_2, i_2), \cdots`.
-
-    For a heterogeneous graph with multiple edge types, DGL treats the edges corresponding
-    to each type as a separate graph and compute the reverse for each of them.
-    If the original edge type is ``(A, B, C)``, its reverse will have edge type
-    ``(C, B, A)``.
-
-    Given a :class:`DGLGraph` object, DGL returns another :class:`DGLGraph`
-    object representing its reverse.
+    :math:`(i_1, j_1), (i_2, j_2), \cdots` of type ``(U, E, V)`` is a new graph with edges
+    :math:`(j_1, i_1), (j_2, i_2), \cdots` of type ``(V, E, U)``.

    Parameters
    ----------
@@ -676,17 +650,11 @@ def reverse(g, copy_ndata=True, copy_edata=False, *, share_ndata=None, share_eda
        The input graph.
    copy_ndata: bool, optional
        If True, the node features of the reversed graph are copied from the
-        original graph.
-
-        If False, the reversed graph will not have any node features.
-
+        original graph. If False, the reversed graph will not have any node features.
        (Default: True)
    copy_edata: bool, optional
        If True, the edge features of the reversed graph are copied from the
-        original graph.
-
-        If False, the reversed graph will not have any edge features.
-
+        original graph. If False, the reversed graph will not have any edge features.
        (Default: False)

    Return
@@ -696,14 +664,14 @@ def reverse(g, copy_ndata=True, copy_edata=False, *, share_ndata=None, share_eda

    Notes
    -----
-    If :attr:`copy_ndata` or :attr:`copy_edata` is True, same tensors will be used for
-    the features of the original graph and the reversed graph to save memory cost.
-    As a result, users should avoid performing in-place operations on the features of
-    the reversed graph, which will corrupt the features of the original graph as well.
+    If :attr:`copy_ndata` or :attr:`copy_edata` is True,
+    the resulting graph will share the node or edge feature
+    tensors with the input graph. Hence, users should try to avoid in-place operations
+    which will be visible to both graphs.

    Examples
    --------
-    **Homogeneous graphs or Heterogeneous graphs with A Single Edge Type**
+    **Homogeneous graphs**

    Create a graph to reverse.

@@ -731,24 +699,7 @@ def reverse(g, copy_ndata=True, copy_edata=False, *, share_ndata=None, share_eda
            [4.],
            [5.]])

-    **In-place operations on features of one graph will be reflected on features of
-    its reverse, which is dangerous. Out-place operations will not be reflected.**
-
-    >>> rg.ndata['h'] += 1
-    >>> g.ndata['h']
-    tensor([[1.],
-            [2.],
-            [3.]])
-    >>> g.ndata['h'] += 1
-    >>> rg.ndata['h']
-    tensor([[2.],
-            [3.],
-            [4.]])
-    >>> rg.ndata['h2'] = th.ones(3, 1)
-    >>> 'h2' in g.ndata
-    False
-
-    **Heterogenenous graphs with Multiple Edge Types**
+    **Heterogeneous graphs**

    >>> g = dgl.heterograph({
    ...     ('user', 'follows', 'user'): (th.tensor([0, 2]), th.tensor([1, 2])),
@@ -758,7 +709,7 @@ def reverse(g, copy_ndata=True, copy_edata=False, *, share_ndata=None, share_eda
    >>> g.edges['plays'].data['he'] = th.zeros(3, 1)

    The resulting graph will have edge types
-    ``('user', 'follows', 'user)`` and ``('user', 'plays', 'game')``.
+    ``('user', 'follows', 'user')`` and ``('game', 'plays', 'user')``.

    >>> rg = dgl.reverse(g, copy_ndata=True)
    >>> rg
@@ -804,7 +755,7 @@ def reverse(g, copy_ndata=True, copy_edata=False, *, share_ndata=None, share_eda

    return new_g

-DGLHeteroGraph.reverse = reverse
+DGLHeteroGraph.reverse = utils.alias_func(reverse)

 def to_simple_graph(g):
    """Convert the graph to a simple graph with no multi-edge.
@@ -874,14 +825,15 @@ def to_bidirected_stale(g, readonly=True):

 def laplacian_lambda_max(g):
    """Return the largest eigenvalue of the normalized symmetric Laplacian of a graph.
+
    If the graph is batched from multiple graphs, return the list of the largest eigenvalue
    for each graph instead.

    Parameters
    ----------
    g : DGLGraph
-        The input graph, it should be an undirected graph.  It must be homogeneous.
-
+        The input graph. It must be a bi-directed homogeneous graph, i.e., every edge
+        should have an accompanying reverse edge in the graph.
        The graph can be batched from multiple graphs.

    Returns
@@ -938,7 +890,7 @@ def metapath_reachable_graph(g, metapath):
    Returns
    -------
    DGLGraph
-        A homogeneous or unidirectional bipartite graph.  It will be on CPU regardless of
+        A homogeneous or unidirectional bipartite graph. It will be on CPU regardless of
        whether the input graph is on CPU or GPU.

    Examples
@@ -970,21 +922,20 @@ def metapath_reachable_graph(g, metapath):
    return new_g

 def add_nodes(g, num, data=None, ntype=None):
-    r"""Append new nodes of the given node type.
+    r"""Add the given number of nodes to the graph and return a new graph.

-    The new nodes will have IDs starting from ``g.number_of_nodes(ntype)``.
-
-    A new graph with newly added nodes is returned.
+    The new nodes will have IDs starting from ``g.num_nodes(ntype)``.

    Parameters
    ----------
    num : int
-        Number of nodes to add.
-    data : dict, optional
-        Feature data of the added nodes.
+        The number of nodes to add.
+    data : dict[str, Tensor], optional
+        Feature data of the added nodes. The keys are feature names
+        while the values are feature data.
    ntype : str, optional
-        The type of the new nodes. Can be omitted if there is
-        only one node type in the graph.
+        The node type name. Can be omitted if there is
+        only one type of nodes in the graph.

    Return
    ------
@@ -993,11 +944,10 @@ def add_nodes(g, num, data=None, ntype=None):

    Notes
    -----
-    * If the key of :attr:`data` does not contain some existing feature fields,
-    those features for the new nodes will be filled with zeros).
-
-    * If the key of :attr:`data` contains new feature fields, those features for
-    the old nodes will be filled zeros).
+    * For features in :attr:`g` but not in :attr:`data`,
+      DGL assigns zero features for the newly added nodes.
+    * For features in :attr:`data` but not in :attr:`g`, DGL assigns zero features
+      for the existing nodes in the graph.

    Examples
    --------
@@ -1007,7 +957,7 @@ def add_nodes(g, num, data=None, ntype=None):
    >>> import dgl
    >>> import torch

-    **Homogeneous Graphs or Heterogeneous Graphs with A Single Node Type**
+    **Homogeneous Graphs**

    >>> g = dgl.graph((torch.tensor([0, 1]), torch.tensor([1, 2])))
    >>> g.num_nodes()
@@ -1017,26 +967,26 @@ def add_nodes(g, num, data=None, ntype=None):
    5

    If the graph has some node features and new nodes are added without
-    features, their features will be created with zeros.
+    features, their features will be filled with zeros.

    >>> g.ndata['h'] = torch.ones(5, 1)
    >>> g = dgl.add_nodes(g, 1)
    >>> g.ndata['h']
    tensor([[1.], [1.], [1.], [1.], [1.], [0.]])

-    You can also assign features for the new nodes in adding new nodes.
+    Assign features for the new nodes.

    >>> g = dgl.add_nodes(g, 1, {'h': torch.ones(1, 1), 'w': torch.ones(1, 1)})
    >>> g.ndata['h']
    tensor([[1.], [1.], [1.], [1.], [1.], [0.], [1.]])

-    Since :attr:`data` contains new feature fields, the features for old nodes
-    will be created with zeros.
+    Since :attr:`data` contains new feature fields, the features for existing nodes
+    will be filled with zeros.

    >>> g.ndata['w']
    tensor([[0.], [0.], [0.], [0.], [0.], [0.], [1.]])

-    **Heterogeneous Graphs with Multiple Node Types**
+    **Heterogeneous Graphs**

    >>> g = dgl.heterograph({
    ...     ('user', 'plays', 'game'): (torch.tensor([0, 1, 1, 2]),
@@ -1061,25 +1011,28 @@ def add_nodes(g, num, data=None, ntype=None):
    return g

 def add_edges(g, u, v, data=None, etype=None):
-    r"""Append multiple new edges for the specified edge type.
-
-    A new graph with newly added edges is returned.
+    r"""Add the edges to the graph and return a new graph.

    The i-th new edge will be from ``u[i]`` to ``v[i]``.  The IDs of the new
-    edges will start from ``g.number_of_edges(etype)``.
+    edges will start from ``g.num_edges(etype)``.

    Parameters
    ----------
-    u : int, tensor, numpy.ndarray, list
+    u : int, Tensor or iterable[int]
        Source node IDs, ``u[i]`` gives the source node for the i-th new edge.
-    v : int, tensor, numpy.ndarray, list
+    v : int, Tensor or iterable[int]
        Destination node IDs, ``v[i]`` gives the destination node for the i-th new edge.
-    data : dict, optional
-        Feature data of the added edges. The i-th row of the feature data
-        corresponds to the i-th new edge.
-    etype : str or tuple of str, optional
-        The type of the new edges. Can be omitted if there is
-        only one edge type in the graph.
+    data : dict[str, Tensor], optional
+        Feature data of the added edges. The keys are feature names
+        while the values are feature data.
+    etype : str or (str, str, str), optional
+        The type names of the edges. The allowed type name formats are:
+
+        * ``(str, str, str)`` for source node type, edge type and destination node type.
+        * or one ``str`` edge type name if the name can uniquely identify a
+          triplet format in the graph.
+
+        Can be omitted if the graph has only one type of edges.

    Return
    ------
@@ -1088,15 +1041,13 @@ def add_edges(g, u, v, data=None, etype=None):

    Notes
    -----
-    * If end nodes of adding edges does not exists, add_nodes is invoked
-    to add new nodes. The node features of the new nodes will be created
-    with zeros.
-
-    * If the key of :attr:`data` does not contain some existing feature fields,
-    those features for the new edges will be created with zeros.
-
-    * If the key of :attr:`data` contains new feature fields, those features for
-    the old edges will be created with zeros.
+    * If the end nodes of the given edges do not exist in :attr:`g`,
+      :func:`dgl.add_nodes` is invoked to add those nodes.
+      The node features of the new nodes will be filled with zeros.
+    * For features in :attr:`g` but not in :attr:`data`,
+      DGL assigns zero features for the newly added edges.
+    * For features in :attr:`data` but not in :attr:`g`, DGL assigns zero features
+      for the existing edges in the graph.

    Examples
    --------
@@ -1105,7 +1056,7 @@ def add_edges(g, u, v, data=None, etype=None):
    >>> import dgl
    >>> import torch

-    **Homogeneous Graphs or Heterogeneous Graphs with A Single Edge Type**
+    **Homogeneous Graphs**

    >>> g = dgl.graph((torch.tensor([0, 1]), torch.tensor([1, 2])))
    >>> g.num_edges()
@@ -1121,7 +1072,7 @@ def add_edges(g, u, v, data=None, etype=None):
    4

    If the graph has some edge features and new edges are added without
-    features, their features will be created with zeros.
+    features, their features will be filled with zeros.

    >>> g.edata['h'] = torch.ones(4, 1)
    >>> g = dgl.add_edges(g, torch.tensor([1]), torch.tensor([1]))
@@ -1136,12 +1087,12 @@ def add_edges(g, u, v, data=None, etype=None):
    tensor([[1.], [1.], [1.], [1.], [0.], [1.], [2.]])

    Since :attr:`data` contains new feature fields, the features for old edges
-    will be created with zeros.
+    will be filled with zeros.

    >>> g.edata['w']
    tensor([[0.], [0.], [0.], [0.], [0.], [1.], [1.]])

-    **Heterogeneous Graphs with Multiple Edge Types**
+    **Heterogeneous Graphs**

    >>> g = dgl.heterograph({
    ...     ('user', 'plays', 'game'): (torch.tensor([0, 1, 1, 2]),
@@ -1166,22 +1117,24 @@ def add_edges(g, u, v, data=None, etype=None):
    return g

 def remove_edges(g, eids, etype=None):
-    r"""Remove multiple edges with the specified edge type.
-    A new graph with certain edges deleted is returned.
-
-    Nodes will not be removed. After removing edges, the rest
-    edges will be re-indexed using consecutive integers from 0,
-    with their relative order preserved.
+    r"""Remove the specified edges and return a new graph.

-    The features for the removed edges will be removed accordingly.
+    Also delete the features of the edges. The edges must exist in the graph.
+    The resulting graph has the same number of nodes as the input one,
+    even if some nodes become isolated after the edge removal.

    Parameters
    ----------
-    eids : int, tensor, numpy.ndarray, list
-        IDs for the edges to remove.
-    etype : str or tuple of str, optional
-        The type of the edges to remove. Can be omitted if there is
-        only one edge type in the graph.
+    eids : int, Tensor, iterable[int]
+        The IDs of the edges to remove.
+    etype : str or (str, str, str), optional
+        The type names of the edges. The allowed type name formats are:
+
+        * ``(str, str, str)`` for source node type, edge type and destination node type.
+        * or one ``str`` edge type name if the name can uniquely identify a
+          triplet format in the graph.
+
+        Can be omitted if the graph has only one type of edges.

    Return
    ------
@@ -1193,7 +1146,7 @@ def remove_edges(g, eids, etype=None):
    >>> import dgl
    >>> import torch

-    **Homogeneous Graphs or Heterogeneous Graphs with A Single Edge Type**
+    **Homogeneous Graphs**

    >>> g = dgl.graph((torch.tensor([0, 0, 2]), torch.tensor([0, 1, 2])))
    >>> g.edata['he'] = torch.arange(3).float().reshape(-1, 1)
@@ -1207,7 +1160,7 @@ def remove_edges(g, eids, etype=None):
    >>> g.edata['he']
    tensor([[2.]])

-    **Heterogeneous Graphs with Multiple Edge Types**
+    **Heterogeneous Graphs**

    >>> g = dgl.heterograph({
    ...     ('user', 'plays', 'game'): (torch.tensor([0, 1, 1, 2]),
@@ -1231,19 +1184,16 @@ def remove_edges(g, eids, etype=None):


 def remove_nodes(g, nids, ntype=None):
-    r"""Remove multiple nodes with the specified node type.
-    A new graph with certain nodes deleted is returned.
-
-    Edges that connect to the nodes will be removed as well. After removing
-    nodes and edges, the rest nodes and edges will be re-indexed using
-    consecutive integers from 0, with their relative order preserved.
+    r"""Remove the specified nodes and return a new graph.

-    The features for the removed nodes/edges will be removed accordingly.
+    Also delete the features. Edges that connect from/to the nodes will be
+    removed as well. After the removal, DGL re-labels the remaining nodes and edges
+    with IDs from 0.

    Parameters
    ----------
-    nids : int, tensor, numpy.ndarray, list
-        Nodes to remove.
+    nids : int, Tensor, iterable[int]
+        The nodes to be removed.
    ntype : str, optional
        The type of the nodes to remove. Can be omitted if there is
        only one node type in the graph.
@@ -1259,7 +1209,7 @@ def remove_nodes(g, nids, ntype=None):
    >>> import dgl
    >>> import torch

-    **Homogeneous Graphs or Heterogeneous Graphs with A Single Node Type**
+    **Homogeneous Graphs**

    >>> g = dgl.graph((torch.tensor([0, 0, 2]), torch.tensor([0, 1, 2])))
    >>> g.ndata['hv'] = torch.arange(3).float().reshape(-1, 1)
@@ -1274,7 +1224,7 @@ def remove_nodes(g, nids, ntype=None):
    >>> g.edata['he']
    tensor([[2.]])

-    **Heterogeneous Graphs with Multiple Node Types**
+    **Heterogeneous Graphs**

    >>> g = dgl.heterograph({
    ...     ('user', 'plays', 'game'): (torch.tensor([0, 1, 1, 2]),
@@ -1301,42 +1251,41 @@ def remove_nodes(g, nids, ntype=None):
    return g

 def add_self_loop(g, etype=None):
-    r"""Add self-loop for each node in the graph for the given edge type.
-    A new graph with self-loop is returned.
-
-    If the graph is heterogeneous, the given edge type must have its source
-    node type the same as its destination node type.
+    r"""Add self-loops for each node in the graph and return a new graph.

    Parameters
    ----------
    g : DGLGraph
        The graph.
-    etype : str or tuple of str, optional
-        The type of the edges to remove. Can be omitted if there is
-        only one edge type in the graph.
+    etype : str or (str, str, str), optional
+        The type names of the edges. The allowed type name formats are:
+
+        * ``(str, str, str)`` for source node type, edge type and destination node type.
+        * or one ``str`` edge type name if the name can uniquely identify a
+          triplet format in the graph.

-        Its source node type must be the same as its destination node type.
+        Can be omitted if the graph has only one type of edges.

    Return
    ------
    DGLGraph
-        The graph with self-loop.
+        The graph with self-loops.

    Notes
    -----
-    * :func:`add_self_loop` adds self loops regardless of whether the self-loop already exists.
-
-      If you would like to have exactly one self-loop for every node, you would need to
+    * The function only supports homogeneous graphs, or heterogeneous graphs where the
+      relation specified by :attr:`etype` has the same source and destination node type.
+    * The function adds self-loops regardless of whether they already exist or not.
+      If one wishes to have exactly one self-loop for every node,
      call :func:`remove_self_loop` before invoking :func:`add_self_loop`.
-
-    * Features for the new edges (self-loop edges) will be created with zeros.
+    * Features of the new edges (self-loop edges) will be filled with zeros.

    Examples
    --------
    >>> import dgl
    >>> import torch

-    **Homogeneous Graphs or Heterogeneous Graphs with A Single Node Type**
+    **Homogeneous Graphs**

    >>> g = dgl.graph((torch.tensor([0, 0, 2]), torch.tensor([2, 1, 0])))
    >>> g.ndata['hv'] = torch.arange(3).float().reshape(-1, 1)
@@ -1354,7 +1303,7 @@ def add_self_loop(g, etype=None):
            [0.],
            [0.]])

-    **Heterogeneous Graphs with Multiple Node Types**
+    **Heterogeneous Graphs**

    >>> g = dgl.heterograph({
    ...     ('user', 'follows', 'user'): (torch.tensor([1, 2]),
@@ -1377,20 +1326,28 @@ def add_self_loop(g, etype=None):
    new_g = add_edges(g, nodes, nodes, etype=etype)
    return new_g

-DGLHeteroGraph.add_self_loop = add_self_loop
+DGLHeteroGraph.add_self_loop = utils.alias_func(add_self_loop)

 def remove_self_loop(g, etype=None):
-    r""" Remove self loops for each node in the graph.
-    A new graph with self-loop removed is returned.
-
-    If there are multiple self loops for a certain node,
-    all of them will be removed.
+    r"""Remove self-loops for each node in the graph and return a new graph.

    Parameters
    ----------
-    etype : str or tuple of str, optional
-        The type of the edges to remove. Can be omitted if there is
-        only one edge type in the graph.
+    g : DGLGraph
+        The graph.
+    etype : str or (str, str, str), optional
+        The type names of the edges. The allowed type name formats are:
+
+        * ``(str, str, str)`` for source node type, edge type and destination node type.
+        * or one ``str`` edge type name if the name can uniquely identify a
+          triplet format in the graph.
+
+        Can be omitted if the graph has only one type of edges.
+
+    Notes
+    -----
+    If a node has multiple self-loops, remove them all. Do nothing for nodes without
+    self-loops.

    Examples
    ---------
@@ -1398,7 +1355,7 @@ def remove_self_loop(g, etype=None):
    >>> import dgl
    >>> import torch

-    **Homogeneous Graphs or Heterogeneous Graphs with A Single Node Type**
+    **Homogeneous Graphs**

    >>> g = dgl.graph((torch.tensor([0, 0, 0, 1]), torch.tensor([1, 0, 0, 2])))
    >>> g.edata['he'] = torch.arange(4).float().reshape(-1, 1)
@@ -1409,7 +1366,7 @@ def remove_self_loop(g, etype=None):
    >>> g.edata['he']
    tensor([[0.],[3.]])

-    **Heterogeneous Graphs with Multiple Node Types**
+    **Heterogeneous Graphs**

    >>> g = dgl.heterograph({
    ...     ('user', 'follows', 'user'): (torch.tensor([0, 1, 1, 1, 2]),
@@ -1442,7 +1399,7 @@ def remove_self_loop(g, etype=None):
    new_g = remove_edges(g, self_loop_eids, etype=etype)
    return new_g

-DGLHeteroGraph.remove_self_loop = remove_self_loop
+DGLHeteroGraph.remove_self_loop = utils.alias_func(remove_self_loop)

 def compact_graphs(graphs, always_preserve=None, copy_ndata=True, copy_edata=True):
    """Given a list of graphs with the same set of nodes, find and eliminate the common
@@ -1502,10 +1459,9 @@ def compact_graphs(graphs, always_preserve=None, copy_ndata=True, copy_edata=Tru
    This function currently requires that the same node type of all graphs should have
    the same node type ID, i.e. the node types are *ordered* the same.

-    If :attr:`copy_edata` is True, same tensors will be used for
-    the features of the original graphs and the returned graphs to save memory cost.
-    As a result, users should avoid performing in-place operations on the edge features of
-    the returned graph, which will corrupt the edge features of the original graph as well.
+    If :attr:`copy_edata` is True, the resulting graph will share the edge feature
+    tensors with the input graph. Hence, users should avoid in-place operations on
+    these tensors, as the changes will be visible to both graphs.

    Examples
    --------
@@ -1666,6 +1622,13 @@ def to_block(g, dst_nodes=None, include_dst_in_src=True):
        If :attr:`dst_nodes` is specified but it is not a superset of all the nodes that
        have at least one inbound edge.

+    Notes
+    -----
+    :func:`to_block` is most commonly used in customizing neighborhood sampling
+    for stochastic training on a large graph.  Please refer to the user guide
+    :ref:`guide-minibatch` for a more thorough discussion about the methodology
+    of stochastic training.
+
    Examples
    --------
    Converting a homogeneous graph to a block as described above:
@@ -1727,13 +1690,6 @@ def to_block(g, dst_nodes=None, include_dst_in_src=True):

    >>> block.srcnodes['A'].data[dgl.NID]
    tensor([2, 1])
-
-    Notes
-    -----
-    :func:`to_block` is most commonly used in customizing neighborhood sampling
-    for stochastic training on a large graph.  Please refer to User Guide Chapter 6
-    for a more thorough discussion driven by the methodology of stochastic training on a
-    large graph.
    """
    assert g.device == F.cpu(), 'the graph must be on CPU'

@@ -1781,14 +1737,19 @@ def to_block(g, dst_nodes=None, include_dst_in_src=True):

    return new_graph

-def to_simple(g, return_counts='count', writeback_mapping=False, copy_ndata=True, copy_edata=False):
-    r"""Convert a graph to a simple graph, removing the parallel edges.
+def to_simple(g,
+              return_counts='count',
+              writeback_mapping=False,
+              copy_ndata=True,
+              copy_edata=False):
+    r"""Convert a graph to a simple graph without parallel edges and return it.

-    For a heterogeneous graph with multiple edge types, DGL removes the parallel edges
-    with the same edge type.
-
-    Optionally, the number of parallel edges and/or the mapping from the edges in the simple graph
-    to the edges in the original graph is returned.
+    For a heterogeneous graph with multiple edge types, DGL treats edges with the same
+    edge type and endpoints as parallel edges and removes them.
+    Optionally, one can get the number of parallel edges by specifying the
+    :attr:`return_counts` argument. To get a mapping from the edge IDs in the
+    input graph to the edge IDs in the resulting graph, set :attr:`writeback_mapping`
+    to True.

    Parameters
    ----------
@@ -1801,10 +1762,10 @@ def to_simple(g, return_counts='count', writeback_mapping=False, copy_ndata=True

        (Default: "count")
    writeback_mapping: bool, optional
-        If True, a write-back mapping is returned for each edge
-        type subgraph.  The write-back mapping is a tensor recording
-        the mapping from the IDs of the edges in the new graph to
-        the IDs of the edges in the original graph.  If the graph is
+        If True, return an extra write-back mapping for each edge
+        type.  The write-back mapping is a tensor recording
+        the mapping from the edge IDs in the input graph to
+        the edge IDs in the result graph. If the graph is
        heterogeneous, DGL returns a dictionary of edge types and such
        tensors.

@@ -1833,21 +1794,17 @@ def to_simple(g, return_counts='count', writeback_mapping=False, copy_ndata=True
    DGLGraph
        The graph.
    tensor or dict of tensor
-        The writeback mapping.
-
-        Only returned if ``writeback_mapping`` is True.
+        The writeback mapping. Only returned when ``writeback_mapping`` is True.

    Notes
    -----
-    If ``copy_ndata`` is ``True``, same tensors will be used for
-    the features of the original graph and the to_simpled graph. As a result, users
-    should avoid performing in-place operations on the features of the to_simpled
-    graph, which will corrupt the features of the original graph as well. For
-    concrete examples, refer to the ``Examples`` section below.
+    If :attr:`copy_ndata` is True, the resulting graph will share the node feature
+    tensors with the input graph. Hence, users should avoid in-place operations on
+    these tensors, as the changes will be visible to both graphs.

    Examples
    --------
-    **Homogeneous Graphs or Heterogeneous Graphs with A Single Edge Type**
+    **Homogeneous Graphs**

    Create a graph for demonstrating to_simple API.
    In the original graph, there are multiple edges between 1 and 2.
@@ -1881,24 +1838,7 @@ def to_simple(g, return_counts='count', writeback_mapping=False, copy_ndata=True
    >>> 'h' in g.edata
    False

-    **In-place operations on features of one graph will be reflected on features of
-    the simple graph, which is dangerous. Out-place operations will not be reflected.**
-
-    >>> sg.ndata['h'] += 1
-    >>> g.ndata['h']
-    tensor([[1.],
-            [2.],
-            [3.]])
-    >>> g.ndata['h'] += 1
-    >>> sg.ndata['h']
-    tensor([[2.],
-            [3.],
-            [4.]])
-    >>> sg.ndata['h2'] = th.ones(3, 1)
-    >>> 'h2' in g.ndata
-    False
-
-    **Heterogeneous Graphs with Multiple Edge Types**
+    **Heterogeneous Graphs**

    >>> g = dgl.heterograph({
    ...     ('user', 'wins', 'user'): (th.tensor([0, 2, 0, 2, 2]), th.tensor([1, 1, 2, 1, 0])),
@@ -1968,7 +1908,7 @@ def to_simple(g, return_counts='count', writeback_mapping=False, copy_ndata=True

    return simple_graph

-DGLHeteroGraph.to_simple = to_simple
+DGLHeteroGraph.to_simple = utils.alias_func(to_simple)

 def as_heterograph(g, ntype='_U', etype='_E'):  # pylint: disable=unused-argument
    """Convert a DGLGraph to a DGLHeteroGraph with one node and edge type.

--- a/python/dgl/utils/internal.py
+++ b/python/dgl/utils/internal.py
@@ -891,4 +891,12 @@ def set_num_threads(num_threads):
    """
    _CAPI_DGLSetOMPThreads(num_threads)

+def alias_func(func):
+    """Return a wrapper that forwards all arguments to ``func`` and has an "Alias of" docstring."""
+    @wraps(func)  # presumably functools.wraps (import not visible here) -- TODO confirm
+    def _fn(*args, **kwargs):
+        return func(*args, **kwargs)
+    _fn.__doc__ = """Alias of :func:`dgl.{}`.""".format(func.__name__)  # set last: final doc
+    return _fn
+
 _init_api("dgl.utils.internal")