[Doc/Feature] Refactor, doc update and behavior fix for graphs (#1983)

* Update graph * Fix for dgl.graph * from_scipy * Replace canonical_etypes with relations * from_networkx * Update for hetero_from_relations * Roll back the change of canonical_etypes to relations * heterograph * bipartite * Update doc * Fix lint * Fix lint * Fix test cases * Fix * Fix * Fix * Fix * Fix * Fix * Update * Fix test * Fix * Update * Use DGLError * Update * Update * Update * Update * Fix * Fix * Fix * Fix * Fix * Fix * Fix * Fix * Update * Fix * Update * Fix * Fix * Fix * Update * Fix * Update * Fix * Update * Update * Update * Update * Update * Update * Update * Fix * Fix * Update * Update * Update * Update * Update * Update * rewrite sanity checks * delete unnecessary checks * Update * Update * Update * Update * Update * Update * Update * Update * Fix * Update * Update * Update * Fix * Fix * Fix * Update * Fix * Update * Fix * Fix * Update * Fix * Update * Fix Co-authored-by: xiang song(charlie.song) <classicxsong@gmail.com> Co-authored-by: Minjie Wang <wmjlyjemaine@gmail.com> Co-authored-by: Quan Gan <coin2028@hotmail.com>

[Doc/Feature] Refactor, doc update and behavior fix for graphs (#1983)
* Update graph * Fix for dgl.graph * from_scipy * Replace canonical_etypes with relations * from_networkx * Update for hetero_from_relations * Roll back the change of canonical_etypes to relations * heterograph * bipartite * Update doc * Fix lint * Fix lint * Fix test cases * Fix * Fix * Fix * Fix * Fix * Fix * Update * Fix test * Fix * Update * Use DGLError * Update * Update * Update * Update * Fix * Fix * Fix * Fix * Fix * Fix * Fix * Fix * Update * Fix * Update * Fix * Fix * Fix * Update * Fix * Update * Fix * Update * Update * Update * Update * Update * Update * Update * Fix * Fix * Update * Update * Update * Update * Update * Update * rewrite sanity checks * delete unnecessary checks * Update * Update * Update * Update * Update * Update * Update * Update * Fix * Update * Update * Update * Fix * Fix * Fix * Update * Fix * Update * Fix * Fix * Update * Fix * Update * Fix Co-authored-by: xiang song(charlie.song) <classicxsong@gmail.com> Co-authored-by: Minjie Wang <wmjlyjemaine@gmail.com> Co-authored-by: Quan Gan <coin2028@hotmail.com>
be444e52 · Mufei Li · GitHub · 0afc3cf8 · be444e52 · be444e52
Unverified Commit be444e52 authored Aug 18, 2020 by Mufei Li Committed by GitHub Aug 18, 2020
20 changed files
--- a/examples/tensorflow/rgcn/entity_classify.py
+++ b/examples/tensorflow/rgcn/entity_classify.py
@@ -66,7 +66,6 @@ def main(args):
        hg = dataset[0]

        num_rels = len(hg.canonical_etypes)
-        num_of_ntype = len(hg.ntypes)
        category = dataset.predict_category
        num_classes = dataset.num_classes
        train_mask = hg.nodes[category].data.pop('train_mask')
@@ -98,7 +97,7 @@ def main(args):
                category_id = i

        # edge type and normalization factor
-        g = dgl.to_homo(hg)
+        g = dgl.to_homogeneous(hg, edata=['norm'])

    # check cuda
    if args.gpu < 0:

--- a/include/dgl/transform.h
+++ b/include/dgl/transform.h
@@ -89,15 +89,15 @@ ToBlock(HeteroGraphPtr graph, const std::vector<IdArray> &rhs_nodes, bool includ
 * * \c count : The array of edge occurrences per edge type.
 * * \c edge_map : The mapping from original edge IDs to new edge IDs per edge type.
 *
- * \note Example: consider the following graph:
+ * \note Example: consider a graph with the following edges
 *
- *     g = dgl.graph([(0, 1), (1, 3), (2, 2), (1, 3), (1, 4), (1, 4)])
+ *     [(0, 1), (1, 3), (2, 2), (1, 3), (1, 4), (1, 4)]
 *
 * Then ToSimpleGraph(g) would yield the following elements:
 *
- * * The first element would be the simple graph itself:
+ * * The first element would be the simple graph itself with the following edges
 *
- *       simple_g = dgl.graph([(0, 1), (1, 3), (1, 4), (2, 2)])
+ *       [(0, 1), (1, 3), (1, 4), (2, 2)]
 *
 * * The second element is an array \c count.  \c count[i] stands for the number of edges
 *   connecting simple_g.src[i] and simple_g.dst[i] in the original graph.

--- a/python/dgl/_deprecate/graph.py
+++ b/python/dgl/_deprecate/graph.py
@@ -4038,6 +4038,7 @@ class DGLGraph(DGLBaseGraph):
        self._node_frame = old_nframe
        self._edge_frame = old_eframe

+    @property
    def is_homogeneous(self):
        """Return if the graph is homogeneous."""
        return True

--- a/python/dgl/batch.py
+++ b/python/dgl/batch.py
@@ -397,7 +397,7 @@ def unbatch(g, node_split=None, edge_split=None):
                          for i in range(num_split)]

    # Create graphs
-    gs = [convert.heterograph(edge_dict, num_nodes_dict, validate=True, idtype=g.idtype)
+    gs = [convert.heterograph(edge_dict, num_nodes_dict, idtype=g.idtype)
          for edge_dict, num_nodes_dict in zip(edge_dict_per, num_nodes_dict_per)]

    # Unbatch node features

--- a/python/dgl/convert.py
+++ b/python/dgl/convert.py
 """Module for converting graph from/to other object."""
-# pylint: disable=dangerous-default-value
 from collections import defaultdict
+from scipy.sparse import spmatrix
 import numpy as np
 import networkx as nx

@@ -17,397 +17,165 @@ __all__ = [
    'hetero_from_relations',
    'hetero_from_shared_memory',
    'heterograph',
+    'to_heterogeneous',
    'to_hetero',
+    'to_homogeneous',
    'to_homo',
    'from_scipy',
+    'bipartite_from_scipy',
    'from_networkx',
+    'bipartite_from_networkx',
    'to_networkx',
 ]

 def graph(data,
-          ntype='_N', etype='_E',
+          ntype=None, etype=None,
+          *,
          num_nodes=None,
-          validate=True,
-          formats=['coo', 'csr', 'csc'],
          idtype=None,
          device=None,
-          card=None,
          **deprecated_kwargs):
-    """Create a graph with one type of nodes and edges.
-
-    In the sparse matrix perspective, :func:`dgl.graph` creates a graph
-    whose adjacency matrix must be square while :func:`dgl.bipartite`
-    creates a graph that does not necessarily have square adjacency matrix.
+    """Create a graph.

    Parameters
    ----------
    data : graph data
-        Data to initialize graph structure. Supported data formats are
-
-        (1) list of edge pairs (e.g. [(0, 2), (3, 1), ...])
-        (2) pair of vertex IDs representing end nodes (e.g. ([0, 3, ...],  [2, 1, ...]))
-        (3) scipy sparse matrix
-        (4) networkx graph
-
+        The data for constructing a graph, which takes the form of :math:`(U, V)`.
+        :math:`(U[i], V[i])` forms the edge with ID :math:`i` in the graph.
+        The allowed data formats are:
+
+        - ``(Tensor, Tensor)``: Each tensor must be a 1D tensor containing node IDs.
+          DGL calls this format "tuple of node-tensors". The tensors should have the same
+          data type (either int32 or int64) and the same device context (see the
+          descriptions of :attr:`idtype` and :attr:`device` below).
+        - ``(iterable[int], iterable[int])``: Similar to the tuple of node-tensors
+          format, but stores node IDs in two sequences (e.g. list, tuple, numpy.ndarray).
    ntype : str, optional
-        Node type name. (Default: _N)
+        Deprecated. To construct a graph with named node types, use :func:`dgl.heterograph`.
    etype : str, optional
-        Edge type name. (Default: _E)
+        Deprecated. To construct a graph with named edge types, use :func:`dgl.heterograph`.
    num_nodes : int, optional
-        Number of nodes in the graph. If None, infer from input data, i.e.
-        the largest node ID plus 1. (Default: None)
-    validate : bool, optional
-        If True, check if node ids are within cardinality, the check process may take
-        some time. (Default: True)
-        If False and card is not None, user would receive a warning.
-    formats : str or list of str
-        It can be ``'coo'``/``'csr'``/``'csc'`` or a sublist of them,
-        Force the storage formats.  Default: ``['coo', 'csr', 'csc']``.
-    idtype : int32, int64, optional
-        Integer ID type. Valid options are int32 or int64. If None, try infer from
-        the given data.
-    device : Device context, optional
-        Device on which the graph is created. Default: infer from data.
-    card : int, optional
-        Deprecated (see :attr:`num_nodes`). Cardinality (number of nodes in the graph).
-        If None, infer from input data, i.e. the largest node ID plus 1. (Default: None)
+        The number of nodes in the graph. If not given, this will be the largest node ID
+        plus 1 from the :attr:`data` argument. If given and the value is no greater than
+        the largest node ID from the :attr:`data` argument, DGL will raise an error.
+    idtype : int32 or int64, optional
+        The data type for storing the structure-related graph information such as node and
+        edge IDs. It should be a framework-specific data type object (e.g., ``torch.int32``).
+        If ``None`` (default), DGL infers the ID type from the :attr:`data` argument.
+        See "Notes" for more details.
+    device : device context, optional
+        The device of the returned graph, which should be a framework-specific device object
+        (e.g., ``torch.device``). If ``None`` (default), DGL uses the device of the tensors of
+        the :attr:`data` argument. If :attr:`data` is not a tuple of node-tensors, the
+        returned graph is on CPU.  If the specified :attr:`device` differs from that of the
+        provided tensors, DGL copies the given tensors to the specified device first.

    Returns
    -------
-    DGLHeteroGraph
+    DGLGraph
+        The created graph.
+
+    Notes
+    -----
+    1. If the :attr:`idtype` argument is not given then:
+
+       - in the case of the tuple of node-tensors format, DGL uses the
+         data type of the given ID tensors.
+       - in the case of the tuple of sequences format, DGL uses int64.
+
+       Once the graph has been created, you can change the data type by using
+       :func:`dgl.DGLGraph.long` or :func:`dgl.DGLGraph.int`.
+
+       If the specified :attr:`idtype` argument differs from the data type of the provided
+       tensors, DGL casts the given tensors to the specified data type first.
+    2. The most efficient construction approach is to provide a tuple of node tensors without
+       specifying :attr:`idtype` and :attr:`device`. This is because the returned graph shares
+       the storage with the input node-tensors in this case.
+    3. DGL internally maintains multiple copies of the graph structure in different
+       `sparse formats <https://en.wikipedia.org/wiki/Sparse_matrix>`_ and chooses the most
+       efficient one depending on the computation invoked. If memory usage becomes an issue
+       in the case of large graphs, use :func:`dgl.DGLGraph.formats` to restrict the allowed
+       formats.

    Examples
    --------
-    Create from pairs of edges with form (src, dst)

-    >>> g = dgl.graph([(0, 2), (0, 3), (1, 2)])
+    The following example uses PyTorch backend.

-    Create from source and destination vertex ID lists
+    >>> import dgl
+    >>> import torch

-    >>> u = [0, 0, 1]
-    >>> v = [2, 3, 2]
-    >>> g = dgl.graph((u, v))
+    Create a small three-edge graph.

-    The IDs can also be stored in framework-specific tensors
+    >>> # Source nodes for edges (2, 1), (3, 2), (4, 3)
+    >>> src_ids = torch.tensor([2, 3, 4])
+    >>> # Destination nodes for edges (2, 1), (3, 2), (4, 3)
+    >>> dst_ids = torch.tensor([1, 2, 3])
+    >>> g = dgl.graph((src_ids, dst_ids))

-    >>> import torch
-    >>> u = torch.tensor([0, 0, 1])
-    >>> v = torch.tensor([2, 3, 2])
-    >>> g = dgl.graph((u, v))
+    Explicitly specify the number of nodes in the graph.

-    Create from scipy sparse matrix
+    >>> g = dgl.graph((src_ids, dst_ids), num_nodes=100)

-    >>> from scipy.sparse import coo_matrix
-    >>> spmat = coo_matrix(([1,1,1], ([0, 0, 1], [2, 3, 2])), shape=(4, 4))
-    >>> g = dgl.graph(spmat)
+    Create a graph on the first GPU with data type int32.

-    Create from networkx graph
+    >>> g = dgl.graph((src_ids, dst_ids), idtype=torch.int32, device='cuda:0')

-    >>> import networkx as nx
-    >>> nxg = nx.path_graph(3)
-    >>> g = dgl.graph(nxg)
-
-    Specify node and edge type names
-
-    >>> g = dgl.graph(..., 'user', 'follows')
-    >>> g.ntypes
-    ['user']
-    >>> g.etypes
-    ['follows']
-    >>> g.canonical_etypes
-    [('user', 'follows', 'user')]
-
-    Check if node ids are within num_nodes specified
-
-    >>> g = dgl.graph(([0, 1, 2], [1, 2, 0]), num_nodes=2, validate=True)
-    ...
-    dgl._ffi.base.DGLError: Invalid node id 2 (should be less than cardinality 2).
-    >>> g = dgl.graph(([0, 1, 2], [1, 2, 0]), num_nodes=3, validate=True)
-    Graph(num_nodes=3, num_edges=3,
-          ndata_schemes={}
-          edata_schemes={})
+    See Also
+    --------
+    from_scipy
+    from_networkx
    """
+    # Deprecated arguments
+    if ntype is not None:
+        raise DGLError('The ntype argument is deprecated for dgl.graph. To construct ' \
+                       'a graph with named node types, use dgl.heterograph.')
+    if etype is not None:
+        raise DGLError('The etype argument is deprecated for dgl.graph. To construct ' \
+                       'a graph with named edge types, use dgl.heterograph.')
+
+    if isinstance(data, spmatrix):
+        raise DGLError("dgl.graph no longer supports graph construction from a SciPy "
+                       "sparse matrix, use dgl.from_scipy instead.")
+
+    if isinstance(data, nx.Graph):
+        raise DGLError("dgl.graph no longer supports graph construction from a NetworkX "
+                       "graph, use dgl.from_networkx instead.")
+
    if len(deprecated_kwargs) != 0:
        raise DGLError("Key word arguments {} have been removed from dgl.graph()."
                       " They are moved to dgl.from_scipy() and dgl.from_networkx()."
                       " Please refer to their API documents for more details.".format(
                           deprecated_kwargs.keys()))

-    if isinstance(data, DGLHeteroGraph):
-        return data.astype(idtype).to(device)
-
-    if card is not None:
-        dgl_warning("Argument 'card' will be deprecated. "
-                    "Please use num_nodes={} instead.".format(card))
-        num_nodes = card
-
    u, v, urange, vrange = utils.graphdata2tensors(data, idtype)
    if num_nodes is not None:  # override the number of nodes
+        if num_nodes < max(urange, vrange):
+            raise DGLError('The num_nodes argument must be larger than the max ID in the data,'
+                           ' but got {} and {}.'.format(num_nodes, max(urange, vrange) - 1))
        urange, vrange = num_nodes, num_nodes

-    g = create_from_edges(u, v, ntype, etype, ntype, urange, vrange,
-                          validate, formats=formats)
+    g = create_from_edges(u, v, '_N', '_E', '_N', urange, vrange)

    return g.to(device)

 def bipartite(data,
              utype='_U', etype='_E', vtype='_V',
              num_nodes=None,
-              validate=True,
-              formats=['coo', 'csr', 'csc'],
-              idtype=None,
-              device=None,
              card=None,
-              **deprecated_kwargs):
-    """Create a bipartite graph.
-
-    The result graph is directed and edges must be from ``utype`` nodes
-    to ``vtype`` nodes. Nodes of each type have their own ID counts.
-
-    In the sparse matrix perspective, :func:`dgl.graph` creates a graph
-    whose adjacency matrix must be square while :func:`dgl.bipartite`
-    creates a graph that does not necessarily have square adjacency matrix.
-
-    Parameters
-    ----------
-    data : graph data
-        Data to initialize graph structure. Supported data formats are
-
-        (1) list of edge pairs (e.g. [(0, 2), (3, 1), ...])
-        (2) pair of vertex IDs representing end nodes (e.g. ([0, 3, ...],  [2, 1, ...]))
-        (3) scipy sparse matrix
-        (4) networkx graph
-
-    utype : str, optional
-        Source node type name. (Default: _U)
-    etype : str, optional
-        Edge type name. (Default: _E)
-    vtype : str, optional
-        Destination node type name. (Default: _V)
-    num_nodes : 2-tuple of int, optional
-        Number of nodes in the source and destination group. If None, infer from input data,
-        i.e. the largest node ID plus 1 for each type. (Default: None)
-    validate : bool, optional
-        If True, check if node ids are within cardinality, the check process may take
-        some time. (Default: True)
-        If False and card is not None, user would receive a warning.
-    formats : str or list of str
-        It can be ``'coo'``/``'csr'``/``'csc'`` or a sublist of them,
-        Force the storage formats.  Default: ``['coo', 'csr', 'csc']``.
-    idtype : int32, int64, optional
-        Integer ID type. Valid options are int32 or int64. If None, try infer from
-        the given data.
-    device : Device context, optional
-        Device on which the graph is created. Default: infer from data.
-    card : 2-tuple of int, optional
-        Deprecated (see :attr:`num_nodes`). Cardinality (number of nodes in the source and
-        destination group). If None, infer from input data, i.e. the largest node ID plus 1
-        for each type. (Default: None)
-
-    Returns
-    -------
-    DGLHeteroGraph
-
-    Examples
-    --------
-    Create from pairs of edges
-
-    >>> g = dgl.bipartite([(0, 2), (0, 3), (1, 2)], 'user', 'plays', 'game')
-    >>> g.ntypes
-    ['user', 'game']
-    >>> g.etypes
-    ['plays']
-    >>> g.canonical_etypes
-    [('user', 'plays', 'game')]
-    >>> g.number_of_nodes('user')
-    2
-    >>> g.number_of_nodes('game')
-    4
-    >>> g.number_of_edges('plays')  # 'plays' could be omitted here
-    3
-
-    Create from source and destination vertex ID lists
-
-    >>> u = [0, 0, 1]
-    >>> v = [2, 3, 2]
-    >>> g = dgl.bipartite((u, v))
-
-    The IDs can also be stored in framework-specific tensors
-
-    >>> import torch
-    >>> u = torch.tensor([0, 0, 1])
-    >>> v = torch.tensor([2, 3, 2])
-    >>> g = dgl.bipartite((u, v))
-
-    Create from scipy sparse matrix. Since scipy sparse matrix has explicit
-    shape, the cardinality of the result graph is derived from that.
-
-    >>> from scipy.sparse import coo_matrix
-    >>> spmat = coo_matrix(([1,1,1], ([0, 0, 1], [2, 3, 2])), shape=(4, 4))
-    >>> g = dgl.bipartite(spmat, 'user', 'plays', 'game')
-    >>> g.number_of_nodes('user')
-    4
-    >>> g.number_of_nodes('game')
-    4
-
-    Create from networkx graph. The given graph must follow the bipartite
-    graph convention in networkx. Each node has a ``bipartite`` attribute
-    with values 0 or 1. The result graph has two types of nodes and only
-    edges from ``bipartite=0`` to ``bipartite=1`` will be included.
-
-    >>> import networkx as nx
-    >>> nxg = nx.complete_bipartite_graph(3, 4)
-    >>> g = dgl.bipartite(nxg, 'user', 'plays', 'game')
-    >>> g.number_of_nodes('user')
-    3
-    >>> g.number_of_nodes('game')
-    4
-    >>> g.edges()
-    (tensor([0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2]), tensor([0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3]))
-
-    Check if node ids are within num_nodes specified
-
-    >>> g = dgl.bipartite(([0, 1, 2], [1, 2, 3]), num_nodes=(2, 4), validate=True)
-    ...
-    dgl._ffi.base.DGLError: Invalid node id 2 (should be less than cardinality 2).
-    >>> g = dgl.bipartite(([0, 1, 2], [1, 2, 3]), num_nodes=(3, 4), validate=True)
-    >>> g
-    Graph(num_nodes={'_U': 3, '_V': 4},
-          num_edges={('_U', '_E', '_V'): 3},
-          metagraph=[('_U', '_V')])
-    """
-    if len(deprecated_kwargs) != 0:
-        raise DGLError("Key word arguments {} have been removed from dgl.graph()."
-                       " They are moved to dgl.from_scipy() and dgl.from_networkx()."
-                       " Please refer to their API documents for more details.".format(
-                           deprecated_kwargs.keys()))
-
-    if utype == vtype:
-        raise DGLError('utype should not be equal to vtype. Use ``dgl.graph`` instead.')
-    if card is not None:
-        dgl_warning("Argument 'card' will be deprecated. "
-                    "Please use num_nodes={} instead.".format(card))
-        num_nodes = card
-
-    u, v, urange, vrange = utils.graphdata2tensors(data, idtype, bipartite=True)
-    if num_nodes is not None:  # override the number of nodes
-        urange, vrange = num_nodes
-
-    g = create_from_edges(
-        u, v, utype, etype, vtype, urange, vrange, validate,
-        formats=formats)
-
-    return g.to(device)
+              validate=True,
+              restrict_format='any',
+              **kwargs):
+    """DEPRECATED: use dgl.heterograph instead."""
+    raise DGLError(
+        'dgl.bipartite is deprecated. Use dgl.heterograph({' +
+        "('{}', '{}', '{}')".format(utype, etype, vtype) +
+        ' : data}) to create a bipartite graph instead.')

 def hetero_from_relations(rel_graphs, num_nodes_per_type=None):
-    """Create a heterograph from graphs representing connections of each relation.
-
-    The input is a list of heterographs where the ``i``th graph contains edges of type
-    :math:`(s_i, e_i, d_i)`.
-
-    If two graphs share a same node type, the number of nodes for the corresponding type
-    should be the same. See **Examples** for details.
-
-    Parameters
-    ----------
-    rel_graphs : list of DGLHeteroGraph
-        Each element corresponds to a heterograph for one (src, edge, dst) relation.
-    num_nodes_per_type : dict[str, Tensor], optional
-        Number of nodes per node type.  If not given, DGL will infer the number of nodes
-        from the given relation graphs.
-
-    Returns
-    -------
-    DGLHeteroGraph
-        A heterograph consisting of all relations.
-
-    Examples
-    --------
-
-    >>> import dgl
-    >>> follows_g = dgl.graph([(0, 1), (1, 2)], 'user', 'follows')
-    >>> plays_g = dgl.bipartite([(0, 0), (3, 1)], 'user', 'plays', 'game')
-    >>> devs_g = dgl.bipartite([(0, 0), (1, 1)], 'developer', 'develops', 'game')
-    >>> g = dgl.hetero_from_relations([follows_g, plays_g, devs_g])
-
-    will raise an error as we have 3 nodes of type 'user' in follows_g and 4 nodes of type
-    'user' in plays_g.
-
-    We have two possible methods to avoid the construction.
-
-    **Method 1**: Manually specify the number of nodes for all types when constructing
-    the relation graphs.
-
-    >>> # A graph with 4 nodes of type 'user'
-    >>> follows_g = dgl.graph([(0, 1), (1, 2)], 'user', 'follows', num_nodes=4)
-    >>> # A bipartite graph with 4 nodes of src type ('user') and 2 nodes of dst type ('game')
-    >>> plays_g = dgl.bipartite([(0, 0), (3, 1)], 'user', 'plays', 'game', num_nodes=(4, 2))
-    >>> devs_g = dgl.bipartite([(0, 0), (1, 1)], 'developer', 'develops', 'game')
-    >>> g = dgl.hetero_from_relations([follows_g, plays_g, devs_g])
-    >>> print(g)
-    Graph(num_nodes={'user': 4, 'game': 2, 'developer': 2},
-          num_edges={('user', 'follows', 'user'): 2, ('user', 'plays', 'game'): 2,
-                     ('developer', 'develops', 'game'): 2},
-          metagraph=[('user', 'user'), ('user', 'game'), ('developer', 'game')])
-
-    ``devs_g`` does not have nodes of type ``'user'`` so no error will be raised.
-
-    **Method 2**: Construct a heterograph at once without intermediate relation graphs,
-    in which case we will infer the number of nodes for each type.
-
-    >>> g = dgl.heterograph({
-    >>>     ('user', 'follows', 'user'): [(0, 1), (1, 2)],
-    >>>     ('user', 'plays', 'game'): [(0, 0), (3, 1)],
-    >>>     ('developer', 'develops', 'game'): [(0, 0), (1, 1)]
-    >>> })
-    >>> print(g)
-    Graph(num_nodes={'user': 4, 'game': 2, 'developer': 2},
-          num_edges={('user', 'follows', 'user'): 2,
-                     ('user', 'plays', 'game'): 2,
-                     ('developer', 'develops', 'game'): 2},
-          metagraph=[('user', 'user'), ('user', 'game'), ('developer', 'game')])
-    """
-    utils.check_all_same_idtype(rel_graphs, 'rel_graphs')
-    utils.check_all_same_device(rel_graphs, 'rel_graphs')
-    # TODO(minjie): this API can be generalized as a union operation of the input graphs
-    # TODO(minjie): handle node/edge data
-    # infer meta graph
-    meta_edges_src, meta_edges_dst = [], []
-    ntypes = []
-    etypes = []
-    # TODO(BarclayII): I'm keeping the node type names sorted because even if
-    # the metagraph is the same, the same node type name in different graphs may
-    # map to different node type IDs.
-    # In the future, we need to lower the type names into C++.
-    if num_nodes_per_type is None:
-        ntype_set = set()
-        for rgrh in rel_graphs:
-            assert len(rgrh.etypes) == 1
-            stype, etype, dtype = rgrh.canonical_etypes[0]
-            ntype_set.add(stype)
-            ntype_set.add(dtype)
-        ntypes = list(sorted(ntype_set))
-    else:
-        ntypes = list(sorted(num_nodes_per_type.keys()))
-        num_nodes_per_type = utils.toindex([num_nodes_per_type[ntype] for ntype in ntypes], "int64")
-    ntype_dict = {ntype: i for i, ntype in enumerate(ntypes)}
-    for rgrh in rel_graphs:
-        stype, etype, dtype = rgrh.canonical_etypes[0]
-        meta_edges_src.append(ntype_dict[stype])
-        meta_edges_dst.append(ntype_dict[dtype])
-        etypes.append(etype)
-    # metagraph is DGLGraph, currently still using int64 as index dtype
-    metagraph = graph_index.from_coo(len(ntypes), meta_edges_src, meta_edges_dst, True)
-
-    # create graph index
-    hgidx = heterograph_index.create_heterograph_from_relations(
-        metagraph, [rgrh._graph for rgrh in rel_graphs], num_nodes_per_type)
-    retg = DGLHeteroGraph(hgidx, ntypes, etypes)
-    for i, rgrh in enumerate(rel_graphs):
-        for ntype in rgrh.ntypes:
-            retg.nodes[ntype].data.update(rgrh.nodes[ntype].data)
-        retg._edge_frames[i].update(rgrh._edge_frames[0])
-    return retg
+    """DEPRECATED: use dgl.heterograph instead."""
+    raise DGLError('dgl.hetero_from_relations is deprecated.\n\n'
+                   'Use dgl.heterograph instead.')

 def hetero_from_shared_memory(name):
    """Create a heterograph from shared memory with the given name.
@@ -429,94 +197,163 @@ def hetero_from_shared_memory(name):

 def heterograph(data_dict,
                num_nodes_dict=None,
-                validate=True,
-                formats=['coo', 'csr', 'csc'],
                idtype=None,
                device=None):
-    """Create a heterogeneous graph from a dictionary between edge types and edge lists.
+    """Create a heterogeneous graph.

    Parameters
    ----------
-    data_dict : dict
-        The dictionary between edge types and edge list data.
+    data_dict : graph data
+        The dictionary data for constructing a heterogeneous graph. The keys are in the form of
+        string triplets (src_type, edge_type, dst_type), specifying the source node,
+        edge, and destination node types. The values are graph data in the form of
+        :math:`(U, V)`, where :math:`(U[i], V[i])` forms the edge with ID :math:`i`.
+        The allowed graph data formats are:
+
+        - ``(Tensor, Tensor)``: Each tensor must be a 1D tensor containing node IDs. DGL calls
+          this format "tuple of node-tensors". The tensors should have the same data type,
+          which must be either int32 or int64. They should also have the same device context
+          (see below the descriptions of :attr:`idtype` and :attr:`device`).
+        - ``(iterable[int], iterable[int])``: Similar to the tuple of node-tensors
+          format, but stores node IDs in two sequences (e.g. list, tuple, numpy.ndarray).
+    num_nodes_dict : dict[str, int], optional
+        The number of nodes for some node types, which is a dictionary mapping a node type
+        :math:`T` to the number of :math:`T`-typed nodes. If not given for a node type
+        :math:`T`, DGL finds the largest ID appearing in *every* graph data whose source
+        or destination node type is :math:`T`, and sets the number of nodes to be that ID
+        plus one. If given and the value is no greater than the largest ID for some node type,
+        DGL will raise an error. By default, DGL infers the number of nodes for all node types.
+    idtype : int32 or int64, optional
+        The data type for storing the structure-related graph information such as node and
+        edge IDs. It should be a framework-specific data type object (e.g., ``torch.int32``).
+        If ``None`` (default), DGL infers the ID type from the :attr:`data_dict` argument.
+    device : device context, optional
+        The device of the returned graph, which should be a framework-specific device object
+        (e.g., ``torch.device``). If ``None`` (default), DGL uses the device of the tensors of
+        the :attr:`data_dict` argument. If its values are not tuples of node-tensors, the
+        returned graph is on CPU.  If the specified :attr:`device` differs from that of the
+        provided tensors, it casts the given tensors to the specified device first.

-        The edge types are specified as a triplet of (source node type name, edge type
-        name, destination node type name).
+    Returns
+    -------
+    DGLGraph
+        The created graph.

-        The edge list data can be anything acceptable by :func:`dgl.graph` or
-        :func:`dgl.bipartite`, or objects returned by the two functions themselves.
-    num_nodes_dict : dict[str, int]
-        The number of nodes for each node type.
+    Notes
+    -----
+    1. If the :attr:`idtype` argument is not given then:

-        By default DGL infers the number of nodes for each node type from ``data_dict``
-        by taking the maximum node ID plus one for each node type.
-    validate : bool, optional
-        If True, check if node ids are within cardinality, the check process may take
-        some time. (Default: True)
-        If False and num_nodes_dict is not None, user would receive a warning.
-    formats : str or list of str
-        It can be ``'coo'``/``'csr'``/``'csc'`` or a sublist of them,
-        Force the storage formats.  Default: ``['coo', 'csr', 'csc']``.
-    idtype : int32, int64, optional
-        Integer ID type. Valid options are int32 or int64. If None, try infer from
-        the given data.
-    device : Device context, optional
-        Device on which the graph is created. Default: infer from data.
+       - in the case of the tuple of node-tensor format, DGL uses
+         the data type of the given ID tensors.
+       - in the case of the tuple of sequence format, DGL uses int64.

-    Returns
-    -------
-    DGLHeteroGraph
+       Once the graph has been created, you can change the data type by using
+       :func:`dgl.DGLGraph.long` or :func:`dgl.DGLGraph.int`.
+
+       If the specified :attr:`idtype` argument differs from the data type of the provided
+       tensors, it casts the given tensors to the specified data type first.
+    2. The most efficient construction approach is to provide a tuple of node tensors without
+       specifying :attr:`idtype` and :attr:`device`. This is because the returned graph shares
+       the storage with the input node-tensors in this case.
+    3. DGL internally maintains multiple copies of the graph structure in different sparse
+       formats and chooses the most efficient one depending on the computation invoked.
+       If memory usage becomes an issue in the case of large graphs, use
+       :func:`dgl.DGLGraph.formats` to restrict the allowed formats.

    Examples
    --------
-    >>> g = dgl.heterograph({
-    ...     ('user', 'follows', 'user'): [(0, 1), (1, 2)],
-    ...     ('user', 'plays', 'game'): [(0, 0), (1, 0), (1, 1), (2, 1)],
-    ...     ('developer', 'develops', 'game'): [(0, 0), (1, 1)],
-    ...     })
+
+    The following example uses PyTorch backend.
+
+    >>> import dgl
+    >>> import torch
+
+    Create a heterograph with three canonical edge types.
+
+    >>> data_dict = {
+    ...     ('user', 'follows', 'user'): (torch.tensor([0, 1]), torch.tensor([1, 2])),
+    ...     ('user', 'follows', 'topic'): (torch.tensor([1, 1]), torch.tensor([1, 2])),
+    ...     ('user', 'plays', 'game'): (torch.tensor([0, 3]), torch.tensor([3, 4]))
+    ... }
+    >>> g = dgl.heterograph(data_dict)
+    >>> g
+    Graph(num_nodes={'game': 5, 'topic': 3, 'user': 4},
+          num_edges={('user', 'follows', 'user'): 2, ('user', 'follows', 'topic'): 2,
+                     ('user', 'plays', 'game'): 2},
+          metagraph=[('user', 'user', 'follows'), ('user', 'topic', 'follows'),
+                     ('user', 'game', 'plays')])
+
+    Explicitly specify the number of nodes for each node type in the graph.
+
+    >>> num_nodes_dict = {'user': 4, 'topic': 4, 'game': 6}
+    >>> g = dgl.heterograph(data_dict, num_nodes_dict=num_nodes_dict)
+
+    Create a graph on the first GPU with data type int32.
+
+    >>> g = dgl.heterograph(data_dict, idtype=torch.int32, device='cuda:0')
    """
-    # Try infer idtype
-    if idtype is None:
-        for data in data_dict.values():
-            if isinstance(data, tuple) and len(data) == 2 and F.is_tensor(data[0]):
-                idtype = F.dtype(data[0])
-                break
-
-    # Convert all data to edge tensors first.
-    data_dict = {(sty, ety, dty) : utils.graphdata2tensors(data, idtype, bipartite=(sty != dty))
-                 for (sty, ety, dty), data in data_dict.items()}
-
-    # infer number of nodes for each node type
+    # Convert all data to node tensors first
+    node_tensor_dict = {}
+    need_infer = num_nodes_dict is None
    if num_nodes_dict is None:
        num_nodes_dict = defaultdict(int)
-        for (srctype, etype, dsttype), data in data_dict.items():
-            _, _, nsrc, ndst = data
-            num_nodes_dict[srctype] = max(num_nodes_dict[srctype], nsrc)
-            num_nodes_dict[dsttype] = max(num_nodes_dict[dsttype], ndst)
+    for (sty, ety, dty), data in data_dict.items():
+        if isinstance(data, spmatrix):
+            raise DGLError("dgl.heterograph no longer supports graph construction from a SciPy "
+                           "sparse matrix, use dgl.from_scipy instead.")
+
+        if isinstance(data, nx.Graph):
+            raise DGLError("dgl.heterograph no longer supports graph construction from a NetworkX "
+                           "graph, use dgl.from_networkx instead.")
+        is_bipartite = (sty != dty)
+        u, v, urange, vrange = utils.graphdata2tensors(data, idtype, bipartite=is_bipartite)
+        node_tensor_dict[(sty, ety, dty)] = (u, v)
+        if need_infer:
+            num_nodes_dict[sty] = max(num_nodes_dict[sty], urange)
+            num_nodes_dict[dty] = max(num_nodes_dict[dty], vrange)
+        else:  # sanity check
+            if num_nodes_dict[sty] < urange:
+                raise DGLError('The given number of nodes of node type {} must be larger than'
+                               ' the max ID in the data, but got {} and {}.'.format(
+                                   sty, num_nodes_dict[sty], urange - 1))
+            if num_nodes_dict[dty] < vrange:
+                raise DGLError('The given number of nodes of node type {} must be larger than'
+                               ' the max ID in the data, but got {} and {}.'.format(
+                                   dty, num_nodes_dict[dty], vrange - 1))
+    # Create the graph
+
+    # Sort the ntypes and relation tuples to have a deterministic order for the same set
+    # of type names.
+    ntypes = list(sorted(num_nodes_dict.keys()))
+    relations = list(sorted(node_tensor_dict.keys()))
+
+    num_nodes_per_type = utils.toindex([num_nodes_dict[ntype] for ntype in ntypes], "int64")
+    ntype_dict = {ntype: i for i, ntype in enumerate(ntypes)}

+    meta_edges_src = []
+    meta_edges_dst = []
+    etypes = []
    rel_graphs = []
-    for (srctype, etype, dsttype), data in data_dict.items():
-        u, v, _, _ = data
-        if srctype == dsttype:
-            rel_graphs.append(graph(
-                (u, v), srctype, etype,
-                num_nodes=num_nodes_dict[srctype],
-                validate=validate,
-                formats=formats,
-                idtype=idtype, device=device))
-        else:
-            rel_graphs.append(bipartite(
-                (u, v), srctype, etype, dsttype,
-                num_nodes=(num_nodes_dict[srctype], num_nodes_dict[dsttype]),
-                validate=validate,
-                formats=formats,
-                idtype=idtype, device=device))
+    for srctype, etype, dsttype in relations:
+        meta_edges_src.append(ntype_dict[srctype])
+        meta_edges_dst.append(ntype_dict[dsttype])
+        etypes.append(etype)
+        src, dst = node_tensor_dict[(srctype, etype, dsttype)]
+        g = create_from_edges(src, dst, srctype, etype, dsttype,
+                              num_nodes_dict[srctype], num_nodes_dict[dsttype])
+        rel_graphs.append(g)

-    return hetero_from_relations(rel_graphs, num_nodes_dict)
+    # metagraph is DGLGraph, currently still using int64 as index dtype
+    metagraph = graph_index.from_coo(len(ntypes), meta_edges_src, meta_edges_dst, True)
+    # create graph index
+    hgidx = heterograph_index.create_heterograph_from_relations(
+        metagraph, [rgrh._graph for rgrh in rel_graphs], num_nodes_per_type)
+    retg = DGLHeteroGraph(hgidx, ntypes, etypes)

+    return retg.to(device)

-def to_hetero(G, ntypes, etypes, ntype_field=NTYPE, etype_field=ETYPE,
-              metagraph=None):
+def to_heterogeneous(G, ntypes, etypes, ntype_field=NTYPE,
+                     etype_field=ETYPE, metagraph=None):
    """Convert the given homogeneous graph to a heterogeneous graph.

    The input graph should have only one type of nodes and edges. Each node and edge
@@ -531,10 +368,13 @@ def to_hetero(G, ntypes, etypes, ntype_field=NTYPE, etype_field=ETYPE,
    (0, ty_A, 1) and (2, ty_B, 3). In another word, these two edges share the same edge
    type name, but can be distinguished by a canonical edge type tuple.

+    This function will copy any node/edge features from :attr:`G` to the returned heterogeneous
+    graph, except for node/edge types and IDs used to recover the heterogeneous graph.
+
    Parameters
    ----------
-    G : DGLHeteroGraph
-        Input homogeneous graph.
+    G : DGLGraph
+        The homogeneous graph.
    ntypes : list of str
        The node type names.
    etypes : list of str
@@ -551,8 +391,8 @@ def to_hetero(G, ntypes, etypes, ntype_field=NTYPE, etype_field=ETYPE,

    Returns
    -------
-    DGLHeteroGraph
-        A heterograph. The parent node and edge ID are stored in the column
+    DGLGraph
+        A heterogeneous graph. The parent node and edge ID are stored in the column
        ``dgl.NID`` and ``dgl.EID`` respectively for all node/edge types.

    Notes
@@ -568,47 +408,47 @@ def to_hetero(G, ntypes, etypes, ntype_field=NTYPE, etype_field=ETYPE,
    Examples
    --------

-    >>> g1 = dgl.bipartite([(0, 1), (1, 2)], 'user', 'develops', 'activity')
-    >>> g2 = dgl.bipartite([(0, 0), (1, 1)], 'developer', 'develops', 'game')
-    >>> hetero_g = dgl.hetero_from_relations([g1, g2])
-    >>> print(hetero_g)
+    >>> import dgl
+    >>> hg = dgl.heterograph({
+    ...     ('user', 'develops', 'activity'): ([0, 1], [1, 2]),
+    ...     ('developer', 'develops', 'game'): ([0, 1], [0, 1])
+    ... })
+    >>> print(hg)
    Graph(num_nodes={'user': 2, 'activity': 3, 'developer': 2, 'game': 2},
          num_edges={('user', 'develops', 'activity'): 2, ('developer', 'develops', 'game'): 2},
          metagraph=[('user', 'activity'), ('developer', 'game')])

    We first convert the heterogeneous graph to a homogeneous graph.

-    >>> homo_g = dgl.to_homo(hetero_g)
-    >>> print(homo_g)
+    >>> g = dgl.to_homogeneous(hg)
+    >>> print(g)
    Graph(num_nodes=9, num_edges=4,
          ndata_schemes={'_TYPE': Scheme(shape=(), dtype=torch.int64),
                         '_ID': Scheme(shape=(), dtype=torch.int64)}
          edata_schemes={'_TYPE': Scheme(shape=(), dtype=torch.int64),
                         '_ID': Scheme(shape=(), dtype=torch.int64)})
-    >>> homo_g.ndata
+    >>> g.ndata
    {'_TYPE': tensor([0, 0, 1, 1, 1, 2, 2, 3, 3]), '_ID': tensor([0, 1, 0, 1, 2, 0, 1, 0, 1])}
    Nodes 0, 1 for 'user', 2, 3, 4 for 'activity', 5, 6 for 'developer', 7, 8 for 'game'
-    >>> homo_g.edata
+    >>> g.edata
    {'_TYPE': tensor([0, 0, 1, 1]), '_ID': tensor([0, 1, 0, 1])}
    Edges 0, 1 for ('user', 'develops', 'activity'), 2, 3 for ('developer', 'develops', 'game')

    Now convert the homogeneous graph back to a heterogeneous graph.

-    >>> hetero_g_2 = dgl.to_hetero(homo_g, hetero_g.ntypes, hetero_g.etypes)
-    >>> print(hetero_g_2)
+    >>> hg_2 = dgl.to_heterogeneous(g, hg.ntypes, hg.etypes)
+    >>> print(hg_2)
    Graph(num_nodes={'user': 2, 'activity': 3, 'developer': 2, 'game': 2},
          num_edges={('user', 'develops', 'activity'): 2, ('developer', 'develops', 'game'): 2},
          metagraph=[('user', 'activity'), ('developer', 'game')])

    See Also
    --------
-    dgl.to_homo
+    to_homogeneous
    """
-    # TODO(minjie): use hasattr to support DGLGraph input; should be fixed once
-    #  DGLGraph is merged with DGLHeteroGraph
    if (hasattr(G, 'ntypes') and len(G.ntypes) > 1
            or hasattr(G, 'etypes') and len(G.etypes) > 1):
-        raise DGLError('The input graph should be homogenous and have only one '
+        raise DGLError('The input graph should be homogeneous and have only one '
                       ' type of nodes and edges.')

    num_ntypes = len(ntypes)
@@ -660,48 +500,49 @@ def to_hetero(G, ntypes, etypes, ntype_field=NTYPE, etype_field=ETYPE,
        etype_mask = (edge_ctids[None, :] == canonical_etids[:, None]).all(2)
    edge_groups = [etype_mask[i].nonzero()[0] for i in range(len(canonical_etids))]

-    rel_graphs = []
+    data_dict = dict()
+    canonical_etypes = []
    for i, (stid, etid, dtid) in enumerate(canonical_etids):
        src_of_etype = src_local[edge_groups[i]]
        dst_of_etype = dst_local[edge_groups[i]]
-        if stid == dtid:
-            rel_graph = graph(
-                (src_of_etype, dst_of_etype), ntypes[stid], etypes[etid],
-                num_nodes=ntype_count[stid], validate=False,
-                idtype=idtype, device=device)
-        else:
-            rel_graph = bipartite(
-                (src_of_etype,
-                 dst_of_etype), ntypes[stid], etypes[etid], ntypes[dtid],
-                num_nodes=(ntype_count[stid], ntype_count[dtid]),
-                validate=False, idtype=idtype, device=device)
-        rel_graphs.append(rel_graph)
-
-    hg = hetero_from_relations(rel_graphs,
-                               {ntype: count for ntype, count in zip(
-                                   ntypes, ntype_count)})
+        canonical_etypes.append((ntypes[stid], etypes[etid], ntypes[dtid]))
+        data_dict[canonical_etypes[-1]] = \
+            (src_of_etype, dst_of_etype)
+    hg = heterograph(data_dict,
+                     {ntype: count for ntype, count in zip(ntypes, ntype_count)},
+                     idtype=idtype, device=device)

    ntype2ngrp = {ntype : node_groups[ntid] for ntid, ntype in enumerate(ntypes)}

    # features
    for key, data in G.ndata.items():
+        if key in [ntype_field, NID]:
+            continue
        for ntid, ntype in enumerate(hg.ntypes):
            rows = F.copy_to(F.tensor(ntype2ngrp[ntype]), F.context(data))
            hg._node_frames[ntid][key] = F.gather_row(data, rows)
+
    for key, data in G.edata.items():
+        if key in [etype_field, EID]:
+            continue
        for etid in range(len(hg.canonical_etypes)):
            rows = F.copy_to(F.tensor(edge_groups[etid]), F.context(data))
-            hg._edge_frames[etid][key] = F.gather_row(data, rows)
+            hg._edge_frames[hg.get_etype_id(canonical_etypes[etid])][key] = \
+                F.gather_row(data, rows)

-    for ntid, ntype in enumerate(hg.ntypes):
-        hg._node_frames[ntid][NID] = F.tensor(ntype2ngrp[ntype])
+    return hg

-    for etid in range(len(hg.canonical_etypes)):
-        hg._edge_frames[etid][EID] = F.tensor(edge_groups[etid])
+def to_hetero(G, ntypes, etypes, ntype_field=NTYPE, etype_field=ETYPE,
+              metagraph=None):
+    """Convert the given homogeneous graph to a heterogeneous graph.

-    return hg
+    DEPRECATED: Please use to_heterogeneous
+    """
+    dgl_warning("dgl.to_hetero is deprecated. Please use dgl.to_heterogeneous")
+    return to_heterogeneous(G, ntypes, etypes, ntype_field=ntype_field,
+                            etype_field=etype_field, metagraph=metagraph)

-def to_homo(G):
+def to_homogeneous(G, ndata=None, edata=None):
    """Convert the given heterogeneous graph to a homogeneous graph.

    The returned graph has only one type of nodes and edges.
@@ -710,34 +551,62 @@ def to_homo(G):
    is an integer representing the type id, which can be used to retrieve the type
    names stored in ``G.ntypes`` and ``G.etypes`` arguments.

+    Node/edge features are combined only if named in :attr:`ndata`/:attr:`edata`.
+
    Parameters
    ----------
-    G : DGLHeteroGraph
-        Input heterogeneous graph.
+    G : DGLGraph
+        The heterogeneous graph.
+    ndata : list[str], optional
+        The node features to combine across all node types. For each feature ``feat`` in
+        :attr:`ndata`, it concatenates ``G.nodes[T].data[feat]`` across all node types ``T``.
+        As a result, the feature ``feat`` of all node types should have the same shape and
+        data type. By default, the returned graph will not have any node features.
+    edata : list[str], optional
+        The edge features to combine across all edge types. For each feature ``feat`` in
+        :attr:`edata`, it concatenates ``G.edges[T].data[feat]`` across all edge types ``T``.
+        As a result, the feature ``feat`` of all edge types should have the same shape and
+        data type. By default, the returned graph will not have any edge features.

    Returns
    -------
-    DGLHeteroGraph
+    DGLGraph
        A homogeneous graph. The parent node and edge type/ID are stored in
        columns ``dgl.NTYPE/dgl.NID`` and ``dgl.ETYPE/dgl.EID`` respectively.

    Examples
    --------

-    >>> follows_g = dgl.graph([(0, 1), (1, 2)], 'user', 'follows')
-    >>> devs_g = dgl.bipartite([(0, 0), (1, 1)], 'developer', 'develops', 'game')
-    >>> hetero_g = dgl.hetero_from_relations([follows_g, devs_g])
-    >>> homo_g = dgl.to_homo(hetero_g)
-    >>> homo_g.ndata
+    The following example uses PyTorch backend.
+
+    >>> import dgl
+    >>> import torch
+
+    >>> hg = dgl.heterograph({
+    ...     ('user', 'follows', 'user'): ([0, 1], [1, 2]),
+    ...     ('developer', 'develops', 'game'): ([0, 1], [0, 1])
+    ...     })
+    >>> hg.nodes['user'].data['h'] = torch.ones(3, 1)
+    >>> hg.nodes['developer'].data['h'] = torch.zeros(2, 1)
+    >>> hg.nodes['game'].data['h'] = torch.ones(2, 1)
+    >>> g = dgl.to_homogeneous(hg)
+    >>> # The first three nodes are for 'user', the next two are for 'developer',
+    >>> # and the last two are for 'game'
+    >>> g.ndata
    {'_TYPE': tensor([0, 0, 0, 1, 1, 2, 2]), '_ID': tensor([0, 1, 2, 0, 1, 0, 1])}
-    First three nodes for 'user', next two for 'developer' and the last two for 'game'
-    >>> homo_g.edata
+    >>> # The first two edges are for 'follows', and the next two are for 'develops' edges.
+    >>> g.edata
    {'_TYPE': tensor([0, 0, 1, 1]), '_ID': tensor([0, 1, 0, 1])}
-    First two edges for 'follows', next two for 'develops'
+
+    Combine feature 'h' across all node types in the conversion.
+
+    >>> g = dgl.to_homogeneous(hg, ndata=['h'])
+    >>> g.ndata['h']
+    tensor([[1.], [1.], [1.], [0.], [0.], [1.], [1.]])

    See Also
    --------
-    dgl.to_hetero
+    to_heterogeneous
    """
    num_nodes_per_ntype = [G.number_of_nodes(ntype) for ntype in G.ntypes]
    offset_per_ntype = np.insert(np.cumsum(num_nodes_per_ntype), 0, 0)
@@ -767,11 +636,15 @@ def to_homo(G):
        eids.append(F.arange(0, num_edges, G.idtype))

    retg = graph((F.cat(srcs, 0), F.cat(dsts, 0)), num_nodes=total_num_nodes,
-                 validate=False, idtype=G.idtype, device=G.device)
+                 idtype=G.idtype, device=G.device)

    # copy features
-    comb_nf = combine_frames(G._node_frames, range(len(G.ntypes)))
-    comb_ef = combine_frames(G._edge_frames, range(len(G.etypes)))
+    if ndata is None:
+        ndata = []
+    if edata is None:
+        edata = []
+    comb_nf = combine_frames(G._node_frames, range(len(G.ntypes)), col_names=ndata)
+    comb_ef = combine_frames(G._edge_frames, range(len(G.etypes)), col_names=edata)
    if comb_nf is not None:
        retg.ndata.update(comb_nf)
    if comb_ef is not None:
@@ -785,89 +658,317 @@ def to_homo(G):

    return retg

+def to_homo(G):
+    """Convert the given heterogeneous graph to a homogeneous graph.
+
+    DEPRECATED: Please use to_homogeneous
+    """
+    dgl_warning("dgl.to_homo is deprecated. Please use dgl.to_homogeneous")
+    return to_homogeneous(G)
+
 def from_scipy(sp_mat,
-               ntype='_N', etype='_E',
               eweight_name=None,
-               formats=['coo', 'csr', 'csc'],
-               idtype=None):
-    """Create a DGLGraph from a SciPy sparse matrix.
+               idtype=None,
+               device=None):
+    """Create a graph from a SciPy sparse matrix.

    Parameters
    ----------
-    sp_mat : SciPy sparse matrix
-        SciPy sparse matrix.
-    ntype : str
-        Type name for both source and destination nodes
-    etype : str
-        Type name for edges
+    sp_mat : scipy.sparse.spmatrix
+        The graph adjacency matrix. Each nonzero entry ``sp_mat[i, j]`` represents an edge from
+        node ``i`` to ``j``. The matrix must have square shape ``(N, N)``, where ``N`` is the
+        number of nodes in the graph.
    eweight_name : str, optional
-        If given, the edge weights in the matrix will be
-        stored in ``edata[eweight_name]``.
-    formats : str or list of str
-        It can be ``'coo'``/``'csr'``/``'csc'`` or a sublist of them,
-        Force the storage formats.  Default: ``['coo', 'csr', 'csc']``.
-    idtype : int32, int64, optional
-        Integer ID type. Must be int32 or int64. Default: int64.
+        The edata name for storing the nonzero values of :attr:`sp_mat`. If given, DGL will
+        store the nonzero values of :attr:`sp_mat` in ``edata[eweight_name]`` of the returned
+        graph.
+    idtype : int32 or int64, optional
+        The data type for storing the structure-related graph information such as node and
+        edge IDs. It should be a framework-specific data type object (e.g., ``torch.int32``).
+        By default, DGL uses int64.
+    device : device context, optional
+        The device of the resulting graph. It should be a framework-specific device object
+        (e.g., ``torch.device``). By default, DGL stores the graph on CPU.

    Returns
    -------
-    g : DGLGraph
+    DGLGraph
+        The created graph.
+
+    Notes
+    -----
+    1. The function supports all kinds of SciPy sparse matrix classes (e.g.,
+       :class:`scipy.sparse.csr.csr_matrix`). It converts the input matrix to the COOrdinate
+       format using :func:`scipy.sparse.spmatrix.tocoo` before creating a :class:`DGLGraph`.
+       Creating from a :class:`scipy.sparse.coo.coo_matrix` is hence the most efficient way.
+    2. DGL internally maintains multiple copies of the graph structure in different sparse
+       formats and chooses the most efficient one depending on the computation invoked.
+       If memory usage becomes an issue in the case of large graphs, use
+       :func:`dgl.DGLGraph.formats` to restrict the allowed formats.
+
+    Examples
+    --------
+
+    The following example uses PyTorch backend.
+
+    >>> import dgl
+    >>> import numpy as np
+    >>> import torch
+    >>> from scipy.sparse import coo_matrix
+
+    Create a small three-edge graph.
+
+    >>> # Source nodes for edges (2, 1), (3, 2), (4, 3)
+    >>> src_ids = np.array([2, 3, 4])
+    >>> # Destination nodes for edges (2, 1), (3, 2), (4, 3)
+    >>> dst_ids = np.array([1, 2, 3])
+    >>> # Weight for edges (2, 1), (3, 2), (4, 3)
+    >>> eweight = np.array([0.2, 0.3, 0.5])
+    >>> sp_mat = coo_matrix((eweight, (src_ids, dst_ids)), shape=(5, 5))
+    >>> g = dgl.from_scipy(sp_mat)
+
+    Retrieve the edge weights.
+
+    >>> g = dgl.from_scipy(sp_mat, eweight_name='w')
+    >>> g.edata['w']
+    tensor([0.2000, 0.3000, 0.5000], dtype=torch.float64)
+
+    Create a graph on the first GPU with data type int32.
+
+    >>> g = dgl.from_scipy(sp_mat, idtype=torch.int32, device='cuda:0')
+
+    See Also
+    --------
+    graph
+    from_networkx
    """
+    # Sanity check
+    num_rows = sp_mat.shape[0]
+    num_cols = sp_mat.shape[1]
+    if num_rows != num_cols:
+        raise DGLError('Expect the number of rows to be the same as the number of columns for '
+                       'sp_mat, got {:d} and {:d}.'.format(num_rows, num_cols))
+
    u, v, urange, vrange = utils.graphdata2tensors(sp_mat, idtype)
-    g = create_from_edges(u, v, ntype, etype, ntype, urange, vrange,
-                          validate=False, formats=formats)
+    g = create_from_edges(u, v, '_N', '_E', '_N', urange, vrange)
+    if eweight_name is not None:
+        g.edata[eweight_name] = F.tensor(sp_mat.data)
+    return g.to(device)
+
+def bipartite_from_scipy(sp_mat,
+                         utype, etype, vtype,
+                         eweight_name=None,
+                         idtype=None,
+                         device=None):
+    """Create a unidirectional bipartite graph from a SciPy sparse matrix.
+
+    The created graph will have two types of nodes ``utype`` and ``vtype`` as well as one
+    edge type ``etype`` whose edges are from ``utype`` to ``vtype``.
+
+    Parameters
+    ----------
+    sp_mat : scipy.sparse.spmatrix
+        The graph adjacency matrix. Each nonzero entry ``sp_mat[i, j]``
+        represents an edge from node ``i`` of type :attr:`utype` to ``j`` of type :attr:`vtype`.
+        Let the matrix shape be ``(N, M)``. There will be ``N`` nodes of type :attr:`utype`
+        and ``M`` nodes of type ``vtype`` in the resulting graph.
+    utype : str
+        The name of the source node type.
+    etype : str
+        The name of the edge type.
+    vtype : str
+        The name of the destination node type.
+    eweight_name : str, optional
+        The edata name for storing the nonzero values of :attr:`sp_mat`.
+        If given, DGL will store the nonzero values of :attr:`sp_mat` in ``edata[eweight_name]``
+        of the returned graph.
+    idtype : int32 or int64, optional
+        The data type for storing the structure-related graph information such as node and
+        edge IDs. It should be a framework-specific data type object (e.g., ``torch.int32``).
+        By default, DGL uses int64.
+    device : device context, optional
+        The device of the resulting graph. It should be a framework-specific device object
+        (e.g., ``torch.device``). By default, DGL stores the graph on CPU.
+
+    Returns
+    -------
+    DGLGraph
+        The created graph.
+
+    Notes
+    -----
+    1. The function supports all kinds of SciPy sparse matrix classes (e.g.,
+       :class:`scipy.sparse.csr.csr_matrix`). It converts the input matrix to the COOrdinate
+       format using :func:`scipy.sparse.spmatrix.tocoo` before creating a :class:`DGLGraph`.
+       Creating from a :class:`scipy.sparse.coo.coo_matrix` is hence the most efficient way.
+    2. DGL internally maintains multiple copies of the graph structure in different sparse
+       formats and chooses the most efficient one depending on the computation invoked.
+       If memory usage becomes an issue in the case of large graphs, use
+       :func:`dgl.DGLGraph.formats` to restrict the allowed formats.
+
+    Examples
+    --------
+
+    The following example uses PyTorch backend.
+
+    >>> import dgl
+    >>> import numpy as np
+    >>> import torch
+    >>> from scipy.sparse import coo_matrix
+
+    Create a small three-edge graph.
+
+    >>> # Source nodes for edges (2, 1), (3, 2), (4, 3)
+    >>> src_ids = np.array([2, 3, 4])
+    >>> # Destination nodes for edges (2, 1), (3, 2), (4, 3)
+    >>> dst_ids = np.array([1, 2, 3])
+    >>> # Weight for edges (2, 1), (3, 2), (4, 3)
+    >>> eweight = np.array([0.2, 0.3, 0.5])
+    >>> sp_mat = coo_matrix((eweight, (src_ids, dst_ids)))
+    >>> g = dgl.bipartite_from_scipy(sp_mat, utype='_U', etype='_E', vtype='_V')
+
+    Retrieve the edge weights.
+
+    >>> g = dgl.bipartite_from_scipy(sp_mat, utype='_U', etype='_E', vtype='_V', eweight_name='w')
+    >>> g.edata['w']
+    tensor([0.2000, 0.3000, 0.5000], dtype=torch.float64)
+
+    Create a graph on the first GPU with data type int32.
+
+    >>> g = dgl.bipartite_from_scipy(sp_mat, utype='_U', etype='_E', vtype='_V',
+    ...                              idtype=torch.int32, device='cuda:0')
+
+    See Also
+    --------
+    heterograph
+    bipartite_from_networkx
+    """
+    # Convert the sparse matrix to edge tensors (no square-shape requirement here)
+    u, v, urange, vrange = utils.graphdata2tensors(sp_mat, idtype, bipartite=True)
+    g = create_from_edges(u, v, utype, etype, vtype, urange, vrange)
    if eweight_name is not None:
        g.edata[eweight_name] = F.tensor(sp_mat.data)
-    return g
+    return g.to(device)

-def from_networkx(nx_graph, *,
-                  ntype='_N', etype='_E',
+def from_networkx(nx_graph,
                  node_attrs=None,
                  edge_attrs=None,
-                  edge_id_attr_name='id',
-                  formats=['coo', 'csr', 'csc'],
-                  idtype=None):
-    """Create a DGLGraph from networkx.
+                  edge_id_attr_name=None,
+                  idtype=None,
+                  device=None):
+    """Create a graph from a NetworkX graph.
+
+    Creating a DGLGraph from a NetworkX graph is slow, especially for large graphs.
+    It is recommended to first convert a NetworkX graph into a tuple of node-tensors
+    and then construct a DGLGraph with :func:`dgl.graph`.

    Parameters
    ----------
    nx_graph : networkx.Graph
-        NetworkX graph.
-    ntype : str
-        Type name for both source and destination nodes
-    etype : str
-        Type name for edges
-    node_attrs : list of str
-        Names for node features to retrieve from the NetworkX graph (Default: None)
-    edge_attrs : list of str
-        Names for edge features to retrieve from the NetworkX graph (Default: None)
+        The NetworkX graph holding the graph structure and the node/edge attributes.
+        DGL will relabel the nodes using consecutive integers starting from zero if it is
+        not the case. If the input graph is undirected, DGL converts it to a directed graph
+        by :func:`networkx.Graph.to_directed`.
+    node_attrs : list[str], optional
+        The names of the node attributes to retrieve from the NetworkX graph. If given, DGL
+        stores the retrieved node attributes in ``ndata`` of the returned graph using their
+        original names. The attribute data must be convertible to Tensor type (e.g., scalar,
+        numpy.ndarray, list, etc.).
+    edge_attrs : list[str], optional
+        The names of the edge attributes to retrieve from the NetworkX graph. If given, DGL
+        stores the retrieved edge attributes in ``edata`` of the returned graph using their
+        original names. The attribute data must be convertible to Tensor type (e.g., scalar,
+        numpy.ndarray, list, etc.). It must be None if :attr:`nx_graph` is undirected.
    edge_id_attr_name : str, optional
-        Key name for edge ids in the NetworkX graph. If not found, we
-        will consider the graph not to have pre-specified edge ids. (Default: 'id')
-    formats : str or list of str
-        It can be ``'coo'``/``'csr'``/``'csc'`` or a sublist of them,
-        Force the storage formats.  Default: ``['coo', 'csr', 'csc']``.
-    idtype : int32, int64, optional
-        Integer ID type. Must be int32 or int64. Default: int64.
+        The name of the edge attribute that stores the edge IDs. If given, DGL will assign edge
+        IDs accordingly when creating the graph, so the attribute must be valid IDs, i.e.
+        consecutive integers starting from zero. By default, the edge IDs of the returned graph
+        can be arbitrary. It must be None if :attr:`nx_graph` is undirected.
+    idtype : int32 or int64, optional
+        The data type for storing the structure-related graph information such as node and
+        edge IDs. It should be a framework-specific data type object (e.g., ``torch.int32``).
+        By default, DGL uses int64.
+    device : device context, optional
+        The device of the resulting graph. It should be a framework-specific device object
+        (e.g., ``torch.device``). By default, DGL stores the graph on CPU.

    Returns
    -------
-    g : DGLGraph
+    DGLGraph
+        The created graph.
+
+    Notes
+    -----
+    DGL internally maintains multiple copies of the graph structure in different sparse
+    formats and chooses the most efficient one depending on the computation invoked.
+    If memory usage becomes an issue in the case of large graphs, use
+    :func:`dgl.DGLGraph.formats` to restrict the allowed formats.
+
+    Examples
+    --------
+
+    The following example uses PyTorch backend.
+
+    >>> import dgl
+    >>> import networkx as nx
+    >>> import numpy as np
+    >>> import torch
+
+    Create a 2-edge NetworkX graph.
+
+    >>> nx_g = nx.DiGraph()
+    >>> # Add 3 nodes and two features for them
+    >>> nx_g.add_nodes_from([0, 1, 2], feat1=np.zeros((3, 1)), feat2=np.ones((3, 1)))
+    >>> # Add 2 edges (1, 2) and (2, 1) with two features, one being edge IDs
+    >>> nx_g.add_edge(1, 2, weight=np.ones((1, 1)), eid=np.array([1]))
+    >>> nx_g.add_edge(2, 1, weight=np.ones((1, 1)), eid=np.array([0]))
+
+    Convert it into a DGLGraph with structure only.
+
+    >>> g = dgl.from_networkx(nx_g)
+
+    Retrieve the node/edge features of the graph.
+
+    >>> g = dgl.from_networkx(nx_g, node_attrs=['feat1', 'feat2'], edge_attrs=['weight'])
+
+    Use a pre-specified ordering of the edges.
+
+    >>> g.edges()
+    (tensor([1, 2]), tensor([2, 1]))
+    >>> dgl.from_networkx(nx_g, edge_id_attr_name='eid').edges()
+    (tensor([2, 1]), tensor([1, 2]))
+
+    Create a graph on the first GPU with data type int32.
+
+    >>> g = dgl.from_networkx(nx_g, idtype=torch.int32, device='cuda:0')
+
+    See Also
+    --------
+    graph
+    from_scipy
    """
-    # Relabel nodes using consecutive integers
+    # Sanity check
+    if edge_id_attr_name is not None and \
+            edge_id_attr_name not in next(iter(nx_graph.edges(data=True)))[-1]:
+        raise DGLError('Failed to find the pre-specified edge IDs in the edge features of '
+                       'the NetworkX graph with name {}'.format(edge_id_attr_name))
+
+    if not nx_graph.is_directed() and not (edge_id_attr_name is None and edge_attrs is None):
+        raise DGLError('Expect edge_id_attr_name and edge_attrs to be None when nx_graph is '
+                       'undirected, got {} and {}'.format(edge_id_attr_name, edge_attrs))
+
+    # Relabel nodes using consecutive integers starting from 0
    nx_graph = nx.convert_node_labels_to_integers(nx_graph, ordering='sorted')
    if not nx_graph.is_directed():
        nx_graph = nx_graph.to_directed()

-    g = graph(nx_graph, ntype, etype,
-              formats=formats,
-              idtype=idtype)
+    u, v, urange, vrange = utils.graphdata2tensors(
+        nx_graph, idtype, edge_id_attr_name=edge_id_attr_name)
+
+    g = create_from_edges(u, v, '_N', '_E', '_N', urange, vrange)

    # nx_graph.edges(data=True) returns src, dst, attr_dict
-    if nx_graph.number_of_edges() > 0:
-        has_edge_id = edge_id_attr_name in next(iter(nx_graph.edges(data=True)))[-1]
-    else:
-        has_edge_id = False
+    has_edge_id = nx_graph.number_of_edges() > 0 and edge_id_attr_name is not None

    # handle features
    # copy attributes
@@ -912,31 +1013,252 @@ def from_networkx(nx_graph, *,
                    raise DGLError('Not all edges have attribute {}.'.format(attr))
            g.edata[attr] = F.copy_to(_batcher(attr_dict[attr]), g.device)

-    return g
+    return g.to(device)
+
+def bipartite_from_networkx(nx_graph,
+                            utype, etype, vtype,
+                            u_attrs=None, e_attrs=None, v_attrs=None,
+                            edge_id_attr_name=None,
+                            idtype=None,
+                            device=None):
+    """Create a unidirectional bipartite graph from a NetworkX graph.
+
+    The created graph will have two types of nodes ``utype`` and ``vtype`` as well as one
+    edge type ``etype`` whose edges are from ``utype`` to ``vtype``.
+
+    Creating a DGLGraph from a NetworkX graph is slow, especially for large graphs.
+    It is recommended to first convert a NetworkX graph into a tuple of node-tensors
+    and then construct a DGLGraph with :func:`dgl.heterograph`.
+
+    Parameters
+    ----------
+    nx_graph : networkx.DiGraph
+        The NetworkX graph holding the graph structure and the node/edge attributes.
+        DGL will relabel the nodes using consecutive integers starting from zero if it is
+        not the case. The graph must follow `NetworkX's bipartite graph convention
+        <https://networkx.github.io/documentation/stable/reference/algorithms/bipartite.html>`_,
+        and furthermore the edges must be from nodes with attribute ``bipartite=0`` to nodes
+        with attribute ``bipartite=1``.
+    utype : str
+        The name of the source node type.
+    etype : str
+        The name of the edge type.
+    vtype : str
+        The name of the destination node type.
+    u_attrs : list[str], optional
+        The names of the node attributes for node type :attr:`utype` to retrieve from the
+        NetworkX graph. If given, DGL stores the retrieved node attributes in
+        ``nodes[utype].data`` of the returned graph using their original names. The attribute
+        data must be convertible to Tensor type (e.g., scalar, numpy.ndarray, list, etc.).
+    e_attrs : list[str], optional
+        The names of the edge attributes to retrieve from the NetworkX graph. If given, DGL
+        stores the retrieved edge attributes in ``edata`` of the returned graph using their
+        original names. The attribute data must be convertible to Tensor type (e.g., scalar,
+        numpy.ndarray, list, etc.).
+    v_attrs : list[str], optional
+        The names of the node attributes for node type :attr:`vtype` to retrieve from the
+        NetworkX graph. If given, DGL stores the retrieved node attributes in
+        ``nodes[vtype].data`` of the returned graph using their original names. The attribute
+        data must be convertible to Tensor type (e.g., scalar, numpy.ndarray, list, etc.).
+    edge_id_attr_name : str, optional
+        The name of the edge attribute that stores the edge IDs. If given, DGL will assign edge
+        IDs accordingly when creating the graph, so the attribute must be valid IDs, i.e.
+        consecutive integers starting from zero. By default, the edge IDs of the returned graph
+        can be arbitrary.
+    idtype : int32 or int64, optional
+        The data type for storing the structure-related graph information such as node and
+        edge IDs. It should be a framework-specific data type object (e.g., torch.int32).
+        By default, DGL uses int64.
+    device : device context, optional
+        The device of the resulting graph. It should be a framework-specific device object
+        (e.g., torch.device). By default, DGL stores the graph on CPU.
+
+    Returns
+    -------
+    DGLGraph
+        The created graph.
+
+    Examples
+    --------
+
+    The following example uses PyTorch backend.
+
+    >>> import dgl
+    >>> import networkx as nx
+    >>> import numpy as np
+    >>> import torch
+
+    Create a 2-edge unidirectional bipartite graph.
+
+    >>> nx_g = nx.DiGraph()
+    >>> # Add nodes for the source type
+    >>> nx_g.add_nodes_from([1, 3], bipartite=0, feat1=np.zeros((2, 1)), feat2=np.ones((2, 1)))
+    >>> # Add nodes for the destination type
+    >>> nx_g.add_nodes_from([2, 4, 5], bipartite=1, feat3=np.zeros((3, 1)))
+    >>> nx_g.add_edge(1, 4, weight=np.ones((1, 1)), eid=np.array([1]))
+    >>> nx_g.add_edge(3, 5, weight=np.ones((1, 1)), eid=np.array([0]))
+
+    Convert it into a DGLGraph with structure only.
+
+    >>> g = dgl.bipartite_from_networkx(nx_g, utype='_U', etype='_E', vtype='_V')
+
+    Retrieve the node/edge features of the graph.
+
+    >>> g = dgl.bipartite_from_networkx(nx_g, utype='_U', etype='_E', vtype='_V',
+    ...                                 u_attrs=['feat1', 'feat2'],
+    ...                                 e_attrs=['weight'],
+    ...                                 v_attrs=['feat3'])
+
+    Use a pre-specified ordering of the edges.
+
+    >>> g.edges()
+    (tensor([0, 1]), tensor([1, 2]))
+    >>> g = dgl.bipartite_from_networkx(nx_g, utype='_U', etype='_E', vtype='_V',
+    ...                                 edge_id_attr_name='eid')
+    >>> g.edges()
+    (tensor([1, 0]), tensor([2, 1]))
+
+    Create a graph on the first GPU with data type int32.
+
+    >>> g = dgl.bipartite_from_networkx(nx_g, utype='_U', etype='_E', vtype='_V',
+    ...                                 idtype=torch.int32, device='cuda:0')
+
+    See Also
+    --------
+    heterograph
+    bipartite_from_scipy
+    """
+    if not nx_graph.is_directed():
+        raise DGLError('Expect nx_graph to be a directed NetworkX graph.')
+    # Only the first edge is inspected. An edge-less graph has nothing to inspect, so skip
+    # the lookup there instead of letting next() raise StopIteration.
+    if edge_id_attr_name is not None and nx_graph.number_of_edges() > 0 and \
+            edge_id_attr_name not in next(iter(nx_graph.edges(data=True)))[-1]:
+        raise DGLError('Failed to find the pre-specified edge IDs in the edge features '
+                       'of the NetworkX graph with name {}'.format(edge_id_attr_name))
+
+    # Get the source and destination node sets
+    top_nodes = set()
+    bottom_nodes = set()
+    for n, ndata in nx_graph.nodes(data=True):
+        if 'bipartite' not in ndata:
+            raise DGLError('Expect the node {} to have attribute bipartite'.format(n))
+        if ndata['bipartite'] == 0:
+            top_nodes.add(n)
+        elif ndata['bipartite'] == 1:
+            bottom_nodes.add(n)
+        else:
+            raise ValueError('Expect the bipartite attribute of the node {} to be 0 or 1, '
+                             'got {}'.format(n, ndata['bipartite']))
+
+    # Separately relabel the source and destination nodes.
+    top_nodes = sorted(top_nodes)
+    bottom_nodes = sorted(bottom_nodes)
+    top_map = {n : i for i, n in enumerate(top_nodes)}
+    bottom_map = {n : i for i, n in enumerate(bottom_nodes)}
+
+    # Get the node tensors and the number of nodes
+    u, v, urange, vrange = utils.graphdata2tensors(
+        nx_graph, idtype, bipartite=True,
+        edge_id_attr_name=edge_id_attr_name,
+        top_map=top_map, bottom_map=bottom_map)
+
+    g = create_from_edges(u, v, utype, etype, vtype, urange, vrange)
+
+    # nx_graph.edges(data=True) returns src, dst, attr_dict
+    has_edge_id = nx_graph.number_of_edges() > 0 and edge_id_attr_name is not None
+
+    # Copy the requested node/edge attributes into the graph as features
+    def _batcher(lst):
+        if F.is_tensor(lst[0]):
+            return F.cat([F.unsqueeze(x, 0) for x in lst], dim=0)
+        else:
+            return F.tensor(lst)
+
+    if u_attrs is not None:
+        # mapping from feature name to a list of tensors to be concatenated
+        src_attr_dict = defaultdict(list)
+        for nid in top_nodes:
+            for attr in u_attrs:
+                src_attr_dict[attr].append(nx_graph.nodes[nid][attr])
+        for attr in u_attrs:
+            g.srcdata[attr] = F.copy_to(_batcher(src_attr_dict[attr]), g.device)
+
+    if v_attrs is not None:
+        # mapping from feature name to a list of tensors to be concatenated
+        dst_attr_dict = defaultdict(list)
+        for nid in bottom_nodes:
+            for attr in v_attrs:
+                dst_attr_dict[attr].append(nx_graph.nodes[nid][attr])
+        for attr in v_attrs:
+            g.dstdata[attr] = F.copy_to(_batcher(dst_attr_dict[attr]), g.device)
+
+    if e_attrs is not None:
+        # mapping from feature name to a list of tensors to be concatenated
+        attr_dict = defaultdict(lambda: [None] * g.number_of_edges())
+        # each defaultdict value starts as a list of None placeholders, one per edge,
+        # to be replaced by the feature of the edge with the corresponding edge id
+        if has_edge_id:
+            for _, _, attrs in nx_graph.edges(data=True):
+                for key in e_attrs:
+                    attr_dict[key][attrs[edge_id_attr_name]] = attrs[key]
+        else:
+            # XXX: assuming networkx iteration order is deterministic
+            #      so the order is the same as graph_index.from_networkx
+            for eid, (_, _, attrs) in enumerate(nx_graph.edges(data=True)):
+                for key in e_attrs:
+                    attr_dict[key][eid] = attrs[key]
+        for attr in e_attrs:
+            for val in attr_dict[attr]:
+                if val is None:
+                    raise DGLError('Not all edges have attribute {}.'.format(attr))
+            g.edata[attr] = F.copy_to(_batcher(attr_dict[attr]), g.device)
+
+    return g.to(device)

 def to_networkx(g, node_attrs=None, edge_attrs=None):
-    """Convert to networkx graph.
+    """Convert a homogeneous graph to a NetworkX graph.

-    The edge id will be saved as the 'id' edge attribute.
+    It will save the edge IDs as the ``'id'`` edge attribute in the returned NetworkX graph.

    Parameters
    ----------
-    g : DGLGraph or DGLHeteroGraph
-        For DGLHeteroGraphs, we currently only support the
-        case of one node type and one edge type.
+    g : DGLGraph
+        A homogeneous graph on CPU.
    node_attrs : iterable of str, optional
-        The node attributes to be copied. (Default: None)
+        The node attributes to copy from ``g.ndata``. (Default: None)
    edge_attrs : iterable of str, optional
-        The edge attributes to be copied. (Default: None)
+        The edge attributes to copy from ``g.edata``. (Default: None)

    Returns
    -------
    networkx.DiGraph
-        The nx graph
+        The converted NetworkX graph.
+
+    Examples
+    --------
+    The following example uses PyTorch backend.
+
+    >>> import dgl
+    >>> import torch
+
+    >>> g = dgl.graph((torch.tensor([1, 2]), torch.tensor([1, 3])))
+    >>> g.ndata['h'] = torch.zeros(4, 1)
+    >>> g.edata['h1'] = torch.ones(2, 1)
+    >>> g.edata['h2'] = torch.zeros(2, 2)
+    >>> nx_g = dgl.to_networkx(g, node_attrs=['h'], edge_attrs=['h1', 'h2'])
+    >>> nx_g.nodes(data=True)
+    NodeDataView({0: {'h': tensor([0.])},
+                  1: {'h': tensor([0.])},
+                  2: {'h': tensor([0.])},
+                  3: {'h': tensor([0.])}})
+    >>> nx_g.edges(data=True)
+    OutMultiEdgeDataView([(1, 1, {'id': 0, 'h1': tensor([1.]), 'h2': tensor([0., 0.])}),
+                          (2, 3, {'id': 1, 'h1': tensor([1.]), 'h2': tensor([0., 0.])})])
    """
    if g.device != F.cpu():
        raise DGLError('Cannot convert a CUDA graph to networkx. Call g.cpu() first.')
-    if not g.is_homogeneous():
+    if not g.is_homogeneous:
        raise DGLError('dgl.to_networkx only supports homogeneous graphs.')
    src, dst = g.edges()
    src = F.asnumpy(src)
@@ -967,8 +1289,7 @@ DGLHeteroGraph.to_networkx = to_networkx
 def create_from_edges(u, v,
                      utype, etype, vtype,
                      urange, vrange,
-                      validate=True,
-                      formats=['coo', 'csr', 'csc']):
+                      validate=True):
    """Internal function to create a graph from incident nodes with types.

    utype could be equal to vtype
@@ -993,9 +1314,6 @@ def create_from_edges(u, v,
        maximum of the destination node IDs in the edge list plus 1. (Default: None)
    validate : bool, optional
        If True, checks if node IDs are within range.
-    formats : str or list of str
-        It can be ``'coo'``/``'csr'``/``'csc'`` or a sublist of them,
-        Force the storage formats.  Default: ``['coo', 'csr', 'csc']``.

    Returns
    -------
@@ -1016,12 +1334,8 @@ def create_from_edges(u, v,
    else:
        num_ntypes = 2

-    if 'coo' in formats:
-        hgidx = heterograph_index.create_unitgraph_from_coo(
-            num_ntypes, urange, vrange, u, v, formats)
-    else:
-        hgidx = heterograph_index.create_unitgraph_from_coo(
-            num_ntypes, urange, vrange, u, v, ['coo']).formats(formats)
+    hgidx = heterograph_index.create_unitgraph_from_coo(
+        num_ntypes, urange, vrange, u, v, ['coo', 'csr', 'csc'])
    if utype == vtype:
        return DGLHeteroGraph(hgidx, [utype], [etype])
    else:

--- a/python/dgl/data/citation_graph.py
+++ b/python/dgl/data/citation_graph.py
@@ -19,7 +19,7 @@ from .. import convert
 from .. import batch
 from .. import backend as F
 from ..convert import graph as dgl_graph
-from ..convert import to_networkx
+from ..convert import from_networkx, to_networkx

 backend = os.environ.get('DGLBACKEND', 'pytorch')

@@ -119,7 +119,7 @@ class CitationGraphDataset(DGLBuiltinDataset):
        test_mask = _sample_mask(idx_test, labels.shape[0])

        self._graph = graph
-        g = dgl_graph(graph)
+        g = from_networkx(graph)

        g.ndata['train_mask'] = generate_mask_tensor(train_mask)
        g.ndata['val_mask'] = generate_mask_tensor(val_mask)
@@ -794,13 +794,13 @@ class CoraBinary(DGLBuiltinDataset):
            for line in f.readlines():
                if line.startswith('graph'):
                    if len(elist) != 0:
-                        self.graphs.append(dgl_graph(elist))
+                        self.graphs.append(dgl_graph(tuple(zip(*elist))))
                    elist = []
                else:
                    u, v = line.strip().split(' ')
                    elist.append((int(u), int(v)))
            if len(elist) != 0:
-                self.graphs.append(dgl_graph(elist))
+                self.graphs.append(dgl_graph(tuple(zip(*elist))))
        with open("{}/pmpds.pkl".format(root), 'rb') as f:
            self.pmpds = _pickle_load(f)
        self.labels = []

--- a/python/dgl/data/gindt.py
+++ b/python/dgl/data/gindt.py
@@ -157,7 +157,7 @@ class GINDataset(DGLBuiltinDataset):

                self.labels.append(self.glabel_dict[glabel])

-                g = dgl_graph([])
+                g = dgl_graph(([], []))
                g.add_nodes(n_nodes)

                nlabels = []  # node labels

--- a/python/dgl/data/graph_serialize.py
+++ b/python/dgl/data/graph_serialize.py
@@ -86,8 +86,8 @@ def save_graphs(filename, g_list, labels=None):
    Create :code:`DGLGraph`/:code:`DGLHeteroGraph` objects and initialize node
    and edge features.

-    >>> g1 = dgl.graph(([0, 1, 2], [1, 2, 3])
-    >>> g2 = dgl.graph(([0, 2], [2, 3])
+    >>> g1 = dgl.graph(([0, 1, 2], [1, 2, 3]))
+    >>> g2 = dgl.graph(([0, 2], [2, 3]))
    >>> g2.edata["e"] = th.ones(2, 4)

    Save Graphs into file

--- a/python/dgl/data/karate.py
+++ b/python/dgl/data/karate.py
@@ -6,7 +6,7 @@ import networkx as nx
 from .. import backend as F
 from .dgl_dataset import DGLDataset
 from .utils import deprecate_property
-from ..convert import graph as dgl_graph
+from ..convert import from_networkx

 __all__ = ['KarateClubDataset', 'KarateClub']

@@ -56,7 +56,7 @@ class KarateClubDataset(DGLDataset):
        label = np.asarray(
            [kc_graph.nodes[i]['club'] != 'Mr. Hi' for i in kc_graph.nodes]).astype(np.int64)
        label = F.tensor(label)
-        g = dgl_graph(kc_graph)
+        g = from_networkx(kc_graph)
        g.ndata['label'] = label
        self._graph = g
        self._data = [g]

--- a/python/dgl/data/minigc.py
+++ b/python/dgl/data/minigc.py
@@ -6,7 +6,7 @@ import numpy as np
 from .dgl_dataset import DGLDataset
 from .utils import save_graphs, load_graphs, makedirs
 from .. import backend as F
-from ..convert import graph as dgl_graph
+from ..convert import from_networkx
 from ..transform import add_self_loop

 __all__ = ['MiniGCDataset']
@@ -147,7 +147,7 @@ class MiniGCDataset(DGLDataset):
        # preprocess
        for i in range(self.num_graphs):
            # convert to DGLGraph, and add self loops
-            self.graphs[i] = add_self_loop(dgl_graph(self.graphs[i]))
+            self.graphs[i] = add_self_loop(from_networkx(self.graphs[i]))
        self.labels = F.tensor(np.array(self.labels).astype(np.int))

    def _gen_cycle(self, n):

--- a/python/dgl/data/rdf.py
+++ b/python/dgl/data/rdf.py
@@ -300,10 +300,10 @@ class RDFGraphDataset(DGLBuiltinDataset):
        # convert to heterograph
        if self.verbose:
            print('Convert to heterograph ...')
-        hg = dgl.to_hetero(g,
-                           ntypes,
-                           etypes,
-                           metagraph=mg)
+        hg = dgl.to_heterogeneous(g,
+                                  ntypes,
+                                  etypes,
+                                  metagraph=mg)
        if self.verbose:
            print('#Node types:', len(hg.ntypes))
            print('#Canonical edge types:', len(hg.etypes))

--- a/python/dgl/data/reddit.py
+++ b/python/dgl/data/reddit.py
@@ -8,7 +8,7 @@ import os
 from .dgl_dataset import DGLBuiltinDataset
 from .utils import _get_dgl_url, generate_mask_tensor, load_graphs, save_graphs, deprecate_property
 from .. import backend as F
-from ..convert import graph as dgl_graph
+from ..convert import from_scipy


 class RedditDataset(DGLBuiltinDataset):
@@ -140,7 +140,7 @@ class RedditDataset(DGLBuiltinDataset):
        # graph
        coo_adj = sp.load_npz(os.path.join(
            self.raw_path, "reddit{}_graph.npz".format(self._self_loop_str)))
-        self._graph = dgl_graph(coo_adj)
+        self._graph = from_scipy(coo_adj)
        # features and labels
        reddit_data = np.load(os.path.join(self.raw_path, "reddit_data.npz"))
        features = reddit_data["feature"]

--- a/python/dgl/data/sbm.py
+++ b/python/dgl/data/sbm.py
@@ -8,7 +8,7 @@ import numpy.random as npr
 import scipy as sp

 from .dgl_dataset import DGLDataset
-from ..convert import graph as dgl_graph
+from ..convert import from_scipy
 from .. import batch
 from .utils import save_info, save_graphs, load_info, load_graphs

@@ -124,7 +124,7 @@ class SBMMixtureDataset(DGLDataset):
            pq = [generator() for _ in range(self._n_graphs)]
        else:
            raise RuntimeError()
-        self._graphs = [dgl_graph(sbm(self._n_communities, self._block_size, *x)) for x in pq]
+        self._graphs = [from_scipy(sbm(self._n_communities, self._block_size, *x)) for x in pq]
        self._line_graphs = [g.line_graph(backtracking=False) for g in self._graphs]
        in_degrees = lambda g: g.in_degrees().float()
        self._graph_degrees = [in_degrees(g) for g in self._graphs]

--- a/python/dgl/data/tu.py
+++ b/python/dgl/data/tu.py
@@ -100,7 +100,7 @@ class LegacyTUDataset(DGLBuiltinDataset):
        DS_graph_labels = self._idx_from_zero(
            np.genfromtxt(self._file_path("graph_labels"), dtype=int))

-        g = dgl_graph([])
+        g = dgl_graph(([], []))
        g.add_nodes(int(DS_edge_list.max()) + 1)
        g.add_edges(DS_edge_list[:, 0], DS_edge_list[:, 1])

@@ -296,7 +296,7 @@ class TUDataset(DGLBuiltinDataset):
        DS_graph_labels = self._idx_from_zero(
            loadtxt(self._file_path("graph_labels"), delimiter=",").astype(int))

-        g = dgl_graph([])
+        g = dgl_graph(([], []))
        g.add_nodes(int(DS_edge_list.max()) + 1)
        g.add_edges(DS_edge_list[:, 0], DS_edge_list[:, 1])


--- a/python/dgl/dataloading/dataloader.py
+++ b/python/dgl/dataloading/dataloader.py
@@ -596,8 +596,11 @@ class EdgeCollator(Collator):
                'graph has multiple or no edge types; '\
                'please return a dict in negative sampler.'
            neg_srcdst = {self.g.canonical_etypes[0]: neg_srcdst}
+        # Get dtype from a tuple of tensors
+        dtype = F.dtype(list(neg_srcdst.values())[0][0])
        neg_edges = {
-            etype: neg_srcdst.get(etype, []) for etype in self.g.canonical_etypes}
+            etype: neg_srcdst.get(etype, (F.tensor([], dtype), F.tensor([], dtype)))
+            for etype in self.g.canonical_etypes}
        neg_pair_graph = heterograph(
            neg_edges, {ntype: self.g.number_of_nodes(ntype) for ntype in self.g.ntypes})


--- a/python/dgl/generators.py
+++ b/python/dgl/generators.py
@@ -38,12 +38,12 @@ def rand_graph(num_nodes, num_edges, idtype=F.int64, device=F.cpu(),
    rows = F.copy_to(F.astype(eids / num_nodes, idtype), device)
    cols = F.copy_to(F.astype(eids % num_nodes, idtype), device)
    g = convert.graph((rows, cols),
-                      num_nodes=num_nodes, validate=False,
-                      formats=formats,
+                      num_nodes=num_nodes,
                      idtype=idtype, device=device)
-    return g
+    return g.formats(formats)

-def rand_bipartite(num_src_nodes, num_dst_nodes, num_edges,
+def rand_bipartite(utype, etype, vtype,
+                   num_src_nodes, num_dst_nodes, num_edges,
                   idtype=F.int64, device=F.cpu(),
                   formats=['csr', 'coo', 'csc']):
    """Generate a random bipartite graph of the given number of src/dst nodes and
@@ -53,6 +53,12 @@ def rand_bipartite(num_src_nodes, num_dst_nodes, num_edges,

    Parameters
    ----------
+    utype : str
+        The name of the source node type.
+    etype : str
+        The name of the edge type.
+    vtype : str
+        The name of the destination node type.
    num_src_nodes : int
        The number of source nodes, the :math:`|U|` in :math:`G=(U,V,E)`.
    num_dst_nodes : int
@@ -75,8 +81,7 @@ def rand_bipartite(num_src_nodes, num_dst_nodes, num_edges,
    eids = random.choice(num_src_nodes * num_dst_nodes, num_edges, replace=False)
    rows = F.copy_to(F.astype(eids / num_dst_nodes, idtype), device)
    cols = F.copy_to(F.astype(eids % num_dst_nodes, idtype), device)
-    g = convert.bipartite((rows, cols),
-                          num_nodes=(num_src_nodes, num_dst_nodes), validate=False,
-                          idtype=idtype, device=device,
-                          formats=formats)
-    return g
+    g = convert.heterograph({(utype, etype, vtype): (rows, cols)},
+                            {utype: num_src_nodes, vtype: num_dst_nodes},
+                            idtype=idtype, device=device)
+    return g.formats(formats)
--- a/python/dgl/heterograph.py
+++ b/python/dgl/heterograph.py
 """Classes for heterogeneous graphs."""
 #pylint: disable= too-many-lines
 from collections import defaultdict
-from collections.abc import Mapping
+from collections.abc import Iterable, Mapping
 from contextlib import contextmanager
 import copy
@@ -75,38 +75,21 @@ class DGLHeteroGraph(object):

    One can construct the graph as follows:

-    >>> follows_g = dgl.graph(([0, 1], [1, 2]), 'user', 'follows')
-    >>> plays_g = dgl.bipartite(([0, 1, 1, 2], [0, 0, 1, 1]), 'user', 'plays', 'game')
-    >>> devs_g = dgl.bipartite(([0, 1], [0, 1]), 'developer', 'develops', 'game')
-    >>> g = dgl.hetero_from_relations([follows_g, plays_g, devs_g])
-
-    Or equivalently
-
    >>> g = dgl.heterograph({
    ...     ('user', 'follows', 'user'): ([0, 1], [1, 2]),
    ...     ('user', 'plays', 'game'): ([0, 1, 1, 2], [0, 0, 1, 1]),
    ...     ('developer', 'develops', 'game'): ([0, 1], [0, 1]),
    ...     })

-    :func:`dgl.graph` and :func:`dgl.bipartite` can create a graph from a variety of
-    data types including:
-
-    * edge list
-    * edge tuples
-    * networkx graph
-    * scipy sparse matrix
-
-    Click the function names for more details.
-
    Then one can query the graph structure by specifying the ``ntype`` or ``etype`` arguments:

    >>> g.number_of_nodes('user')
    3
    >>> g.number_of_edges('plays')
    4
-    >>> g.out_degrees(etype='develops')  # out-degrees of source nodes of 'develops' relation
+    >>> g.out_degrees(etype='develops')  # out-degrees of source nodes of 'develops' edge type
    tensor([1, 1])
-    >>> g.in_edges(0, etype='develops')  # in-edges of destination node 0 of 'develops' relation
+    >>> g.in_edges(0, etype='develops')  # in-edges of destination node 0 of 'develops' edge type
    (tensor([0]), tensor([0]))

    Or on the sliced graph for an edge type:
@@ -125,9 +108,10 @@ class DGLHeteroGraph(object):
    For example, suppose a graph that has two types of relation "user-watches-movie"
    and "user-watches-TV" as follows:

-    >>> g0 = dgl.bipartite(([0, 1, 1], [1, 0, 1]), 'user', 'watches', 'movie')
-    >>> g1 = dgl.bipartite(([0, 1], [0, 1]), 'user', 'watches', 'TV')
-    >>> GG = dgl.hetero_from_relations([g0, g1]) # Merge the two graphs
+    >>> GG = dgl.heterograph({
+    ...     ('user', 'watches', 'movie'): ([0, 1, 1], [1, 0, 1]),
+    ...     ('user', 'watches', 'TV'): ([0, 1], [0, 1])
+    ... })

    To distinguish between the two "watches" edge type, one must specify a full triplet:

@@ -400,11 +384,11 @@ class DGLHeteroGraph(object):
        **Heterogeneous Graphs with Multiple Node Types**

        >>> g = dgl.heterograph({
-        >>>     ('user', 'plays', 'game'): (torch.tensor([0, 1, 1, 2]),
-        >>>                                 torch.tensor([0, 0, 1, 1])),
-        >>>     ('developer', 'develops', 'game'): (torch.tensor([0, 1]),
-        >>>                                         torch.tensor([0, 1]))
-        >>>     })
+        ...     ('user', 'plays', 'game'): (torch.tensor([0, 1, 1, 2]),
+        ...                                 torch.tensor([0, 0, 1, 1])),
+        ...     ('developer', 'develops', 'game'): (torch.tensor([0, 1]),
+        ...                                         torch.tensor([0, 1]))
+        ...     })
        >>> g.add_nodes(2)
        DGLError: Node type name must be specified
        if there are more than one node types.
@@ -547,7 +531,7 @@ class DGLHeteroGraph(object):
        We can also assign features for the new edges in adding new edges.

        >>> g.add_edges(torch.tensor([0, 0]), torch.tensor([2, 2]),
-        >>>             {'h': torch.tensor([[1.], [2.]]), 'w': torch.ones(2, 1)})
+        ...             {'h': torch.tensor([[1.], [2.]]), 'w': torch.ones(2, 1)})
        >>> g.edata['h']
        tensor([[1.], [1.], [1.], [1.], [0.], [1.], [2.]])

@@ -560,17 +544,17 @@ class DGLHeteroGraph(object):
        **Heterogeneous Graphs with Multiple Edge Types**

        >>> g = dgl.heterograph({
-        >>>     ('user', 'plays', 'game'): (torch.tensor([0, 1, 1, 2]),
-        >>>                                 torch.tensor([0, 0, 1, 1])),
-        >>>     ('developer', 'develops', 'game'): (torch.tensor([0, 1]),
-        >>>                                         torch.tensor([0, 1]))
-        >>>     })
+        ...     ('user', 'plays', 'game'): (torch.tensor([0, 1, 1, 2]),
+        ...                                 torch.tensor([0, 0, 1, 1])),
+        ...     ('developer', 'develops', 'game'): (torch.tensor([0, 1]),
+        ...                                         torch.tensor([0, 1]))
+        ...     })
        >>> g.add_edges(torch.tensor([3]), torch.tensor([3]))
        DGLError: Edge type name must be specified
        if there are more than one edge types.
        >>> g.number_of_edges('plays')
        4
-        >>>  g.add_edges(torch.tensor([3]), torch.tensor([3]), etype='plays')
+        >>> g.add_edges(torch.tensor([3]), torch.tensor([3]), etype='plays')
        >>> g.number_of_edges('plays')
        5

@@ -696,11 +680,11 @@ class DGLHeteroGraph(object):
        **Heterogeneous Graphs with Multiple Edge Types**

        >>> g = dgl.heterograph({
-        >>>     ('user', 'plays', 'game'): (torch.tensor([0, 1, 1, 2]),
-        >>>                                 torch.tensor([0, 0, 1, 1])),
-        >>>     ('developer', 'develops', 'game'): (torch.tensor([0, 1]),
-        >>>                                         torch.tensor([0, 1]))
-        >>>     })
+        ...     ('user', 'plays', 'game'): (torch.tensor([0, 1, 1, 2]),
+        ...                                 torch.tensor([0, 0, 1, 1])),
+        ...     ('developer', 'develops', 'game'): (torch.tensor([0, 1]),
+        ...                                         torch.tensor([0, 1]))
+        ...     })
        >>> g.remove_edges(torch.tensor([0, 1]))
        DGLError: Edge type name must be specified
        if there are more than one edge types.
@@ -784,11 +768,11 @@ class DGLHeteroGraph(object):
        **Heterogeneous Graphs with Multiple Node Types**

        >>> g = dgl.heterograph({
-        >>>     ('user', 'plays', 'game'): (torch.tensor([0, 1, 1, 2]),
-        >>>                                 torch.tensor([0, 0, 1, 1])),
-        >>>     ('developer', 'develops', 'game'): (torch.tensor([0, 1]),
-        >>>                                         torch.tensor([0, 1]))
-        >>>     })
+        ...     ('user', 'plays', 'game'): (torch.tensor([0, 1, 1, 2]),
+        ...                                 torch.tensor([0, 0, 1, 1])),
+        ...     ('developer', 'develops', 'game'): (torch.tensor([0, 1]),
+        ...                                         torch.tensor([0, 1]))
+        ...     })
        >>> g.remove_nodes(torch.tensor([0, 1]))
        DGLError: Node type name must be specified
        if there are more than one node types.
@@ -853,7 +837,7 @@ class DGLHeteroGraph(object):

        A uni-bipartite heterograph can further divide its node types into two sets:
        SRC and DST. All edges are from nodes in SRC to nodes in DST. The following APIs
-        can be used to get the nodes and types that belong to SRC and DST sets:
+        can be used to get the type, data, and nodes that belong to SRC and DST sets:

        * :func:`srctype` and :func:`dsttype`
        * :func:`srcdata` and :func:`dstdata`
@@ -867,67 +851,131 @@ class DGLHeteroGraph(object):

    @property
    def ntypes(self):
-        """Return the list of node types of this graph.
+        """Return the node types of the graph.

        Returns
        -------
        list of str
+            Each ``str`` is a node type.

        Examples
        --------
+        The following example uses PyTorch backend.
+
+        >>> import dgl
+        >>> import torch

-        >>> follows_g = dgl.graph(([0, 1], [1, 2]), 'user', 'follows')
-        >>> plays_g = dgl.bipartite(([0, 1, 1, 2], [0, 0, 1, 1]), 'user', 'plays', 'game')
-        >>> g = dgl.hetero_from_relations([follows_g, plays_g])
+        >>> g = dgl.heterograph({
+        ...     ('user', 'follows', 'user'): (torch.tensor([0, 1]), torch.tensor([1, 2])),
+        ...     ('user', 'follows', 'game'): (torch.tensor([0, 1, 2]), torch.tensor([1, 2, 3])),
+        ...     ('user', 'plays', 'game'): (torch.tensor([1, 3]), torch.tensor([2, 3]))
+        ... })
        >>> g.ntypes
-        ['user', 'game']
+        ['game', 'user']
        """
        return self._ntypes

    @property
    def etypes(self):
-        """Return the list of edge types of this graph.
+        """Return the edge types of the graph.

        Returns
        -------
        list of str
+            Each ``str`` is an edge type.
+
+        Notes
+        -----
+        An edge type can appear in multiple canonical edge types. For example, ``'interacts'``
+        can appear in two canonical edge types ``('drug', 'interacts', 'drug')`` and
+        ``('protein', 'interacts', 'protein')``. It is recommended to use
+        :func:`~dgl.DGLGraph.canonical_etypes` in this case.

        Examples
        --------
+        The following example uses PyTorch backend.
+
+        >>> import dgl
+        >>> import torch

-        >>> follows_g = dgl.graph(([0, 1], [1, 2]), 'user', 'follows')
-        >>> plays_g = dgl.bipartite(([0, 1, 1, 2], [0, 0, 1, 1]), 'user', 'plays', 'game')
-        >>> g = dgl.hetero_from_relations([follows_g, plays_g])
+        >>> g = dgl.heterograph({
+        ...     ('user', 'follows', 'user'): (torch.tensor([0, 1]), torch.tensor([1, 2])),
+        ...     ('user', 'follows', 'game'): (torch.tensor([0, 1, 2]), torch.tensor([1, 2, 3])),
+        ...     ('user', 'plays', 'game'): (torch.tensor([1, 3]), torch.tensor([2, 3]))
+        ... })
        >>> g.etypes
-        ['follows', 'plays']
+        ['follows', 'follows', 'plays']
        """
        return self._etypes

    @property
    def canonical_etypes(self):
-        """Return the list of canonical edge types of this graph.
+        """Return the canonical edge types of the graph.

-        A canonical edge type is a tuple of string (src_type, edge_type, dst_type).
+        A canonical edge type is a 3-tuple of str ``src_type, edge_type, dst_type``, where
+        ``src_type``, ``edge_type``, ``dst_type`` are the type of the source nodes, edges
+        and destination nodes respectively.

        Returns
        -------
-        list of 3-tuples
+        list of 3-tuple of str
+            Each 3-tuple of str is a canonical edge type.

        Examples
        --------
+        The following example uses PyTorch backend.
+
+        >>> import dgl
+        >>> import torch

-        >>> follows_g = dgl.graph(([0, 1], [1, 2]), 'user', 'follows')
-        >>> plays_g = dgl.bipartite(([0, 1, 1, 2], [0, 0, 1, 1]), 'user', 'plays', 'game')
-        >>> g = dgl.hetero_from_relations([follows_g, plays_g])
+        >>> g = dgl.heterograph({
+        ...     ('user', 'follows', 'user'): (torch.tensor([0, 1]), torch.tensor([1, 2])),
+        ...     ('user', 'follows', 'game'): (torch.tensor([0, 1, 2]), torch.tensor([1, 2, 3])),
+        ...     ('user', 'plays', 'game'): (torch.tensor([1, 3]), torch.tensor([2, 3]))
+        ... })
        >>> g.canonical_etypes
-        [('user', 'follows', 'user'), ('user', 'plays', 'game')]
+        [('user', 'follows', 'user'),
+         ('user', 'follows', 'game'),
+         ('user', 'plays', 'game')]
        """
        return self._canonical_etypes

    @property
    def srctypes(self):
-        """Return the node types in the SRC category. Return :attr:``ntypes`` if
-        the graph is not a uni-bipartite graph.
+        """Return the source node types.
+
+        Returns
+        -------
+        list of str
+
+            * If the graph is a uni-bipartite graph, it returns the source node types.
+              For a definition of uni-bipartite, see :func:`is_unibipartite`.
+            * Otherwise, it returns all node types in the graph.
+
+        Examples
+        --------
+        The following example uses PyTorch backend.
+
+        >>> import dgl
+        >>> import torch
+
+        Query for a uni-bipartite graph.
+
+        >>> g = dgl.heterograph({
+        ...     ('user', 'plays', 'game'): (torch.tensor([0]), torch.tensor([1])),
+        ...     ('developer', 'develops', 'game'): (torch.tensor([1]), torch.tensor([2]))
+        ... })
+        >>> g.srctypes
+        ['developer', 'user']
+
+        Query for a graph that is not uni-bipartite.
+
+        >>> g = dgl.heterograph({
+        ...     ('user', 'follows', 'user'): (torch.tensor([0]), torch.tensor([1])),
+        ...     ('developer', 'develops', 'game'): (torch.tensor([1]), torch.tensor([2]))
+        ... })
+        >>> g.srctypes
+        ['developer', 'game', 'user']
        """
        if self.is_unibipartite:
            return sorted(list(self._srctypes_invmap.keys()))
@@ -936,8 +984,41 @@ class DGLHeteroGraph(object):

    @property
    def dsttypes(self):
-        """Return the node types in the DST category. Return :attr:``ntypes`` if
-        the graph is not a uni-bipartite graph.
+        """Return the destination node types.
+
+        Returns
+        -------
+        list of str
+            Each str is a node type.
+
+            * If the graph is a uni-bipartite graph, it returns the destination node types.
+              For a definition of uni-bipartite, see :func:`is_unibipartite`.
+            * Otherwise, it returns all node types in the graph.
+
+        Examples
+        --------
+        The following example uses PyTorch backend.
+
+        >>> import dgl
+        >>> import torch
+
+        Query for a uni-bipartite graph.
+
+        >>> g = dgl.heterograph({
+        ...     ('user', 'plays', 'game'): (torch.tensor([0]), torch.tensor([1])),
+        ...     ('developer', 'develops', 'game'): (torch.tensor([1]), torch.tensor([2]))
+        ... })
+        >>> g.dsttypes
+        ['game']
+
+        Query for a graph that is not uni-bipartite.
+
+        >>> g = dgl.heterograph({
+        ...     ('user', 'follows', 'user'): (torch.tensor([0]), torch.tensor([1])),
+        ...     ('developer', 'develops', 'game'): (torch.tensor([1]), torch.tensor([2]))
+        ... })
+        >>> g.dsttypes
+        ['developer', 'game', 'user']
        """
        if self.is_unibipartite:
            return sorted(list(self._dsttypes_invmap.keys()))
@@ -945,33 +1026,34 @@ class DGLHeteroGraph(object):
            return self.ntypes

    def metagraph(self):
-        """Return the metagraph as networkx.MultiDiGraph.
+        """Return the metagraph of the heterograph.

-        The nodes are labeled with node type names.
-        The edges have their keys holding the edge type names.
+        The metagraph (or network schema) of a heterogeneous network specifies type constraints
+        on the sets of nodes and edges between the nodes. For a formal definition, refer to
+        `Sun et al. <https://www.kdd.org/exploration_files/V14-02-03-Sun.pdf>`_.

        Returns
        -------
        networkx.MultiDiGraph
+            The metagraph.

        Examples
        --------
+        The following example uses PyTorch backend.

-        >>> follows_g = dgl.graph(([0, 1], [1, 2]), 'user', 'follows')
-        >>> plays_g = dgl.bipartite(([0, 1, 1, 2], [0, 0, 1, 1]), 'user', 'plays', 'game')
-        >>> g = dgl.hetero_from_relations([follows_g, plays_g])
-        >>> meta_g = g.metagraph()
-
-        The metagraph then has two nodes and two edges.
+        >>> import dgl
+        >>> import torch

+        >>> g = dgl.heterograph({
+        ...     ('user', 'follows', 'user'): (torch.tensor([0, 1]), torch.tensor([1, 2])),
+        ...     ('user', 'follows', 'game'): (torch.tensor([0, 1, 2]), torch.tensor([1, 2, 3])),
+        ...     ('user', 'plays', 'game'): (torch.tensor([1, 3]), torch.tensor([2, 3]))
+        ... })
+        >>> meta_g = g.metagraph()
        >>> meta_g.nodes()
        NodeView(('user', 'game'))
-        >>> meta_g.number_of_nodes()
-        2
        >>> meta_g.edges()
-        OutMultiEdgeDataView([('user', 'user'), ('user', 'game')])
-        >>> meta_g.number_of_edges()
-        2
+        OutMultiEdgeDataView([('user', 'user'), ('user', 'game'), ('user', 'game')])
        """
        nx_graph = self._graph.metagraph.to_networkx()
        nx_metagraph = nx.MultiDiGraph()
@@ -981,38 +1063,56 @@ class DGLHeteroGraph(object):
        return nx_metagraph

    def to_canonical_etype(self, etype):
-        """Convert edge type to canonical etype: (srctype, etype, dsttype).
+        """Convert an edge type to the corresponding canonical edge type in the graph.

-        The input can already be a canonical tuple.
+        A canonical edge type is a 3-tuple of strings ``src_type, edge_type, dst_type``, where
+        ``src_type``, ``edge_type``, ``dst_type`` are the types of the source
+        nodes, edges and destination nodes, respectively.

        Parameters
        ----------
-        etype : str or tuple of str
-            Edge type
+        etype : str or 3-tuple of str
+            If :attr:`etype` is an edge type (str), it returns the corresponding canonical edge
+            type in the graph. If :attr:`etype` is already a canonical edge type
+            (3-tuple of str), it simply returns :attr:`etype`.

        Returns
        -------
-        tuple of str
+        3-tuple of str
+            The canonical edge type corresponding to the edge type.
+
+        Notes
+        -----
+        If :attr:`etype` is an edge type, the API expects it to appear only once in the graph. For
+        example, in a graph with canonical edge types ``('A', 'follows', 'B')``,
+        ``('A', 'follows', 'C')`` and ``('B', 'watches', 'D')``, ``'follows'`` is an invalid value
+        for :attr:`etype` while ``'watches'`` is a valid one.

        Examples
        --------
+        The following example uses PyTorch backend.

-        Instantiate a heterograph.
+        >>> import dgl
+        >>> import torch
+
+        Create a heterograph.

-        >>> g1 = dgl.graph(([0, 1], [1, 2]), 'user', 'follows')
-        >>> g2 = dgl.bipartite(([0, 1, 1, 2], [0, 0, 1, 1]), 'user', 'plays', 'game')
-        >>> g3 = dgl.bipartite(([0, 1], [0, 1]), 'developer', 'follows', 'game')
-        >>> g = dgl.hetero_from_relations([g1, g2, g3])
+        >>> g = dgl.heterograph({
+        ...     ('user', 'follows', 'user'): ([0, 1], [1, 2]),
+        ...     ('user', 'plays', 'game'): ([0, 1, 1, 2], [0, 0, 1, 1]),
+        ...     ('developer', 'follows', 'game'): ([0, 1], [0, 1])
+        ... })

-        Get canonical edge types.
+        Map an edge type to its corresponding canonical edge type.

        >>> g.to_canonical_etype('plays')
        ('user', 'plays', 'game')
        >>> g.to_canonical_etype(('user', 'plays', 'game'))
        ('user', 'plays', 'game')
-        >>> g.to_canonical_etype('follows')
-        DGLError: Edge type "follows" is ambiguous.
-        Please use canonical etype type in the form of (srctype, etype, dsttype)
+
+        See Also
+        --------
+        canonical_etypes
        """
        if etype is None:
            if len(self.etypes) != 1:
@@ -1026,8 +1126,8 @@ class DGLHeteroGraph(object):
            if ret is None:
                raise DGLError('Edge type "{}" does not exist.'.format(etype))
            if len(ret) == 0:
-                raise DGLError('Edge type "%s" is ambiguous. Please use canonical etype '
-                               'type in the form of (srctype, etype, dsttype)' % etype)
+                raise DGLError('Edge type "%s" is ambiguous. Please use canonical edge type '
+                               'in the form of (srctype, etype, dsttype)' % etype)
            return ret

    def get_ntype_id(self, ntype):
@@ -1144,11 +1244,93 @@ class DGLHeteroGraph(object):
    #################################################################
    @property
    def batch_size(self):
-        """TBD"""
+        """Return the number of graphs in the batched graph.
+
+        Returns
+        -------
+        int
+            The number of graphs in the batch. If the graph is not a batched one,
+            it will return 1.
+
+        Examples
+        --------
+
+        The following example uses PyTorch backend.
+
+        >>> import dgl
+        >>> import torch
+
+        Query for homogeneous graphs.
+
+        >>> g1 = dgl.graph((torch.tensor([0, 1, 2]), torch.tensor([1, 2, 3])))
+        >>> g1.batch_size
+        1
+        >>> g2 = dgl.graph((torch.tensor([0, 0, 0, 1]), torch.tensor([0, 1, 2, 0])))
+        >>> bg = dgl.batch([g1, g2])
+        >>> bg.batch_size
+        2
+
+        Query for heterogeneous graphs.
+
+        >>> hg1 = dgl.heterograph({
+        ...       ('user', 'plays', 'game') : (torch.tensor([0, 1]), torch.tensor([0, 0]))})
+        >>> hg1.batch_size
+        1
+        >>> hg2 = dgl.heterograph({
+        ...       ('user', 'plays', 'game') : (torch.tensor([0, 0]), torch.tensor([1, 0]))})
+        >>> bg = dgl.batch([hg1, hg2])
+        >>> bg.batch_size
+        2
+        """
        return len(self.batch_num_nodes(self.ntypes[0]))

    def batch_num_nodes(self, ntype=None):
-        """TBD"""
+        """Return the number of nodes for each graph in the batch with the specified node type.
+
+        Parameters
+        ----------
+        ntype : str, optional
+            The node type for query. If the graph has multiple node types, one must
+            specify the argument. Otherwise, it can be omitted. If the graph is not a batched
+            one, it will return a tensor of length 1 that holds the number of nodes in the graph.
+
+        Returns
+        -------
+        Tensor
+            The number of nodes with the specified type for each graph in the batch. The i-th
+            element of it is the number of nodes with the specified type for the i-th graph.
+
+        Examples
+        --------
+
+        The following example uses PyTorch backend.
+
+        >>> import dgl
+        >>> import torch
+
+        Query for homogeneous graphs.
+
+        >>> g1 = dgl.graph((torch.tensor([0, 1, 2]), torch.tensor([1, 2, 3])))
+        >>> g1.batch_num_nodes()
+        tensor([4])
+        >>> g2 = dgl.graph((torch.tensor([0, 0, 0, 1]), torch.tensor([0, 1, 2, 0])))
+        >>> bg = dgl.batch([g1, g2])
+        >>> bg.batch_num_nodes()
+        tensor([4, 3])
+
+        Query for heterogeneous graphs.
+
+        >>> hg1 = dgl.heterograph({
+        ...       ('user', 'plays', 'game') : (torch.tensor([0, 1]), torch.tensor([0, 0]))})
+        >>> hg2 = dgl.heterograph({
+        ...       ('user', 'plays', 'game') : (torch.tensor([0, 0]), torch.tensor([1, 0]))})
+        >>> bg = dgl.batch([hg1, hg2])
+        >>> bg.batch_num_nodes('user')
+        tensor([2, 1])
+        """
+        if ntype is not None and ntype not in self.ntypes:
+            raise DGLError('Expect ntype in {}, got {}'.format(self.ntypes, ntype))
+
        if self._batch_num_nodes is None:
            self._batch_num_nodes = {}
            for ty in self.ntypes:
@@ -1170,7 +1352,52 @@ class DGLHeteroGraph(object):
        self._batch_num_nodes = val

    def batch_num_edges(self, etype=None):
-        """TBD"""
+        """Return the number of edges for each graph in the batch with the specified edge type.
+
+        Parameters
+        ----------
+        etype : str or tuple of str, optional
+            The edge type for query, which can be an edge type (str) or a canonical edge type
+            (3-tuple of str). When an edge type appears in multiple canonical edge types, one
+            must use a canonical edge type. If the graph has multiple edge types, one must
+            specify the argument. Otherwise, it can be omitted.
+
+        Returns
+        -------
+        Tensor
+            The number of edges with the specified type for each graph in the batch. The i-th
+            element of it is the number of edges with the specified type for the i-th graph.
+            If the graph is not a batched one, it will return a tensor of length 1 that holds
+            the number of edges in the graph.
+
+        Examples
+        --------
+
+        The following example uses PyTorch backend.
+
+        >>> import dgl
+        >>> import torch
+
+        Query for homogeneous graphs.
+
+        >>> g1 = dgl.graph((torch.tensor([0, 1, 2]), torch.tensor([1, 2, 3])))
+        >>> g1.batch_num_edges()
+        tensor([3])
+        >>> g2 = dgl.graph((torch.tensor([0, 0, 0, 1]), torch.tensor([0, 1, 2, 0])))
+        >>> bg = dgl.batch([g1, g2])
+        >>> bg.batch_num_edges()
+        tensor([3, 4])
+
+        Query for heterogeneous graphs.
+
+        >>> hg1 = dgl.heterograph({
+        ...       ('user', 'plays', 'game') : (torch.tensor([0, 1]), torch.tensor([0, 0]))})
+        >>> hg2 = dgl.heterograph({
+        ...       ('user', 'plays', 'game') : (torch.tensor([0, 0]), torch.tensor([1, 0]))})
+        >>> bg = dgl.batch([hg1, hg2])
+        >>> bg.batch_num_edges('plays')
+        tensor([2, 2])
+        """
        if self._batch_num_edges is None:
            self._batch_num_edges = {}
            for ty in self.canonical_etypes:
@@ -1181,6 +1408,8 @@ class DGLHeteroGraph(object):
                raise DGLError('Edge type name must be specified if there are more than one '
                               'edge types.')
            etype = self.canonical_etypes[0]
+        else:
+            etype = self.to_canonical_etype(etype)
        return self._batch_num_edges[etype]

    def set_batch_num_edges(self, val):
@@ -1197,37 +1426,114 @@ class DGLHeteroGraph(object):

    @property
    def nodes(self):
-        """Return a node view that can be used to set/get feature
-        data of a single node type.
+        """Return a node view.
+
+        One can use it for:
+
+        1. Getting the node IDs for a single node type.
+        2. Setting/getting features for all nodes of a single node type.

        Examples
        --------
        The following example uses PyTorch backend.

-        To set features of all users
+        >>> import dgl
+        >>> import torch
+
+        Create a homogeneous graph and a heterogeneous graph of two node types.
+
+        >>> g = dgl.graph((torch.tensor([0, 1]), torch.tensor([1, 2])))
+        >>> hg = dgl.heterograph({
+        ...     ('user', 'follows', 'user'): (torch.tensor([0, 1]), torch.tensor([1, 2])),
+        ...     ('user', 'plays', 'game'): (torch.tensor([3, 4]), torch.tensor([5, 6]))
+        ... })
+
+        Get the node IDs of the homogeneous graph.
+
+        >>> g.nodes()
+        tensor([0, 1, 2])
+
+        Get the node IDs of the heterogeneous graph. With multiple node types introduced,
+        one needs to specify the node type for query.
+
+        >>> hg.nodes('user')
+        tensor([0, 1, 2, 3, 4])

-        >>> g = dgl.graph(([0, 1], [1, 2]), 'user', 'follows')
-        >>> g.nodes['user'].data['h'] = torch.zeros(3, 5)
+        Set and get a feature 'h' for all nodes of a single type in the heterogeneous graph.
+
+        >>> hg.nodes['user'].data['h'] = torch.ones(5, 1)
+        >>> hg.nodes['user'].data['h']
+        tensor([[1.], [1.], [1.], [1.], [1.]])
+
+        To set node features for a graph with a single node type, use :func:`DGLGraph.ndata`.

        See Also
        --------
        ndata
        """
+        # TODO(Mufei): Replace the syntax g.nodes[...].data[...] with g.nodes[...][...]
        return HeteroNodeView(self, self.get_ntype_id)

    @property
    def srcnodes(self):
-        """Return a SRC node view that can be used to set/get feature
-        data of a single node type.
+        """Return a node view for source nodes.
+
+        If the graph is a uni-bipartite graph (see :func:`is_unibipartite` for reference),
+        this is :func:`nodes` restricted to source node types. Otherwise, it is an alias
+        for :func:`nodes`.
+
+        One can use it for:
+
+        1. Getting the node IDs for a single node type.
+        2. Setting/getting features for all nodes of a single node type.

        Examples
        --------
        The following example uses PyTorch backend.

-        To set features of all users
+        >>> import dgl
+        >>> import torch
+
+        Create a uni-bipartite graph.
+
+        >>> g = dgl.heterograph({
+        ...     ('user', 'plays', 'game'): (torch.tensor([0]), torch.tensor([1])),
+        ...     ('developer', 'develops', 'game'): (torch.tensor([1]), torch.tensor([2]))
+        ... })
+
+        Get the node IDs for source node types.
+
+        >>> g.srcnodes('user')
+        tensor([0])
+        >>> g.srcnodes('developer')
+        tensor([0, 1])
+
+        Set/get features for source node types.
+
+        >>> g.srcnodes['user'].data['h'] = torch.ones(1, 1)
+        >>> g.srcnodes['user'].data['h']
+        tensor([[1.]])

-        >>> g = dgl.bipartite(([0, 1], [1, 2]), 'user', 'plays', 'game')
-        >>> g.srcnodes['user'].data['h'] = torch.zeros(2, 5)
+        Create a graph that is not uni-bipartite.
+
+        >>> g = dgl.heterograph({
+        ...     ('user', 'follows', 'user'): (torch.tensor([0]), torch.tensor([1])),
+        ...     ('developer', 'develops', 'game'): (torch.tensor([1]), torch.tensor([2]))
+        ... })
+
+        :func:`dgl.DGLGraph.srcnodes` falls back to :func:`dgl.DGLGraph.nodes` and one can
+        get the node IDs for both source and destination node types.
+
+        >>> g.srcnodes('game')
+        tensor([0, 1, 2])
+
+        One can also set/get features for destination node types in this case.
+
+        >>> g.srcnodes['game'].data['h'] = torch.ones(3, 1)
+        >>> g.srcnodes['game'].data['h']
+        tensor([[1.],
+                [1.],
+                [1.]])

        See Also
        --------
@@ -1237,17 +1543,63 @@ class DGLHeteroGraph(object):

    @property
    def dstnodes(self):
-        """Return a DST node view that can be used to set/get feature
-        data of a single node type.
+        """Return a node view for destination nodes.
+
+        If the graph is a uni-bipartite graph (see :func:`is_unibipartite` for reference),
+        this is :func:`nodes` restricted to destination node types. Otherwise, it is an alias
+        for :func:`nodes`.
+
+        One can use it for:
+
+        1. Getting the node IDs for a single node type.
+        2. Setting/getting features for all nodes of a single node type.

        Examples
        --------
        The following example uses PyTorch backend.

-        To set features of all games
+        >>> import dgl
+        >>> import torch
+
+        Create a uni-bipartite graph.

-        >>> g = dgl.bipartite(([0, 1], [1, 2]), 'user', 'plays', 'game')
-        >>> g.dstnodes['game'].data['h'] = torch.zeros(3, 5)
+        >>> g = dgl.heterograph({
+        ...     ('user', 'plays', 'game'): (torch.tensor([0]), torch.tensor([1])),
+        ...     ('developer', 'develops', 'game'): (torch.tensor([1]), torch.tensor([2]))
+        ... })
+
+        Get the node IDs for destination node types.
+
+        >>> g.dstnodes('game')
+        tensor([0, 1, 2])
+
+        Set/get features for destination node types.
+
+        >>> g.dstnodes['game'].data['h'] = torch.ones(3, 1)
+        >>> g.dstnodes['game'].data['h']
+        tensor([[1.],
+                [1.],
+                [1.]])
+
+        Create a graph that is not uni-bipartite.
+
+        >>> g = dgl.heterograph({
+        ...     ('user', 'follows', 'user'): (torch.tensor([0]), torch.tensor([1])),
+        ...     ('developer', 'develops', 'game'): (torch.tensor([1]), torch.tensor([2]))
+        ... })
+
+        :func:`dgl.DGLGraph.dstnodes` falls back to :func:`dgl.DGLGraph.nodes` and one can
+        get the node IDs for both source and destination node types.
+
+        >>> g.dstnodes('developer')
+        tensor([0, 1])
+
+        One can also set/get features for source node types in this case.
+
+        >>> g.dstnodes['developer'].data['h'] = torch.ones(2, 1)
+        >>> g.dstnodes['developer'].data['h']
+        tensor([[1.],
+                [1.]])

        See Also
        --------
@@ -1257,50 +1609,53 @@ class DGLHeteroGraph(object):

    @property
    def ndata(self):
-        """Return the data view of all the nodes.
+        """Return a node data view for setting/getting node features.
+
+        Let ``g`` be a DGLGraph. If ``g`` is a graph of a single node type, ``g.ndata[feat]``
+        returns the node feature associated with the name ``feat``. One can also set a node
+        feature associated with the name ``feat`` by setting ``g.ndata[feat]`` to a tensor.

-        If the graph has only one node type, ``g.ndata['feat']`` gives
-        the node feature data under name ``'feat'``.
-        If the graph has multiple node types, then ``g.ndata['feat']``
-        returns a dictionary where the key is the node type and the
-        value is the node feature tensor. If the node type does not
-        have feature `'feat'`, it is not included in the dictionary.
+        If ``g`` is a graph of multiple node types, ``g.ndata[feat]`` returns a
+        dict[str, Tensor] mapping node types to the node features associated with the name
+        ``feat`` for the corresponding type. One can also set a node feature associated
+        with the name ``feat`` for some node type(s) by setting ``g.ndata[feat]`` to a
+        dictionary as described.
+
+        Notes
+        -----
+        For setting features, the device of the features must be the same as the device
+        of the graph.

        Examples
        --------
        The following example uses PyTorch backend.

-        To set features of all nodes in a heterogeneous graph
-        with only one node type:
-
-        >>> g = dgl.graph(([0, 1], [1, 2]), 'user', 'follows')
-        >>> g.ndata['h'] = torch.zeros(3, 5)
+        >>> import dgl
+        >>> import torch

-        To set features of all nodes in a heterogeneous graph
-        with multiple node types:
+        Set and get feature 'h' for a graph of a single node type.

-        >>> g = dgl.heterograph({('user', 'like', 'movie') : ([0, 1, 1], [1, 2, 0])})
-        >>> g.ndata['h'] = {'user': torch.zeros(2, 5),
-        ...                 'movie': torch.zeros(3, 5)}
+        >>> g = dgl.graph((torch.tensor([0, 1]), torch.tensor([1, 2])))
+        >>> g.ndata['h'] = torch.ones(3, 1)
        >>> g.ndata['h']
-        ... {'user': tensor([[0., 0., 0., 0., 0.],
-        ...                 [0., 0., 0., 0., 0.]]),
-        ...  'movie': tensor([[0., 0., 0., 0., 0.],
-        ...                   [0., 0., 0., 0., 0.],
-        ...                   [0., 0., 0., 0., 0.]])}
+        tensor([[1.],
+                [1.],
+                [1.]])

-        To set features of part of nodes in a heterogeneous graph
-        with multiple node types:
+        Set and get feature 'h' for a graph of multiple node types.

-        >>> g = dgl.heterograph({('user', 'like', 'movie') : ([0, 1, 1], [1, 2, 0])})
-        >>> g.ndata['h'] = {'user': torch.zeros(2, 5)}
+        >>> g = dgl.heterograph({
+        ...     ('user', 'follows', 'user'): (torch.tensor([1, 2]), torch.tensor([3, 4])),
+        ...     ('player', 'plays', 'game'): (torch.tensor([2, 2]), torch.tensor([1, 1]))
+        ... })
+        >>> g.ndata['h'] = {'game': torch.zeros(2, 1), 'player': torch.ones(3, 1)}
        >>> g.ndata['h']
-        ... {'user': tensor([[0., 0., 0., 0., 0.],
-        ...                  [0., 0., 0., 0., 0.]])}
-        >>> # clean the feature 'h' and no node type contains 'h'
-        >>> g.ndata.pop('h')
+        {'game': tensor([[0.], [0.]]),
+         'player': tensor([[1.], [1.], [1.]])}
+        >>> g.ndata['h'] = {'game': torch.ones(2, 1)}
        >>> g.ndata['h']
-        ... {}
+        {'game': tensor([[1.], [1.]]),
+         'player': tensor([[1.], [1.], [1.]])}

        See Also
        --------
@@ -1315,84 +1670,62 @@ class DGLHeteroGraph(object):
            ntypes = self.ntypes
            return HeteroNodeDataView(self, ntypes, ntids, ALL)

-
    @property
    def srcdata(self):
-        """Return the data view of all nodes in the SRC category.
+        """Return a node data view for setting/getting source node features.

-        If the source nodes have only one node type, ``g.srcdata['feat']``
-        gives the node feature data under name ``'feat'``.
-        If the source nodes have multiple node types, then
-        ``g.srcdata['feat']`` returns a dictionary where the key is
-        the source node type and the value is the node feature
-        tensor. If the source node type does not have feature
-        `'feat'`, it is not included in the dictionary.
+        Let ``g`` be a DGLGraph. If ``g`` is a graph of a single source node type,
+        ``g.srcdata[feat]`` returns the source node feature associated with the name ``feat``.
+        One can also set a source node feature associated with the name ``feat`` by
+        setting ``g.srcdata[feat]`` to a tensor.
+
+        If ``g`` is a graph of multiple source node types, ``g.srcdata[feat]`` returns a
+        dict[str, Tensor] mapping source node types to the node features associated with
+        the name ``feat`` for the corresponding type. One can also set a node feature
+        associated with the name ``feat`` for some source node type(s) by setting
+        ``g.srcdata[feat]`` to a dictionary as described.
+
+        Notes
+        -----
+        For setting features, the device of the features must be the same as the device
+        of the graph.

        Examples
        --------
        The following example uses PyTorch backend.

-        To set features of all source nodes in a graph with only one edge type:
-
-        >>> g = dgl.bipartite(([0, 1], [1, 2]), 'user', 'plays', 'game')
-        >>> g.srcdata['h'] = torch.zeros(2, 5)
-
-        This is equivalent to
-
-        >>> g.nodes['user'].data['h'] = torch.zeros(2, 5)
+        >>> import dgl
+        >>> import torch

-        Also work on more complex uni-bipartite graph
+        Set and get feature 'h' for a graph of a single source node type.

        >>> g = dgl.heterograph({
-        ...     ('user', 'plays', 'game') : ([0, 1], [1, 2]),
-        ...     ('user', 'reads', 'book') : ([0, 1], [1, 0]),
-        ...     })
-        >>> print(g.is_unibipartite)
-        True
-        >>> g.srcdata['h'] = torch.zeros(2, 5)
+        ...     ('user', 'plays', 'game'): (torch.tensor([0, 1]), torch.tensor([1, 2]))})
+        >>> g.srcdata['h'] = torch.ones(2, 1)
+        >>> g.srcdata['h']
+        tensor([[1.],
+                [1.]])

-        To set features of all source nodes in a uni-bipartite graph
-        with multiple source node types:
+        Set and get feature 'h' for a graph of multiple source node types.

        >>> g = dgl.heterograph({
-        ...     ('game', 'liked-by', 'user') : ([1, 2], [0, 1]),
-        ...     ('book', 'liked-by', 'user') : ([0, 1], [1, 0]),
-        ...     })
-        >>> print(g.is_unibipartite)
-        True
-        >>> g.srcdata['h'] = {'game' : torch.zeros(3, 5),
-        ...                   'book' : torch.zeros(2, 5)}
-        >>> g.srcdata['h']
-        ... {'game': tensor([[0., 0., 0., 0., 0.],
-        ...                  [0., 0., 0., 0., 0.],
-        ...                  [0., 0., 0., 0., 0.]]),
-        ...  'book': tensor([[0., 0., 0., 0., 0.],
-        ...                  [0., 0., 0., 0., 0.]])}
-
-        To set features of part of source nodes in a uni-bipartite graph
-        with multiple source node types:
-        >>> g = dgl.heterograph({
-        ...     ('game', 'liked-by', 'user') : ([1, 2], [0, 1]),
-        ...     ('book', 'liked-by', 'user') : ([0, 1], [1, 0]),
-        ...     })
-        >>> g.srcdata['h'] = {'game' : torch.zeros(3, 5)}
+        ...     ('user', 'plays', 'game'): (torch.tensor([1, 2]), torch.tensor([3, 4])),
+        ...     ('player', 'plays', 'game'): (torch.tensor([2, 2]), torch.tensor([1, 1]))
+        ... })
+        >>> g.srcdata['h'] = {'user': torch.zeros(3, 1), 'player': torch.ones(3, 1)}
        >>> g.srcdata['h']
-        >>> {'game': tensor([[0., 0., 0., 0., 0.],
-        ...                  [0., 0., 0., 0., 0.],
-        ...                  [0., 0., 0., 0., 0.]])}
-        >>> # clean the feature 'h' and no source node type contains 'h'
-        >>> g.srcdata.pop('h')
+        {'player': tensor([[1.], [1.], [1.]]),
+         'user': tensor([[0.], [0.], [0.]])}
+        >>> g.srcdata['h'] = {'user': torch.ones(3, 1)}
        >>> g.srcdata['h']
-        ... {}
-
-
-        Notes
-        -----
-        This is identical to :any:`DGLHeteroGraph.ndata` if the graph is homogeneous.
+        {'player': tensor([[1.], [1.], [1.]]),
+         'user': tensor([[1.], [1.], [1.]])}

        See Also
        --------
        nodes
+        ndata
+        srcnodes
        """
        if len(self.srctypes) == 1:
            ntype = self.srctypes[0]
@@ -1405,81 +1738,61 @@ class DGLHeteroGraph(object):

    @property
    def dstdata(self):
-        """Return the data view of all destination nodes.
+        """Return a node data view for setting/getting destination node features.
+
+        Let ``g`` be a DGLGraph. If ``g`` is a graph of a single destination node type,
+        ``g.dstdata[feat]`` returns the destination node feature associated with the name
+        ``feat``. One can also set a destination node feature associated with the name
+        ``feat`` by setting ``g.dstdata[feat]`` to a tensor.

-        If the destination nodes have only one node type,
-        ``g.dstdata['feat']`` gives the node feature data under name
-        ``'feat'``.
-        If the destination nodes have multiple node types, then
-        ``g.dstdata['feat']`` returns a dictionary where the key is
-        the destination node type and the value is the node feature
-        tensor. If the destination node type does not have feature
-        `'feat'`, it is not included in the dictionary.
+        If ``g`` is a graph of multiple destination node types, ``g.dstdata[feat]`` returns a
+        dict[str, Tensor] mapping destination node types to the node features associated with
+        the name ``feat`` for the corresponding type. One can also set a node feature
+        associated with the name ``feat`` for some destination node type(s) by setting
+        ``g.dstdata[feat]`` to a dictionary as described.
+
+        Notes
+        -----
+        For setting features, the device of the features must be the same as the device
+        of the graph.

        Examples
        --------
        The following example uses PyTorch backend.

-        To set features of all source nodes in a graph with only one edge type:
+        >>> import dgl
+        >>> import torch

-        >>> g = dgl.bipartite(([0, 1], [1, 2]), 'user', 'plays', 'game')
-        >>> g.dstdata['h'] = torch.zeros(3, 5)
-
-        This is equivalent to
-
-        >>> g.nodes['game'].data['h'] = torch.zeros(3, 5)
-
-        Also work on more complex uni-bipartite graph
+        Set and get feature 'h' for a graph of a single destination node type.

        >>> g = dgl.heterograph({
-        ...     ('user', 'plays', 'game') : ([0, 1], [1, 2]),
-        ...     ('store', 'sells', 'game') : ([0, 1], [1, 0]),
-        ...     })
-        >>> print(g.is_unibipartite)
-        True
-        >>> g.dstdata['h'] = torch.zeros(3, 5)
+        ...     ('user', 'plays', 'game'): (torch.tensor([0, 1]), torch.tensor([1, 2]))})
+        >>> g.dstdata['h'] = torch.ones(3, 1)
+        >>> g.dstdata['h']
+        tensor([[1.],
+                [1.],
+                [1.]])

-        To set features of all destination nodes in a uni-bipartite graph
-        with multiple destination node types::
+        Set and get feature 'h' for a graph of multiple destination node types.

        >>> g = dgl.heterograph({
-        ...     ('user', 'plays', 'game') : ([0, 1], [1, 2]),
-        ...     ('user', 'reads', 'book') : ([0, 1], [1, 0]),
-        ...     })
-        >>> print(g.is_unibipartite)
-        True
-        >>> g.dstdata['h'] = {'game' : torch.zeros(3, 5),
-        ...                   'book' : torch.zeros(2, 5)}
-        >>> g.dstdata['h']
-        ... {'game': tensor([[0., 0., 0., 0., 0.],
-        ...                  [0., 0., 0., 0., 0.],
-        ...                  [0., 0., 0., 0., 0.]]),
-        ...  'book': tensor([[0., 0., 0., 0., 0.],
-        ...                  [0., 0., 0., 0., 0.]])}
-
-        To set features of part of destination nodes in a uni-bipartite graph
-        with multiple destination node types:
-        >>> g = dgl.heterograph({
-        ...     ('user', 'plays', 'game') : ([0, 1], [1, 2]),
-        ...     ('user', 'reads', 'book') : ([0, 1], [1, 0]),
-        ...     })
-        >>> g.dstdata['h'] = {'game' : torch.zeros(3, 5)}
+        ...     ('user', 'plays', 'game'): (torch.tensor([1, 2]), torch.tensor([1, 2])),
+        ...     ('user', 'watches', 'movie'): (torch.tensor([2, 2]), torch.tensor([1, 1]))
+        ... })
+        >>> g.dstdata['h'] = {'game': torch.zeros(3, 1), 'movie': torch.ones(2, 1)}
        >>> g.dstdata['h']
-        ... {'game': tensor([[0., 0., 0., 0., 0.],
-        ...                  [0., 0., 0., 0., 0.],
-        ...                  [0., 0., 0., 0., 0.]])}
-        >>> # clean the feature 'h' and no destination node type contains 'h'
-        >>> g.dstdata.pop('h')
+        {'game': tensor([[0.], [0.], [0.]]),
+         'movie': tensor([[1.], [1.]])}
+        >>> g.dstdata['h'] = {'game': torch.ones(3, 1)}
        >>> g.dstdata['h']
-        ... {}
-
-        Notes
-        -----
-        This is identical to :any:`DGLHeteroGraph.ndata` if the graph is homogeneous.
+        {'game': tensor([[1.], [1.], [1.]]),
+         'movie': tensor([[1.], [1.]])}

        See Also
        --------
        nodes
+        ndata
+        dstnodes
        """
        if len(self.dsttypes) == 1:
            ntype = self.dsttypes[0]
@@ -1492,78 +1805,142 @@ class DGLHeteroGraph(object):

    @property
    def edges(self):
-        """Return an edge view that can be used to set/get feature
-        data of a single edge type.
+        """Return an edge view
+
+        One can use it for:
+
+        1. Getting the edges for a single edge type. In this case, it can take the
+           following optional arguments:
+
+            - form : str, optional
+                  The return form, which can be one of the following:
+
+                  - ``'uv'`` (default): The returned result is a 2-tuple of 1D tensors
+                    :math:`(U, V)`, representing the source and destination nodes of all edges.
+                    For each :math:`i`, :math:`(U[i], V[i])` forms an edge.
+                  - ``'eid'``: The returned result is a 1D tensor :math:`EID`, representing
+                    the IDs of all edges.
+                  - ``'all'``: The returned result is a 3-tuple of 1D tensors :math:`(U, V, EID)`,
+                    representing the source nodes, destination nodes and IDs of all edges.
+                    For each :math:`i`, :math:`(U[i], V[i])` forms an edge with ID :math:`EID[i]`.
+            - order : str, optional
+                  The order of the returned edges, which can be one of the following:
+
+                  - ``'eid'`` (default): The edges are sorted by their IDs.
+                  - ``'srcdst'``: The edges are sorted first by their source node IDs and then
+                    by their destination node IDs to break ties.
+            - etype : str or tuple of str, optional
+                  The edge type for query, which can be an edge type (str) or a canonical edge
+                  type (3-tuple of str). When an edge type appears in multiple canonical edge
+                  types, one must use a canonical edge type. If the graph has multiple edge
+                  types, one must specify the argument. Otherwise, it can be omitted.
+        2. Setting/getting features for all edges of a single edge type. To set/get a feature
+           ``feat`` for edges of type ``etype`` in a graph ``g``, one can use
+           ``g.edges[etype].data[feat]``.

        Examples
        --------
        The following example uses PyTorch backend.

-        To set features of all "play" relationships:
+        >>> import dgl
+        >>> import torch

-        >>> g = dgl.bipartite(([0, 1, 1], [0, 0, 2]), 'user', 'plays', 'game')
-        >>> g.edges['plays'].data['h'] = torch.zeros(3, 4)
+        **Get the Edges for a Single Edge Type**
+
+        Create a graph with a single edge type.
+
+        >>> g = dgl.graph((torch.tensor([1, 0, 0]), torch.tensor([1, 1, 0])))
+        >>> g.edges()
+        (tensor([1, 0, 0]), tensor([1, 1, 0]))
+
+        Specify a different value for :attr:`form` and :attr:`order`.
+
+        >>> g.edges(form='all', order='srcdst')
+        (tensor([0, 0, 1]), tensor([0, 1, 1]), tensor([2, 1, 0]))
+
+        For a graph of multiple edge types, it is required to specify the edge type in query.
+
+        >>> hg = dgl.heterograph({
+        ...     ('user', 'follows', 'user'): (torch.tensor([0, 1]), torch.tensor([1, 2])),
+        ...     ('user', 'plays', 'game'): (torch.tensor([3, 4]), torch.tensor([5, 6]))
+        ... })
+        >>> hg.edges(etype='plays')
+        (tensor([3, 4]), tensor([5, 6]))
+
+        **Set/get Features for All Edges of a Single Edge Type**
+
+        Create a heterogeneous graph of two edge types.
+
+        >>> hg = dgl.heterograph({
+        ...     ('user', 'follows', 'user'): (torch.tensor([0, 1]), torch.tensor([1, 2])),
+        ...     ('user', 'plays', 'game'): (torch.tensor([3, 4]), torch.tensor([5, 6]))
+        ... })
+
+        Set and get a feature 'h' for all edges of a single type in the heterogeneous graph.
+
+        >>> hg.edges['follows'].data['h'] = torch.ones(2, 1)
+        >>> hg.edges['follows'].data['h']
+        tensor([[1.], [1.]])
+
+        To set edge features for a graph with a single edge type, use :func:`DGLGraph.edata`.

        See Also
        --------
        edata
        """
+        # TODO(Mufei): Replace the syntax g.edges[...].data[...] with g.edges[...][...]
        return HeteroEdgeView(self)

    @property
    def edata(self):
-        """Return the data view of all the edges.
+        """Return an edge data view for setting/getting edge features.
+
+        Let ``g`` be a DGLGraph. If ``g`` is a graph of a single edge type, ``g.edata[feat]``
+        returns the edge feature associated with the name ``feat``. One can also set an
+        edge feature associated with the name ``feat`` by setting ``g.edata[feat]`` to a tensor.

-        If the graph has only one edge type, ``g.edata['feat']`` gives the
-        edge feature data under name ``'feat'``.
-        If the graph has multiple edge types, then ``g.edata['feat']``
-        returns a dictionary where the key is the edge type and the value
-        is the edge feature tensor. If the edge type does not have feature
-        ``'feat'``, it is not included in the dictionary.
+        If ``g`` is a graph of multiple edge types, ``g.edata[feat]`` returns a
+        dict[(str, str, str), Tensor] mapping canonical edge types to the edge features
+        associated with the name ``feat`` for the corresponding type. One can also set an
+        edge feature associated with the name ``feat`` for some edge type(s) by setting
+        ``g.edata[feat]`` to a dictionary as described.

-        Note: When the graph has multiple edge type, The key used in
-        ``g.edata['feat']`` should be the canonical_etypes, i.e.
-        (h_ntype, r_type, t_ntype).
+        Notes
+        -----
+        For setting features, the device of the features must be the same as the device
+        of the graph.

        Examples
        --------
        The following example uses PyTorch backend.

-        To set features of all edges in a heterogeneous graph
-        with only one edge type:
-
-        >>> g = dgl.graph(([0, 1], [1, 2]), 'user', 'follows')
-        >>> g.edata['h'] = torch.zeros(2, 5)
+        >>> import dgl
+        >>> import torch

-        To set features of all edges in a heterogeneous graph
-        with multiple edge types:
+        Set and get feature 'h' for a graph of a single edge type.

-        >>> g0 = dgl.bipartite(([0, 1, 1], [1, 0, 1]), 'user', 'watches', 'movie')
-        >>> g1 = dgl.bipartite(([0, 1], [0, 1]), 'user', 'watches', 'TV')
-        >>> g = dgl.hetero_from_relations([g0, g1])
-        >>> g.edata['h'] = {('user', 'watches', 'movie') : torch.zeros(3, 5),
-                            ('user', 'watches', 'TV') : torch.zeros(2, 5)}
+        >>> g = dgl.graph((torch.tensor([0, 1]), torch.tensor([1, 2])))
+        >>> g.edata['h'] = torch.ones(2, 1)
        >>> g.edata['h']
-        ... {('user', 'watches', 'movie'): tensor([[0., 0., 0., 0., 0.],
-        ...                                        [0., 0., 0., 0., 0.],
-        ...                                        [0., 0., 0., 0., 0.]]),
-        ...  ('user', 'watches', 'TV'): tensor([[0., 0., 0., 0., 0.],
-        ...                                     [0., 0., 0., 0., 0.]])}
-
-        To set features of part of edges in a heterogeneous graph
-        with multiple edge types:
-        >>> g0 = dgl.bipartite(([0, 1, 1], [1, 0, 1]), 'user', 'watches', 'movie')
-        >>> g1 = dgl.bipartite(([0, 1], [0, 1]), 'user', 'watches', 'TV')
-        >>> g = dgl.hetero_from_relations([g0, g1])
-        >>> g.edata['h'] = {('user', 'watches', 'movie') : torch.zeros(3, 5)}
+        tensor([[1.],
+                [1.]])
+
+        Set and get feature 'h' for a graph of multiple edge types.
+
+        >>> g = dgl.heterograph({
+        ...     ('user', 'follows', 'user'): (torch.tensor([1, 2]), torch.tensor([3, 4])),
+        ...     ('user', 'plays', 'user'): (torch.tensor([2, 2]), torch.tensor([1, 1])),
+        ...     ('player', 'plays', 'game'): (torch.tensor([2, 2]), torch.tensor([1, 1]))
+        ... })
+        >>> g.edata['h'] = {('user', 'follows', 'user'): torch.zeros(2, 1),
+        ...                 ('user', 'plays', 'user'): torch.ones(2, 1)}
        >>> g.edata['h']
-        ... {('user', 'watches', 'movie'): tensor([[0., 0., 0., 0., 0.],
-        ...                                        [0., 0., 0., 0., 0.],
-        ...                                        [0., 0., 0., 0., 0.]])}
-        >>> # clean the feature 'h' and no edge type contains 'h'
-        >>> g.edata.pop('h')
+        {('user', 'follows', 'user'): tensor([[0.], [0.]]),
+         ('user', 'plays', 'user'): tensor([[1.], [1.]])}
+        >>> g.edata['h'] = {('user', 'follows', 'user'): torch.ones(2, 1)}
        >>> g.edata['h']
-        ... {}
+        {('user', 'follows', 'user'): tensor([[1.], [1.]]),
+         ('user', 'plays', 'user'): tensor([[1.], [1.]])}

        See Also
        --------
@@ -1598,7 +1975,7 @@ class DGLHeteroGraph(object):
        equivalent to ``self.edge_type_subgraph(etype)``.  The node and edge features
-        of the returned graph would be shared with thew original graph.
+        of the returned graph would be shared with the original graph.

-        If there are multiple canonical edge type found, then the source/edge/destination
+        If there are multiple canonical edge types found, then the source/edge/destination
        node types would be a *concatenation* of original node/edge types.  The
        new source/destination node type would have the concatenation determined by
        :func:`dgl.combine_names() <dgl.combine_names>` called on original source/destination
@@ -1680,40 +2057,65 @@ class DGLHeteroGraph(object):
    #################################################################

    def number_of_nodes(self, ntype=None):
-        """Return the number of nodes of the given type in the heterograph.
+        """Alias of :func:`num_nodes`"""
+        return self.num_nodes(ntype)
+
+    def num_nodes(self, ntype=None):
+        """Return the number of nodes.

        Parameters
        ----------
        ntype : str, optional
-            The node type. Can be omitted if there is only one node type
-            in the graph. (Default: None)
+            The node type for query. If given, it returns the number of nodes for a particular
+            type. If not given (default), it returns the total number of nodes of all types.

        Returns
        -------
        int
-            The number of nodes
+            The number of nodes.

        Examples
        --------

-        >>> g = dgl.graph(([0, 1], [1, 2]), 'user', 'follows')
-        >>> g.number_of_nodes('user')
-        3
-        >>> g.number_of_nodes()
-        3
+        The following example uses PyTorch backend.
+
+        >>> import dgl
+        >>> import torch
+
+        Create a graph with two node types -- 'user' and 'game'.
+
+        >>> g = dgl.heterograph({
+        ...     ('user', 'follows', 'user'): (torch.tensor([0, 1]), torch.tensor([1, 2])),
+        ...     ('user', 'plays', 'game'): (torch.tensor([3, 4]), torch.tensor([5, 6]))
+        ... })
+
+        Query for the number of nodes.
+
+        >>> g.num_nodes('user')
+        5
+        >>> g.num_nodes('game')
+        7
+        >>> g.num_nodes()
+        12
        """
-        return self._graph.number_of_nodes(self.get_ntype_id(ntype))
+        if ntype is None:
+            return sum([self._graph.number_of_nodes(ntid) for ntid in range(len(self.ntypes))])
+        else:
+            return self._graph.number_of_nodes(self.get_ntype_id(ntype))

    def number_of_src_nodes(self, ntype=None):
-        """Return the number of nodes of the given SRC node type in the heterograph.
+        """Alias of :func:`num_src_nodes`"""
+        return self.num_src_nodes(ntype)

-        The heterograph is usually a unidirectional bipartite graph.
+    def num_src_nodes(self, ntype=None):
+        """Return the number of nodes of the given source node type.

        Parameters
        ----------
        ntype : str, optional
-            Node type.
-            If omitted, there should be only one node type in the SRC category.
+            The source node type for query. If given, it returns the number of nodes for a
+            particular source node type. If not given (default), it returns the number of
+            nodes summed over all source node types.

        Returns
        -------
@@ -1722,26 +2124,52 @@ class DGLHeteroGraph(object):

        Examples
        --------
-        >>> g = dgl.bipartite(([0, 1], [1, 2]), 'user', 'plays', 'game')
-        >>> g.number_of_src_nodes('user')
-        2
-        >>> g.number_of_src_nodes()
-        2
-        >>> g.number_of_nodes('user')
+        The following example uses PyTorch backend.
+
+        >>> import dgl
+        >>> import torch
+
+        Create a homogeneous graph for query.
+
+        >>> g = dgl.graph((torch.tensor([0, 1]), torch.tensor([1, 2])))
+        >>> g.num_src_nodes()
+        3
+
+        Create a heterogeneous graph with two source node types -- 'developer' and 'user'.
+
+        >>> g = dgl.heterograph({
+        ...     ('developer', 'develops', 'game'): (torch.tensor([0, 1]), torch.tensor([1, 2])),
+        ...     ('user', 'plays', 'game'): (torch.tensor([3, 4]), torch.tensor([5, 6]))
+        ... })
+
+        Query for the number of nodes.
+
+        >>> g.num_src_nodes('developer')
        2
+        >>> g.num_src_nodes('user')
+        5
+        >>> g.num_src_nodes()
+        7
        """
-        return self._graph.number_of_nodes(self.get_ntype_id_from_src(ntype))
+        if ntype is None:
+            return sum([self._graph.number_of_nodes(self.get_ntype_id_from_src(nty))
+                        for nty in self.srctypes])
+        else:
+            return self._graph.number_of_nodes(self.get_ntype_id_from_src(ntype))

    def number_of_dst_nodes(self, ntype=None):
-        """Return the number of nodes of the given DST node type in the heterograph.
+        """Alias of :func:`num_dst_nodes`"""
+        return self.num_dst_nodes(ntype)

-        The heterograph is usually a unidirectional bipartite graph.
+    def num_dst_nodes(self, ntype=None):
+        """Return the number of nodes of the given destination node type.

        Parameters
        ----------
        ntype : str, optional
-            Node type.
-            If omitted, there should be only one node type in the DST category.
+            The destination node type for query. If given, it returns the number of nodes for a
+            particular destination node type. If not given (default), it returns the number of
+            nodes summed over all destination node types.

        Returns
        -------
@@ -1750,42 +2178,95 @@ class DGLHeteroGraph(object):

        Examples
        --------
-        >>> g = dgl.bipartite(([0, 1], [1, 2]), 'user', 'plays', 'game')
-        >>> g.number_of_dst_nodes('game')
-        3
-        >>> g.number_of_dst_nodes()
-        3
-        >>> g.number_of_nodes('game')
+        The following example uses PyTorch backend.
+
+        >>> import dgl
+        >>> import torch
+
+        Create a homogeneous graph for query.
+
+        >>> g = dgl.graph((torch.tensor([0, 1]), torch.tensor([1, 2])))
+        >>> g.num_dst_nodes()
        3
+
+        Create a heterogeneous graph with two destination node types -- 'user' and 'game'.
+
+        >>> g = dgl.heterograph({
+        ...     ('user', 'follows', 'user'): (torch.tensor([0, 1]), torch.tensor([1, 2])),
+        ...     ('user', 'plays', 'game'): (torch.tensor([3, 4]), torch.tensor([5, 6]))
+        ... })
+
+        Query for the number of nodes.
+
+        >>> g.num_dst_nodes('user')
+        5
+        >>> g.num_dst_nodes('game')
+        7
+        >>> g.num_dst_nodes()
+        12
        """
-        return self._graph.number_of_nodes(self.get_ntype_id_from_dst(ntype))
+        if ntype is None:
+            return sum([self._graph.number_of_nodes(self.get_ntype_id_from_dst(nty))
+                        for nty in self.dsttypes])
+        else:
+            return self._graph.number_of_nodes(self.get_ntype_id_from_dst(ntype))

    def number_of_edges(self, etype=None):
-        """Return the number of edges of the given type in the heterograph.
+        """Alias of :func:`num_edges`"""
+        return self.num_edges(etype)
+
+    def num_edges(self, etype=None):
+        """Return the number of edges.

        Parameters
        ----------
        etype : str or tuple of str, optional
-            The edge type. Can be omitted if there is only one edge type
-            in the graph.
+            The edge type for query, which can be an edge type (str) or a canonical edge type
+            (3-tuple of str). When an edge type appears in multiple canonical edge types, one
+            must use a canonical edge type. If given, it returns the number of edges for a
+            particular edge type. If not given (default), it returns the total number of edges
+            of all types.

        Returns
        -------
        int
-            The number of edges
+            The number of edges.

        Examples
        --------

-        >>> g = dgl.graph(([0, 1], [1, 2]), 'user', 'follows')
-        >>> g.number_of_edges(('user', 'follows', 'user'))
-        2
-        >>> g.number_of_edges('follows')
+        The following example uses PyTorch backend.
+
+        >>> import dgl
+        >>> import torch
+
+        Create a graph with three canonical edge types.
+
+        >>> g = dgl.heterograph({
+        ...     ('user', 'follows', 'user'): (torch.tensor([0, 1]), torch.tensor([1, 2])),
+        ...     ('user', 'follows', 'game'): (torch.tensor([0, 1, 2]), torch.tensor([1, 2, 3])),
+        ...     ('user', 'plays', 'game'): (torch.tensor([1, 3]), torch.tensor([2, 3]))
+        ... })
+
+        Query for the number of edges.
+
+        >>> g.num_edges('plays')
        2
-        >>> g.number_of_edges()
+        >>> g.num_edges()
+        7
+
+        Use a canonical edge type instead when there is ambiguity for an edge type.
+
+        >>> g.num_edges(('user', 'follows', 'user'))
        2
+        >>> g.num_edges(('user', 'follows', 'game'))
+        3
        """
-        return self._graph.number_of_edges(self.get_etype_id(etype))
+        if etype is None:
+            return sum([self._graph.number_of_edges(etid)
+                        for etid in range(len(self.canonical_etypes))])
+        else:
+            return self._graph.number_of_edges(self.get_etype_id(etype))

    def __len__(self):
        """Deprecated: please directly call :func:`number_of_nodes`
@@ -1798,13 +2279,91 @@ class DGLHeteroGraph(object):
    def is_multigraph(self):
        """Whether the graph is a multigraph

+        In a multigraph, there can be multiple edges from a node ``u`` to a node ``v``.
+
+        For a heterogeneous graph of multiple canonical edge types, we consider it as a
+        multigraph if there are multiple edges from a node ``u`` to a node ``v`` for any
+        canonical edge type.
+
        Returns
        -------
        bool
-            True if the graph is a multigraph, False otherwise.
+            Whether the graph is a multigraph.
+
+        Notes
+        -----
+        Checking whether the graph is a multigraph can be expensive for a large one.
+
+        Examples
+        --------
+
+        The following example uses PyTorch backend.
+
+        >>> import dgl
+        >>> import torch
+
+        Check for homogeneous graphs.
+
+        >>> g = dgl.graph((torch.tensor([0, 1]), torch.tensor([1, 3])))
+        >>> g.is_multigraph
+        False
+        >>> g = dgl.graph((torch.tensor([0, 1, 1]), torch.tensor([1, 3, 3])))
+        >>> g.is_multigraph
+        True
+
+        Check for heterogeneous graphs.
+
+        >>> g = dgl.heterograph({
+        ...     ('user', 'follows', 'user'): (torch.tensor([0, 1]), torch.tensor([1, 2])),
+        ...     ('user', 'follows', 'game'): (torch.tensor([0, 1, 2]), torch.tensor([1, 2, 3]))
+        ... })
+        >>> g.is_multigraph
+        False
+        >>> g = dgl.heterograph({
+        ...     ('user', 'follows', 'user'): (torch.tensor([0, 1, 1]), torch.tensor([1, 2, 2])),
+        ...     ('user', 'follows', 'game'): (torch.tensor([0, 1, 2]), torch.tensor([1, 2, 3]))
+        ... })
+        >>> g.is_multigraph
+        True
        """
        return self._graph.is_multigraph()

+    @property
+    def is_homogeneous(self):
+        """Whether the graph is a homogeneous graph.
+
+        A homogeneous graph only has one node type and one edge type.
+
+        Returns
+        -------
+        bool
+            Whether the graph is a homogeneous graph.
+
+        Examples
+        --------
+
+        The following example uses PyTorch backend.
+
+        >>> import dgl
+        >>> import torch
+
+        Create a homogeneous graph for check.
+
+        >>> g = dgl.graph((torch.tensor([0, 0, 1, 1]), torch.tensor([1, 0, 2, 3])))
+        >>> g.is_homogeneous
+        True
+
+        Create a heterogeneous graph for check.
+
+        A graph with more than one node type or edge type is not homogeneous.
+
+        >>> g = dgl.heterograph({
+        ...     ('user', 'follows', 'game'): (torch.tensor([0, 1, 2]), torch.tensor([1, 2, 3]))})
+        >>> g.is_homogeneous
+        False
+        """
+        return len(self.ntypes) == 1 and len(self.etypes) == 1
+
    @property
    def is_readonly(self):
        """Deprecated: DGLGraph will always be mutable.
@@ -1821,12 +2380,30 @@ class DGLHeteroGraph(object):

    @property
    def idtype(self):
-        """The dtype of graph index
+        """The data type for storing the structure-related graph information
+        such as node and edge IDs.

        Returns
        -------
-        backend dtype object
-            th.int32/th.int64 or tf.int32/tf.int64 etc.
+        Framework-specific data type object
+            For example, this can be ``torch.int32`` or ``torch.int64`` for PyTorch.
+
+        Examples
+        --------
+
+        The following example uses PyTorch backend.
+
+        >>> import dgl
+        >>> import torch
+
+        >>> src_ids = torch.tensor([0, 0, 1])
+        >>> dst_ids = torch.tensor([1, 2, 2])
+        >>> g = dgl.graph((src_ids, dst_ids))
+        >>> g.idtype
+        torch.int64
+        >>> g = dgl.graph((src_ids, dst_ids), idtype=torch.int32)
+        >>> g.idtype
+        torch.int32

        See Also
        --------
@@ -1854,41 +2431,68 @@ class DGLHeteroGraph(object):
        return self.has_nodes(vid)

    def has_nodes(self, vid, ntype=None):
-        """Whether the graph has a node with a particular id and type.
+        """Whether the graph has some particular node(s) of a given type.

        Parameters
        ----------
-        vid : int, iterable, tensor
-            Node ID(s).
+        vid : node ID(s)
+            The node ID(s) for query. The allowed formats are:
+
+            - ``int``: The ID of a single node.
+            - ``Tensor``: A 1D tensor that contains the IDs of multiple nodes, whose data type and
+              device should be the same as the :py:attr:`idtype` and device of the graph.
+            - ``iterable[int]``: A sequence (e.g. list, tuple, numpy.ndarray)
+              of integers that contains the IDs of multiple nodes.
        ntype : str, optional
-            The node type. Can be omitted if there is only one node type
-            in the graph. (Default: None)
+            The node type for query. It is required if the graph has
+            multiple node types.

        Returns
        -------
        bool or bool Tensor
-            Each element is a bool flag, which is True if the node exists,
-            and is False otherwise.
+
+            - If :attr:`vid` is an ``int``, the result will be a ``bool`` indicating
+              whether the graph has the particular node.
+            - If :attr:`vid` is a 1D ``Tensor`` or ``iterable[int]`` of node IDs,
+              the result will be a bool Tensor whose i-th element indicates whether
+              the graph has node :attr:`vid[i]` of the given type.

        Examples
        --------
+
+        The following example uses PyTorch backend.
+
+        >>> import dgl
+        >>> import torch
+
+        Create a graph with two node types -- 'user' and 'game'.
+
+        >>> g = dgl.heterograph({
+        ...     ('user', 'follows', 'user'): (torch.tensor([0, 1]), torch.tensor([1, 2])),
+        ...     ('user', 'plays', 'game'): (torch.tensor([3, 4]), torch.tensor([0, 1]))
+        ... })
+
+        Query for the nodes.
+
        >>> g.has_nodes(0, 'user')
        True
-        >>> g.has_nodes(4, 'user')
+        >>> g.has_nodes(3, 'game')
        False
-        >>> g.has_nodes([0, 1, 2, 3, 4], 'user')
-        tensor([True, True, True, False, False])
+        >>> g.has_nodes(torch.tensor([3, 0, 1]), 'game')
+        tensor([False,  True,  True])
        """
+        vid_tensor = utils.prepare_tensor(self, vid, "vid")
+        if len(vid_tensor) > 0 and F.as_scalar(F.min(vid_tensor, 0)) < 0 < len(vid_tensor):
+            raise DGLError('All IDs must be non-negative integers.')
        ret = self._graph.has_nodes(
-            self.get_ntype_id(ntype),
-            utils.prepare_tensor(self, vid, "vid"))
+            self.get_ntype_id(ntype), vid_tensor)
        if isinstance(vid, numbers.Integral):
            return bool(F.as_scalar(ret))
        else:
            return F.astype(ret, F.bool)

    def has_node(self, vid, ntype=None):
-        """Whether the graph has a node with ids and a particular type.
+        """Whether the graph has a particular node of a given type.

        DEPRECATED: see :func:`~DGLGraph.has_nodes`
        """
@@ -1896,38 +2500,93 @@ class DGLHeteroGraph(object):
        return self.has_nodes(vid, ntype)

    def has_edges_between(self, u, v, etype=None):
-        """Whether the graph has an edge (u, v) of type ``etype``.
+        """Whether the graph has some particular edge(s) of a given type.

        Parameters
        ----------
-        u : int, iterable of int, Tensor
-            Source node ID(s).
-        v : int, iterable of int, Tensor
-            Destination node ID(s).
+        u : source node ID(s)
+            The source node(s) of the edges for query. The allowed formats are:
+
+            - ``int``: The source node of an edge for query.
+            - ``Tensor``: A 1D tensor that contains the source node(s) of edge(s) for query.
+              The data type and device of the tensor must be the same as the :py:attr:`idtype` and
+              device of the graph. Its i-th element represents the source node ID of the
+              i-th edge for query.
+            - ``iterable[int]`` : Similar to the tensor, but stores node IDs in a sequence
+              (e.g. list, tuple, numpy.ndarray).
+        v : destination node ID(s)
+            The destination node(s) of the edges for query. It's a counterpart of :attr:`u`
+            for destination nodes and should have the same format as :attr:`u`. If :attr:`u`
+            and :attr:`v` are not int, they should have the same length.
        etype : str or tuple of str, optional
-            The edge type. Can be omitted if there is only one edge type
-            in the graph.
+            The edge type for query, which can be an edge type (str) or a canonical edge type
+            (3-tuple of str). When an edge type appears in multiple canonical edge types, one
+            must use a canonical edge type. If the graph has multiple edge types, one must
+            specify the argument. Otherwise, it can be omitted.

        Returns
        -------
-        a : Tensor
-            Binary tensor indicating the existence of edges. ``a[i]=1`` if the graph
-            contains edge ``(u[i], v[i])`` of type ``etype``, 0 otherwise.
+        bool or bool Tensor
+
+            - If :attr:`u` and :attr:`v` are ``int`` objects, the result will be a ``bool``
+              indicating whether there is an edge from ``u`` to ``v`` of the given edge type.
+            - If :attr:`u` and :attr:`v` are ``Tensor`` or ``iterable[int]`` objects, the
+              result will be a bool Tensor whose i-th element indicates whether there is an
+              edge from ``u[i]`` to ``v[i]`` of the given edge type.
+
+        Notes
+        -----
+        The value(s) of :attr:`u` and :attr:`v` must be valid node IDs of the source and
+        destination node type, respectively; otherwise an error is raised.

        Examples
        --------

-        >>> g.has_edge_between(0, 1, ('user', 'plays', 'game'))
+        The following example uses PyTorch backend.
+
+        >>> import dgl
+        >>> import torch
+
+        Create a homogeneous graph.
+
+        >>> g = dgl.graph((torch.tensor([0, 0, 1, 1]), torch.tensor([1, 0, 2, 3])))
+
+        Query for the edges.
+
+        >>> g.has_edges_between(1, 2)
        True
-        >>> g.has_edge_between(0, 2, ('user', 'plays', 'game'))
-        False
-        >>> g.has_edge_between([0, 0], [1, 2], ('user', 'plays', 'game'))
-        tensor([1, 0])
+        >>> g.has_edges_between(torch.tensor([1, 2]), torch.tensor([2, 3]))
+        tensor([ True, False])
+
+        If the graph has multiple edge types, one needs to specify the edge type.
+
+        >>> g = dgl.heterograph({
+        ...     ('user', 'follows', 'user'): (torch.tensor([0, 1]), torch.tensor([1, 2])),
+        ...     ('user', 'follows', 'game'): (torch.tensor([0, 1, 2]), torch.tensor([1, 2, 3])),
+        ...     ('user', 'plays', 'game'): (torch.tensor([1, 3]), torch.tensor([2, 3]))
+        ... })
+        >>> g.has_edges_between(torch.tensor([1, 2]), torch.tensor([2, 3]), 'plays')
+        tensor([ True, False])
+
+        Use a canonical edge type instead when there is ambiguity for an edge type.
+
+        >>> g.has_edges_between(torch.tensor([1, 2]), torch.tensor([2, 3]),
+        ...                     ('user', 'follows', 'user'))
+        tensor([ True, False])
+        >>> g.has_edges_between(torch.tensor([1, 2]), torch.tensor([2, 3]),
+        ...                     ('user', 'follows', 'game'))
+        tensor([True, True])
        """
+        srctype, _, dsttype = self.to_canonical_etype(etype)
+        u_tensor = utils.prepare_tensor(self, u, 'u')
+        if F.as_scalar(F.sum(self.has_nodes(u_tensor, ntype=srctype), dim=0)) != len(u_tensor):
+            raise DGLError('u contains invalid node IDs')
+        v_tensor = utils.prepare_tensor(self, v, 'v')
+        if F.as_scalar(F.sum(self.has_nodes(v_tensor, ntype=dsttype), dim=0)) != len(v_tensor):
+            raise DGLError('v contains invalid node IDs')
        ret = self._graph.has_edges_between(
            self.get_etype_id(etype),
-            utils.prepare_tensor(self, u, 'u'),
-            utils.prepare_tensor(self, v, 'v'))
+            u_tensor, v_tensor)
        if isinstance(u, numbers.Integral) and isinstance(v, numbers.Integral):
            return bool(F.as_scalar(ret))
        else:
@@ -1943,79 +2602,111 @@ class DGLHeteroGraph(object):
        return self.has_edges_between(u, v, etype)

    def predecessors(self, v, etype=None):
-        """Return the predecessors of node `v` in the graph with the specified
-        edge type.
+        """Return the predecessor(s) of a particular node with the specified edge type.

-        Node `u` is a predecessor of `v` if an edge `(u, v)` with type `etype`
-        exists in the graph.
+        Node ``u`` is a predecessor of node ``v`` if there is an edge ``(u, v)`` with type
+        ``etype`` in the graph.

        Parameters
        ----------
        v : int
-            The destination node.
+            The destination node for query.
        etype : str or tuple of str, optional
-            The edge type. Can be omitted if there is only one edge type
-            in the graph. (Default: None)
+            The edge type for query, which can be an edge type (str) or a canonical edge type
+            (3-tuple of str). When an edge type appears in multiple canonical edge types, one
+            must use a canonical edge type. If the graph has multiple edge types, one must
+            specify the argument. Otherwise, it can be omitted.

        Returns
        -------
-        tensor
-            Array of predecessor node IDs with the specified edge type.
+        Tensor
+            The predecessors of :attr:`v` with the specified edge type.

        Examples
        --------
        The following example uses PyTorch backend.

-        >>> plays_g = dgl.bipartite(([0, 1, 1, 2], [0, 0, 1, 1]), 'user', 'plays', 'game')
-        >>> devs_g = dgl.bipartite(([0, 1], [0, 1]), 'developer', 'develops', 'game')
-        >>> g = dgl.hetero_from_relations([plays_g, devs_g])
-        >>> g.predecessors(0, 'plays')
-        tensor([0, 1])
-        >>> g.predecessors(0, 'develops')
+        >>> import dgl
+        >>> import torch
+
+        Create a homogeneous graph.
+
+        >>> g = dgl.graph((torch.tensor([0, 0, 1, 1]), torch.tensor([1, 1, 2, 3])))
+
+        Query for node 1.
+
+        >>> g.predecessors(1)
+        tensor([0, 0])
+
+        For a graph of multiple edge types, it is required to specify the edge type in query.
+
+        >>> hg = dgl.heterograph({
+        ...     ('user', 'follows', 'user'): (torch.tensor([0, 1]), torch.tensor([1, 2])),
+        ...     ('user', 'plays', 'game'): (torch.tensor([3, 4]), torch.tensor([5, 6]))
+        ... })
+        >>> hg.predecessors(1, etype='follows')
        tensor([0])

        See Also
        --------
        successors
        """
+        if not self.has_nodes(v, self.to_canonical_etype(etype)[-1]):
+            raise DGLError('Non-existing node ID {}'.format(v))
        return self._graph.predecessors(self.get_etype_id(etype), v)

    def successors(self, v, etype=None):
-        """Return the successors of node `v` in the graph with the specified edge
-        type.
+        """Return the successor(s) of a particular node with the specified edge type.

-        Node `u` is a successor of `v` if an edge `(v, u)` with type `etype` exists
-        in the graph.
+        Node ``u`` is a successor of node ``v`` if there is an edge ``(v, u)`` with type
+        ``etype`` in the graph.

        Parameters
        ----------
        v : int
-            The source node.
+            The source node for query.
        etype : str or tuple of str, optional
-            The edge type. Can be omitted if there is only one edge type
-            in the graph. (Default: None)
+            The edge type for query, which can be an edge type (str) or a canonical edge type
+            (3-tuple of str). When an edge type appears in multiple canonical edge types, one
+            must use a canonical edge type. If the graph has multiple edge types, one must
+            specify the argument. Otherwise, it can be omitted.

        Returns
        -------
-        tensor
-            Array of successor node IDs with the specified edge type.
+        Tensor
+            The successors of :attr:`v` with the specified edge type.

        Examples
        --------
        The following example uses PyTorch backend.

-        >>> plays_g = dgl.bipartite(([0, 1, 1, 2], [0, 0, 1, 1]), 'user', 'plays', 'game')
-        >>> follows_g = dgl.graph(([0, 1], [1, 2]), 'user', 'follows')
-        >>> g = dgl.hetero_from_relations([plays_g, follows_g])
-        >>> g.successors(0, 'plays')
-        tensor([0])
-        >>> g.successors(0, 'follows')
-        tensor([1])
+        >>> import dgl
+        >>> import torch
+
+        Create a homogeneous graph.
+
+        >>> g = dgl.graph((torch.tensor([0, 0, 1, 1]), torch.tensor([1, 1, 2, 3])))
+
+        Query for node 1.
+
+        >>> g.successors(1)
+        tensor([2, 3])
+
+        For a graph of multiple edge types, it is required to specify the edge type in query.
+
+        >>> hg = dgl.heterograph({
+        ...     ('user', 'follows', 'user'): (torch.tensor([0, 1]), torch.tensor([1, 2])),
+        ...     ('user', 'plays', 'game'): (torch.tensor([3, 4]), torch.tensor([5, 6]))
+        ... })
+        >>> hg.successors(1, etype='follows')
+        tensor([2])

        See Also
        --------
        predecessors
        """
+        if not self.has_nodes(v, self.to_canonical_etype(etype)[0]):
+            raise DGLError('Non-existing node ID {}'.format(v))
        return self._graph.successors(self.get_etype_id(etype), v)

    def edge_id(self, u, v, force_multi=None, return_uv=False, etype=None):
@@ -2029,67 +2720,107 @@ class DGLHeteroGraph(object):
                             return_uv=return_uv, etype=etype)

    def edge_ids(self, u, v, force_multi=None, return_uv=False, etype=None):
-        """Return all edge IDs between source node array `u` and destination
-        node array `v` with the specified edge type.
+        """Return the ID(s) of edge(s) from the given source node(s) to the given destination
+        node(s) with the specified edge type.

        Parameters
        ----------
-        u : int, list, tensor
-            The node ID array of source type.
-        v : int, list, tensor
-            The node ID array of destination type.
+        u : source node ID(s)
+            The source node(s) of the edges for query. The allowed formats are:
+
+            - ``int``: The source node of an edge for query.
+            - ``Tensor``: A 1D tensor that contains the source node(s) of edge(s) for query, whose
+              data type and device should be the same as the :py:attr:`idtype` and device of
+              the graph. Its i-th element is the source node of the i-th edge for query.
+            - ``iterable[int]``: Similar to the tensor, but stores node IDs in a sequence
+              (e.g. list, tuple, numpy.ndarray).
+        v : destination node ID(s)
+            The destination node(s) of the edges for query. It's a counterpart of :attr:`u`
+            for destination nodes and should have the same format as :attr:`u`. If :attr:`u`
+            and :attr:`v` are not int, they should have the same length.
        force_multi : bool, optional
-            Deprecated (Will be deleted in the future).
-            Whether to always treat the graph as a multigraph. See the
-            "Returns" for their effects. (Default: False)
-        return_uv : bool
-            See the "Returns" for their effects. (Default: False)
+            Deprecated, use :attr:`return_uv` instead. Whether to allow the graph to be a
+            multigraph, i.e. there can be multiple edges from one node to another.
+        return_uv : bool, optional
+            Whether to return the source and destination node IDs along with the edges. If
+            False (default), it assumes that the graph is a simple graph and there is only
+            one edge from one node to another. If True, there can be multiple edges found
+            from one node to another.
        etype : str or tuple of str, optional
-            The edge type. Can be omitted if there is only one edge type
-            in the graph.
+            The edge type for query, which can be an edge type (str) or a canonical edge type
+            (3-tuple of str). When an edge type appears in multiple canonical edge types, one
+            must use a canonical edge type. If the graph has multiple edge types, one must
+            specify the argument. Otherwise, it can be omitted.

        Returns
        -------
        tensor, or (tensor, tensor, tensor)

-            * If ``return_uv=False``, return a single edge ID array ``e``.
-            ``e[i]`` is the edge ID between ``u[i]`` and ``v[i]``.
-
-            * Otherwise, return three arrays ``(eu, ev, e)``.  ``e[i]`` is the ID
-            of an edge between ``eu[i]`` and ``ev[i]``.  All edges between ``u[i]``
-            and ``v[i]`` are returned.
+            * If ``return_uv=False``, it returns the ID(s) of the edge(s). If :attr:`u` and
+              :attr:`v` are int, the result is a single int. Otherwise, it is a 1D tensor
+              whose i-th element is the ID of the edge ``(u[i], v[i])``.
+            * If ``return_uv=True``, it returns a tuple of three 1D tensors ``(eu, ev, e)``.
+              ``e[i]`` is the ID of an edge from ``eu[i]`` to ``ev[i]``. It returns all edges
+              from ``eu[i]`` to ``ev[i]`` in this case.

        Notes
        -----
-        If the graph is a simple graph, ``return_uv=False``, and no edge
-        exists between some pairs of ``u[i]`` and ``v[i]``, the result is undefined
-        and an empty tensor is returned.
+        If the graph is a simple graph, ``return_uv=False``, and there are no edges
+        between some pairs of node(s), it will raise an error.

-        If the graph is a multi graph, ``return_uv=False``, and multi edges
-        exist between some pairs of `u[i]` and `v[i]`, the result is undefined.
+        If the graph is a multigraph, ``return_uv=False``, and there are multiple edges
+        between some pairs of node(s), it returns an arbitrary one from them.

        Examples
        --------
        The following example uses PyTorch backend.

-        Instantiate a heterograph.
+        >>> import dgl
+        >>> import torch

-        >>> plays_g = dgl.bipartite(([0, 1, 1, 2], [0, 0, 2, 1]), 'user', 'plays', 'game')
-        >>> follows_g = dgl.graph(([0, 1, 1], [1, 2, 2]), 'user', 'follows')
-        >>> g = dgl.hetero_from_relations([plays_g, follows_g])
+        Create a homogeneous graph.

-        Query for edge ids.
+        >>> g = dgl.graph((torch.tensor([0, 0, 1, 1, 1]), torch.tensor([1, 0, 2, 3, 2])))

-        >>> plays_g.edge_ids([0], [2], etype=('user', 'plays', 'game'))
-        tensor([], dtype=torch.int64)
-        >>> plays_g.edge_ids([1], [2], etype=('user', 'plays', 'game'))
-        tensor([2])
-        >>> g.edge_ids([1], [2], return_uv=True, etype=('user', 'follows', 'user'))
-        (tensor([1, 1]), tensor([2, 2]), tensor([1, 2]))
+        Query for the edges.
+
+        >>> g.edge_ids(0, 0)
+        1
+        >>> g.edge_ids(torch.tensor([1, 0]), torch.tensor([3, 1]))
+        tensor([3, 0])
+
+        Get all edges for pairs of nodes.
+
+        >>> g.edge_ids(torch.tensor([1, 0]), torch.tensor([3, 1]), return_uv=True)
+        (tensor([1, 0]), tensor([3, 1]), tensor([3, 0]))
+
+        If the graph has multiple edge types, one needs to specify the edge type.
+
+        >>> g = dgl.heterograph({
+        ...     ('user', 'follows', 'user'): (torch.tensor([0, 1]), torch.tensor([1, 2])),
+        ...     ('user', 'follows', 'game'): (torch.tensor([0, 1, 2]), torch.tensor([1, 2, 3])),
+        ...     ('user', 'plays', 'game'): (torch.tensor([1, 3]), torch.tensor([2, 3]))
+        ... })
+        >>> g.edge_ids(torch.tensor([1]), torch.tensor([2]), etype='plays')
+        tensor([0])
+
+        Use a canonical edge type instead when there is ambiguity for an edge type.
+
+        >>> g.edge_ids(torch.tensor([0, 1]), torch.tensor([1, 2]),
+        ...            etype=('user', 'follows', 'user'))
+        tensor([0, 1])
+        >>> g.edge_ids(torch.tensor([1, 2]), torch.tensor([2, 3]),
+        ...            etype=('user', 'follows', 'game'))
+        tensor([1, 2])
        """
        is_int = isinstance(u, numbers.Integral) and isinstance(v, numbers.Integral)
+        srctype, _, dsttype = self.to_canonical_etype(etype)
        u = utils.prepare_tensor(self, u, 'u')
+        if F.as_scalar(F.sum(self.has_nodes(u, ntype=srctype), dim=0)) != len(u):
+            raise DGLError('u contains invalid node IDs')
        v = utils.prepare_tensor(self, v, 'v')
+        if F.as_scalar(F.sum(self.has_nodes(v, ntype=dsttype), dim=0)) != len(v):
+            raise DGLError('v contains invalid node IDs')
        if force_multi is not None:
            dgl_warning("force_multi will be deprecated, " \
                        "Please use return_uv instead")
@@ -2109,88 +2840,145 @@ class DGLHeteroGraph(object):
            return F.as_scalar(eid) if is_int else eid

    def find_edges(self, eid, etype=None):
-        """Given an edge ID array with the specified type, return the source
-        and destination node ID array ``s`` and ``d``.  ``s[i]`` and ``d[i]``
-        are source and destination node ID for edge ``eid[i]``.
+        """Return the source and destination node(s) of some particular edge(s)
+        with the specified edge type.

        Parameters
        ----------
-        eid : list, tensor
-            The edge ID array.
+        eid : edge ID(s)
+            The IDs of the edges for query. The function expects that :attr:`eid` contains
+            valid edge IDs only, i.e. among consecutive integers :math:`0, 1, ... E - 1`, where
+            :math:`E` is the number of edges with the specified edge type.
+
+            - ``int``: An edge ID for query.
+            - ``Tensor``: A 1D tensor that contains the edge IDs for query, whose data
+              type and device should be the same as the :py:attr:`idtype` and device of the graph.
+            - ``iterable[int]``: Similar to the tensor, but stores edge IDs in a sequence
+              (e.g. list, tuple, numpy.ndarray).
        etype : str or tuple of str, optional
-            The edge type. Can be omitted if there is only one edge type
-            in the graph. (Default: None)
+            The edge type for query, which can be an edge type (str) or a canonical edge type
+            (3-tuple of str). When an edge type appears in multiple canonical edge types, one
+            must use a canonical edge type.

        Returns
        -------
-        tensor
-            The source node ID array.
-        tensor
-            The destination node ID array.
+        Tensor
+            The source node IDs of the edges, whose i-th element is the source node of the edge
+            with ID ``eid[i]``.
+        Tensor
+            The destination node IDs of the edges, whose i-th element is the destination node of
+            the edge with ID ``eid[i]``.

        Examples
        --------
        The following example uses PyTorch backend.

-        >>> g = dgl.bipartite(([0, 1, 1], [0, 0, 2]), 'user', 'plays', 'game')
-        >>> g.find_edges([0, 2], ('user', 'plays', 'game'))
-        (tensor([0, 1]), tensor([0, 2]))
-        >>> g.find_edges([0, 2])
-        (tensor([0, 1]), tensor([0, 2]))
+        >>> import dgl
+        >>> import torch
+
+        Create a homogeneous graph.
+
+        >>> g = dgl.graph((torch.tensor([0, 0, 1, 1]), torch.tensor([1, 0, 2, 3])))
+
+        Find edges of IDs 0 and 2.
+
+        >>> g.find_edges(torch.tensor([0, 2]))
+        (tensor([0, 1]), tensor([1, 2]))
+
+        For a graph of multiple edge types, it is required to specify the edge type in query.
+
+        >>> hg = dgl.heterograph({
+        ...     ('user', 'follows', 'user'): (torch.tensor([0, 1]), torch.tensor([1, 2])),
+        ...     ('user', 'plays', 'game'): (torch.tensor([3, 4]), torch.tensor([5, 6]))
+        ... })
+        >>> hg.find_edges(torch.tensor([1, 0]), 'plays')
+        (tensor([4, 3]), tensor([6, 5]))
        """
        eid = utils.prepare_tensor(self, eid, 'eid')
+        if len(eid) > 0:
+            min_eid = F.as_scalar(F.min(eid, 0))
+            if min_eid < 0:
+                raise DGLError('Invalid edge ID {:d}'.format(min_eid))
+            max_eid = F.as_scalar(F.max(eid, 0))
+            if max_eid >= self.num_edges(etype):
+                raise DGLError('Invalid edge ID {:d}'.format(max_eid))
+
        if len(eid) == 0:
            empty = F.copy_to(F.tensor([], self.idtype), self.device)
            return empty, empty
-        # sanity check
-        max_eid = F.as_scalar(F.max(eid, dim=0))
-        if max_eid >= self.number_of_edges(etype):
-            raise DGLError('Expect edge IDs to be smaller than number of edges ({}). '
-                           ' But got {}.'.format(self.number_of_edges(etype), max_eid))
        src, dst, _ = self._graph.find_edges(self.get_etype_id(etype), eid)
        return src, dst

    def in_edges(self, v, form='uv', etype=None):
-        """Return the inbound edges of the node(s) with the specified type.
+        """Return the incoming edges of some particular node(s) with the specified edge type.

        Parameters
        ----------
-        v : int, list, tensor
-            The node id(s) of destination type.
+        v : destination node ID(s)
+            The destination node(s) for query. The allowed formats are:
+
+            - ``int``: The destination node for query.
+            - ``Tensor``: A 1D tensor that contains the destination node(s) for query, whose data
+              type and device should be the same as the :py:attr:`idtype` and device of the graph.
+            - ``iterable[int]``: Similar to the tensor, but stores node IDs in a sequence
+              (e.g. list, tuple, numpy.ndarray).
        form : str, optional
-            The return form. Currently support:
-
-            - ``'eid'`` : one eid tensor
-            - ``'all'`` : a tuple ``(u, v, eid)``
-            - ``'uv'``  : a pair ``(u, v)``, default
+            The return form, which can be one of the following:
+
+            - ``'eid'``: The returned result is a 1D tensor :math:`EID`, representing
+              the IDs of all edges.
+            - ``'uv'`` (default): The returned result is a 2-tuple of 1D tensors :math:`(U, V)`,
+              representing the source and destination nodes of all edges. For each :math:`i`,
+              :math:`(U[i], V[i])` forms an edge.
+            - ``'all'``: The returned result is a 3-tuple of 1D tensors :math:`(U, V, EID)`,
+              representing the source nodes, destination nodes and IDs of all edges.
+              For each :math:`i`, :math:`(U[i], V[i])` forms an edge with ID :math:`EID[i]`.
        etype : str or tuple of str, optional
-            The edge type. Can be omitted if there is only one edge type
-            in the graph. (Default: None)
+            The edge type for query, which can be an edge type (str) or a canonical edge type
+            (3-tuple of str). When an edge type appears in multiple canonical edge types, one
+            must use a canonical edge type. If the graph has multiple edge types, one must
+            specify the argument. Otherwise, it can be omitted.

        Returns
        -------
-        tensor or (tensor, tensor, tensor) or (tensor, tensor)
-            All inbound edges to ``v`` are returned.
-
-            * If ``form='eid'``, return a tensor for the ids of the
-              inbound edges of the nodes with the specified type.
-            * If ``form='all'``, return a 3-tuple of tensors
-              ``(eu, ev, eid)``. ``eid[i]`` gives the ID of the
-              edge from ``eu[i]`` to ``ev[i]``.
-            * If ``form='uv'``, return a 2-tuple of tensors ``(eu, ev)``.
-              ``eu[i]`` is the source node of an edge to ``ev[i]``.
+        Tensor or (Tensor, Tensor) or (Tensor, Tensor, Tensor)
+            All incoming edges of the nodes with the specified type. For a description of the
+            returned result, see the description of :attr:`form`.

        Examples
        --------
        The following example uses PyTorch backend.

-        >>> g = dgl.bipartite(([0, 1, 1], [0, 1, 2]), 'user', 'plays', 'game')
-        >>> g.in_edges([0, 2], form='eid')
-        tensor([0, 2])
-        >>> g.in_edges([0, 2], form='all')
-        (tensor([0, 1]), tensor([0, 2]), tensor([0, 2]))
-        >>> g.in_edges([0, 2], form='uv')
-        (tensor([0, 1]), tensor([0, 2]))
+        >>> import dgl
+        >>> import torch
+
+        Create a homogeneous graph.
+
+        >>> g = dgl.graph((torch.tensor([0, 0, 1, 1]), torch.tensor([1, 0, 2, 3])))
+
+        Query for the nodes 1 and 0.
+
+        >>> g.in_edges(torch.tensor([1, 0]))
+        (tensor([0, 0]), tensor([1, 0]))
+
+        Specify a different value for :attr:`form`.
+
+        >>> g.in_edges(torch.tensor([1, 0]), form='all')
+        (tensor([0, 0]), tensor([1, 0]), tensor([0, 1]))
+
+        For a graph of multiple edge types, it is required to specify the edge type in query.
+
+        >>> hg = dgl.heterograph({
+        ...     ('user', 'follows', 'user'): (torch.tensor([0, 1]), torch.tensor([1, 2])),
+        ...     ('user', 'plays', 'game'): (torch.tensor([3, 4]), torch.tensor([5, 6]))
+        ... })
+        >>> hg.in_edges(torch.tensor([1, 0]), etype='follows')
+        (tensor([0]), tensor([1]))
+
+        See Also
+        --------
+        edges
+        out_edges
        """
        v = utils.prepare_tensor(self, v, 'v')
        src, dst, eid = self._graph.in_edges(self.get_etype_id(etype), v)
@@ -2204,46 +2992,80 @@ class DGLHeteroGraph(object):
            raise DGLError('Invalid form: {}. Must be "all", "uv" or "eid".'.format(form))

    def out_edges(self, u, form='uv', etype=None):
-        """Return the outbound edges of the node(s) with the specified type.
+        """Return the outgoing edges of some particular node(s) with the specified edge type.

        Parameters
        ----------
-        u : int, list, tensor
-            The node id(s) of source type.
+        u : source node ID(s)
+            The source node(s) for query. The allowed formats are:
+
+            - ``int``: The source node for query.
+            - ``Tensor``: A 1D tensor that contains the source node(s) for query, whose data
+              type and device should be the same as the :py:attr:`idtype` and device of the graph.
+            - ``iterable[int]``: Similar to the tensor, but stores node IDs in a sequence
+              (e.g. list, tuple, numpy.ndarray).
        form : str, optional
-            The return form. Currently support:
-
-            - ``'eid'`` : one eid tensor
-            - ``'all'`` : a tuple ``(u, v, eid)``
-            - ``'uv'``  : a pair ``(u, v)``, default
+            The return form, which can be one of the following:
+
+            - ``'eid'``: The returned result is a 1D tensor :math:`EID`, representing
+              the IDs of all edges.
+            - ``'uv'`` (default): The returned result is a 2-tuple of 1D tensors :math:`(U, V)`,
+              representing the source and destination nodes of all edges. For each :math:`i`,
+              :math:`(U[i], V[i])` forms an edge.
+            - ``'all'``: The returned result is a 3-tuple of 1D tensors :math:`(U, V, EID)`,
+              representing the source nodes, destination nodes and IDs of all edges.
+              For each :math:`i`, :math:`(U[i], V[i])` forms an edge with ID :math:`EID[i]`.
        etype : str or tuple of str, optional
-            The edge type. Can be omitted if there is only one edge type
-            in the graph. (Default: None)
+            The edge type for query, which can be an edge type (str) or a canonical edge type
+            (3-tuple of str). When an edge type appears in multiple canonical edge types, one
+            must use a canonical edge type. If the graph has multiple edge types, one must
+            specify the argument. Otherwise, it can be omitted.
+
+        Returns
+        -------
+        Tensor or (Tensor, Tensor) or (Tensor, Tensor, Tensor)
+            All outgoing edges of the nodes with the specified edge type. For a description
+            of the returned result, see the description of :attr:`form`.
+
+        Examples
+        --------
+        The following example uses PyTorch backend.
+
+        >>> import dgl
+        >>> import torch
+
+        Create a homogeneous graph.
+
+        >>> g = dgl.graph((torch.tensor([0, 0, 1, 1]), torch.tensor([1, 0, 2, 3])))
+
+        Query for the nodes 1 and 2.
+
+        >>> g.out_edges(torch.tensor([1, 2]))
+        (tensor([1, 1]), tensor([2, 3]))
+
+        Specify a different value for :attr:`form`.
+
+        >>> g.out_edges(torch.tensor([1, 2]), form='all')
+        (tensor([1, 1]), tensor([2, 3]), tensor([2, 3]))

-        Returns
-        -------
-        tensor or (tensor, tensor, tensor) or (tensor, tensor)
-            All outbound edges from ``u`` are returned.
+        For a graph of multiple edge types, it is required to specify the edge type in query.

-            * If ``form='eid'``, return a tensor for the ids of the outbound edges
-              of the nodes with the specified type.
-            * If ``form='all'``, return a 3-tuple of tensors ``(eu, ev, eid)``.
-              ``eid[i]`` gives the ID of the edge from ``eu[i]`` to ``ev[i]``.
-            * If ``form='uv'``, return a 2-tuple of tensors ``(eu, ev)``.
-              ``ev[i]`` is the destination node of the edge from ``eu[i]``.
+        >>> hg = dgl.heterograph({
+        ...     ('user', 'follows', 'user'): (torch.tensor([0, 1]), torch.tensor([1, 2])),
+        ...     ('user', 'plays', 'game'): (torch.tensor([3, 4]), torch.tensor([5, 6]))
+        ... })
+        >>> hg.out_edges(torch.tensor([1, 2]), etype='follows')
+        (tensor([1]), tensor([2]))

-        Examples
+        See Also
        --------
-
-        >>> g = dgl.bipartite(([0, 1, 1], [0, 1, 2]), 'user', 'plays', 'game')
-        >>> g.out_edges([0, 1], form='eid')
-        tensor([0, 1, 2])
-        >>> g.out_edges([0, 1], form='all')
-        (tensor([0, 1, 1]), tensor([0, 1, 2]), tensor([0, 1, 2]))
-        >>> g.out_edges([0, 1], form='uv')
-        (tensor([0, 1, 1]), tensor([0, 1, 2]))
+        edges
+        in_edges
        """
        u = utils.prepare_tensor(self, u, 'u')
+        srctype, _, _ = self.to_canonical_etype(etype)
+        if F.as_scalar(F.sum(self.has_nodes(u, ntype=srctype), dim=0)) != len(u):
+            raise DGLError('u contains invalid node IDs')
        src, dst, eid = self._graph.out_edges(self.get_etype_id(etype), u)
        if form == 'all':
            return src, dst, eid
@@ -2254,49 +3076,75 @@ class DGLHeteroGraph(object):
        else:
            raise DGLError('Invalid form: {}. Must be "all", "uv" or "eid".'.format(form))

-    def all_edges(self, form='uv', order=None, etype=None):
-        """Return all edges with the specified type.
+    def all_edges(self, form='uv', order='eid', etype=None):
+        """Return all edges with the specified edge type.

        Parameters
        ----------
        form : str, optional
-            The return form. Currently support:
-
-            - ``'eid'`` : one eid tensor
-            - ``'all'`` : a tuple ``(u, v, eid)``
-            - ``'uv'``  : a pair ``(u, v)``, default
-        order : str or None
-            The order of the returned edges. Currently support:
-
-            - ``'srcdst'`` : sorted by their src and dst ids.
-            - ``'eid'``    : sorted by edge Ids.
-            - ``None``     : arbitrary order, default
+            The return form, which can be one of the following:
+
+            - ``'eid'``: The returned result is a 1D tensor :math:`EID`, representing
+              the IDs of all edges.
+            - ``'uv'`` (default): The returned result is a 2-tuple of 1D tensors :math:`(U, V)`,
+              representing the source and destination nodes of all edges. For each :math:`i`,
+              :math:`(U[i], V[i])` forms an edge.
+            - ``'all'``: The returned result is a 3-tuple of 1D tensors :math:`(U, V, EID)`,
+              representing the source nodes, destination nodes and IDs of all edges.
+              For each :math:`i`, :math:`(U[i], V[i])` forms an edge with ID :math:`EID[i]`.
+        order : str, optional
+            The order of the returned edges, which can be one of the following:
+
+            - ``'srcdst'``: The edges are sorted first by their source node IDs and then
+              by their destination node IDs to break ties.
+            - ``'eid'`` (default): The edges are sorted by their IDs.
        etype : str or tuple of str, optional
-            The edge type. Can be omitted if there is only one edge type
-            in the graph. (Default: None)
+            The edge type for query, which can be an edge type (str) or a canonical edge type
+            (3-tuple of str). When an edge type appears in multiple canonical edge types, one
+            must use a canonical edge type. If the graph has multiple edge types, one must
+            specify the argument. Otherwise, it can be omitted.

        Returns
        -------
-        tensor or (tensor, tensor, tensor) or (tensor, tensor)
-
-            * If ``form='eid'``, return a tensor for the ids of all edges
-              with the specified type.
-            * If ``form='all'``, return a 3-tuple of tensors ``(eu, ev, eid)``.
-              ``eid[i]`` gives the ID of the edge from ``eu[i]`` to ``ev[i]``.
-            * If ``form='uv'``, return a 2-tuple of tensors ``(eu, ev)``.
-              ``ev[i]`` is the destination node of the edge from ``eu[i]``.
+        Tensor or (Tensor, Tensor) or (Tensor, Tensor, Tensor)
+            All edges of the specified edge type. For a description of the returned result,
+            see the description of :attr:`form`.

        Examples
        --------
        The following example uses PyTorch backend.

-        >>> g = dgl.bipartite(([1, 0, 1], [1, 0, 2]), 'user', 'plays', 'game')
-        >>> g.all_edges(form='eid', order='srcdst')
-        tensor([1, 0, 2])
+        >>> import dgl
+        >>> import torch
+
+        Create a homogeneous graph.
+
+        >>> g = dgl.graph((torch.tensor([0, 0, 1, 1]), torch.tensor([1, 0, 2, 3])))
+
+        Query for edges.
+
+        >>> g.all_edges()
+        (tensor([0, 0, 1, 1]), tensor([1, 0, 2, 3]))
+
+        Specify different values for :attr:`form` and :attr:`order`.
+
        >>> g.all_edges(form='all', order='srcdst')
-        (tensor([0, 1, 1]), tensor([0, 1, 2]), tensor([1, 0, 2]))
-        >>> g.all_edges(form='uv', order='eid')
-        (tensor([1, 0, 1]), tensor([1, 0, 2]))
+        (tensor([0, 0, 1, 1]), tensor([0, 1, 2, 3]), tensor([1, 0, 2, 3]))
+
+        For a graph of multiple edge types, it is required to specify the edge type in query.
+
+        >>> hg = dgl.heterograph({
+        ...     ('user', 'follows', 'user'): (torch.tensor([0, 1]), torch.tensor([1, 2])),
+        ...     ('user', 'plays', 'game'): (torch.tensor([3, 4]), torch.tensor([5, 6]))
+        ... })
+        >>> hg.all_edges(etype='plays')
+        (tensor([3, 4]), tensor([5, 6]))
+
+        See Also
+        --------
+        edges
+        in_edges
+        out_edges
        """
        src, dst, eid = self._graph.edges(self.get_etype_id(etype), order)
        if form == 'all':
@@ -2317,46 +3165,78 @@ class DGLHeteroGraph(object):
        return self.in_degrees(v, etype)

    def in_degrees(self, v=ALL, etype=None):
-        """Return the in-degrees of nodes v with edges of type ``etype``.
+        """Return the in-degree(s) of some particular node(s) with the specified edge type.

        Parameters
        ----------
-        v : int, iterable of int or tensor, optional.
-            The node ID array of the destination type. Default is to return the
-            degrees of all nodes.
-        etype : str or tuple of str or None, optional
-            The edge type. Can be omitted if there is only one edge type
-            in the graph. (Default: None)
+        v : destination node ID(s), optional
+            The destination node(s) for query. The allowed formats are:
+
+            - ``int``: The destination node for query.
+            - ``Tensor``: A 1D tensor that contains the destination node(s) for query, whose data
+              type and device should be the same as the :py:attr:`idtype` and device of the graph.
+            - ``iterable[int]``: Similar to the tensor, but stores node IDs in a sequence
+              (e.g. list, tuple, numpy.ndarray).
+
+            By default, it considers all nodes.
+        etype : str or tuple of str, optional
+            The edge type for query, which can be an edge type (str) or a canonical edge type
+            (3-tuple of str). When an edge type appears in multiple canonical edge types, one
+            must use a canonical edge type. If the graph has multiple edge types, one must
+            specify the argument. Otherwise, it can be omitted.

        Returns
        -------
-        d : tensor or int
-            The in-degree array. ``d[i]`` gives the in-degree of node ``v[i]``
-            with edges of type ``etype``. If the argument is an integer, so will
-            be the return.
+        tensor or int
+            The in-degree(s) of the node(s).
+
+            - If :attr:`v` is an ``int`` object, the return result will be an ``int``
+              object as well.
+            - If :attr:`v` is a ``Tensor`` or ``iterable[int]`` object, the return result
+              will be a 1D ``Tensor``. The data type of the result will be the same as the
+              idtype of the graph. The i-th element of the tensor is the in-degree of the
+              node ``v[i]``.

        Examples
        --------
        The following example uses PyTorch backend.

-        Instantiate a heterograph.
+        >>> import dgl
+        >>> import torch

-        >>> plays_g = dgl.bipartite(([0, 1, 1, 2], [0, 0, 2, 1]), 'user', 'plays', 'game')
-        >>> follows_g = dgl.graph(([0, 1, 1], [1, 2, 2]), 'user', 'follows')
-        >>> g = dgl.hetero_from_relations([plays_g, follows_g])
+        Create a homogeneous graph.

-        Query for node degree.
+        >>> g = dgl.graph((torch.tensor([0, 0, 1, 1]), torch.tensor([1, 1, 2, 3])))

-        >>> g.in_degrees(0, 'plays')
-        2
-        >>> g.in_degrees(etype='follows')
-        tensor([0, 1, 2])
+        Query for all nodes.
+
+        >>> g.in_degrees()
+        tensor([0, 2, 1, 1])
+
+        Query for nodes 1 and 2.
+
+        >>> g.in_degrees(torch.tensor([1, 2]))
+        tensor([2, 1])
+
+        For a graph of multiple edge types, it is required to specify the edge type in query.
+
+        >>> hg = dgl.heterograph({
+        ...     ('user', 'follows', 'user'): (torch.tensor([0, 1]), torch.tensor([1, 2])),
+        ...     ('user', 'plays', 'game'): (torch.tensor([3, 4]), torch.tensor([5, 6]))
+        ... })
+        >>> hg.in_degrees(torch.tensor([1, 0]), etype='follows')
+        tensor([1, 0])
+
+        See Also
+        --------
+        out_degrees
        """
        dsttype = self.to_canonical_etype(etype)[2]
        etid = self.get_etype_id(etype)
        if is_all(v):
            v = self.dstnodes(dsttype)
-        deg = self._graph.in_degrees(etid, utils.prepare_tensor(self, v, 'v'))
+        v_tensor = utils.prepare_tensor(self, v, 'v')
+        deg = self._graph.in_degrees(etid, v_tensor)
        if isinstance(v, numbers.Integral):
            return F.as_scalar(deg)
        else:
@@ -2371,48 +3251,78 @@ class DGLHeteroGraph(object):
        return self.out_degrees(u, etype)

    def out_degrees(self, u=ALL, etype=None):
-        """Return the out-degrees of nodes u with edges of type ``etype``.
+        """Return the out-degree(s) of some particular node(s) with the specified edge type.

        Parameters
        ----------
-        u : list, tensor
-            The node ID array of source type. Default is to return the degrees
-            of all the nodes.
+        u : source node ID(s), optional
+
+            - ``int``: The source node for query.
+            - ``Tensor``: A 1D tensor that contains the source node(s) for query, whose data
+              type and device should be the same as the :py:attr:`idtype` and device of the graph.
+            - ``iterable[int]``: Similar to the tensor, but stores node IDs in a sequence
+              (e.g. list, tuple, numpy.ndarray).
+
+            By default, it considers all nodes.
        etype : str or tuple of str, optional
-            The edge type. Can be omitted if there is only one edge type
-            in the graph. (Default: None)
+            The edge type for query, which can be an edge type (str) or a canonical edge type
+            (3-tuple of str). When an edge type appears in multiple canonical edge types, one
+            must use a canonical edge type. If the graph has multiple edge types, one must
+            specify the argument. Otherwise, it can be omitted.

        Returns
        -------
-        d : tensor
-            The out-degree array. ``d[i]`` gives the out-degree of node ``u[i]``
-            with edges of type ``etype``.
+        tensor or int
+            The out-degree(s) of the node(s).
+
+            - If :attr:`u` is an ``int`` object, the return result will be an ``int``
+              object as well.
+            - If :attr:`u` is a ``Tensor`` or ``iterable[int]`` object, the return result
+              will be a 1D ``Tensor``. The data type of the result will be the same as the
+              idtype of the graph. The i-th element of the tensor is the out-degree of the
+              node ``u[i]``.

        Examples
        --------
        The following example uses PyTorch backend.

-        Instantiate a heterograph.
+        >>> import dgl
+        >>> import torch

-        >>> plays_g = dgl.bipartite(([0, 1, 1, 2], [0, 0, 2, 1]), 'user', 'plays', 'game')
-        >>> follows_g = dgl.graph(([0, 1, 1], [1, 2, 2]), 'user', 'follows')
-        >>> g = dgl.hetero_from_relations([plays_g, follows_g])
+        Create a homogeneous graph.

-        Query for node degree.
+        >>> g = dgl.graph((torch.tensor([0, 0, 1, 1]), torch.tensor([1, 1, 2, 3])))

-        >>> g.out_degrees(0, 'plays')
-        1
-        >>> g.out_degrees(etype='follows')
-        tensor([1, 2, 0])
+        Query for all nodes.
+
+        >>> g.out_degrees()
+        tensor([2, 2, 0, 0])
+
+        Query for nodes 1 and 2.
+
+        >>> g.out_degrees(torch.tensor([1, 2]))
+        tensor([2, 0])
+
+        For a graph of multiple edge types, it is required to specify the edge type in query.
+
+        >>> hg = dgl.heterograph({
+        ...     ('user', 'follows', 'user'): (torch.tensor([0, 1]), torch.tensor([1, 2])),
+        ...     ('user', 'plays', 'game'): (torch.tensor([3, 4]), torch.tensor([5, 6]))
+        ... })
+        >>> hg.out_degrees(torch.tensor([1, 0]), etype='follows')
+        tensor([1, 1])

        See Also
        --------
-        out_degree
+        in_degrees
        """
        srctype = self.to_canonical_etype(etype)[0]
        etid = self.get_etype_id(etype)
        if is_all(u):
            u = self.srcnodes(srctype)
+        u_tensor = utils.prepare_tensor(self, u, 'u')
+        if F.as_scalar(F.sum(self.has_nodes(u_tensor, ntype=srctype), dim=0)) != len(u_tensor):
+            raise DGLError('u contains invalid node IDs')
        deg = self._graph.out_degrees(etid, utils.prepare_tensor(self, u, 'u'))
        if isinstance(u, numbers.Integral):
            return F.as_scalar(deg)
@@ -2420,6 +3330,10 @@ class DGLHeteroGraph(object):
            return deg

    def adjacency_matrix(self, transpose=None, ctx=F.cpu(), scipy_fmt=None, etype=None):
+        """Alias of :func:`adj`"""
+        return self.adj(transpose, ctx, scipy_fmt, etype)
+
+    def adj(self, transpose=None, ctx=F.cpu(), scipy_fmt=None, etype=None):
        """Return the adjacency matrix of edges of the given edge type.

        By default, a row of returned adjacency matrix represents the
@@ -2437,9 +3351,12 @@ class DGLHeteroGraph(object):
        scipy_fmt : str, optional
            If specified, return a scipy sparse matrix in the given format.
            Otherwise, return a backend dependent sparse tensor. (Default: None)
-        etype : str, optional
-            The edge type. Can be omitted if there is only one edge type
-            in the graph. (Default: None)
+        etype : str or tuple of str, optional
+            The edge type for query, which can be an edge type (str) or a canonical edge type
+            (3-tuple of str). When an edge type appears in multiple canonical edge types, one
+            must use a canonical edge type. If the graph has multiple edge types, one must
+            specify the argument. Otherwise, it can be omitted, and the adjacency matrix
+            of the graph's only edge type is returned.

        Returns
        -------
@@ -2449,15 +3366,21 @@ class DGLHeteroGraph(object):
        Examples
        --------

+        The following example uses PyTorch backend.
+
+        >>> import dgl
+        >>> import torch
+
        Instantiate a heterogeneous graph.

-        >>> follows_g = dgl.graph(([0, 1], [0, 1]), 'user', 'follows')
-        >>> devs_g = dgl.bipartite(([0, 1], [0, 2]), 'developer', 'develops', 'game')
-        >>> g = dgl.hetero_from_relations([follows_g, devs_g])
+        >>> g = dgl.heterograph({
+        ...     ('user', 'follows', 'user'): ([0, 1], [0, 1]),
+        ...     ('developer', 'develops', 'game'): ([0, 1], [0, 2])
+        ... })

        Get a backend dependent sparse tensor. Here we use PyTorch for example.

-        >>> g.adjacency_matrix(etype='develops')
+        >>> g.adj(etype='develops')
        tensor(indices=tensor([[0, 2],
                               [0, 1]]),
               values=tensor([1., 1.]),
@@ -2465,7 +3388,7 @@ class DGLHeteroGraph(object):

        Get a scipy coo sparse matrix.

-        >>> g.adjacency_matrix(scipy_fmt='coo', etype='develops')
+        >>> g.adj(scipy_fmt='coo', etype='develops')
        <3x2 sparse matrix of type '<class 'numpy.int64'>'
        with 2 stored elements in COOrdinate format>
        """
@@ -2482,9 +3405,6 @@ class DGLHeteroGraph(object):
        else:
            return self._graph.adjacency_matrix_scipy(etid, transpose, scipy_fmt, False)

-    # Alias of ``adjacency_matrix``
-    adj = adjacency_matrix
-
    def adjacency_matrix_scipy(self, transpose=None, fmt='csr', return_edge_ids=None):
        """DEPRECATED: please use ``dgl.adjacency_matrix(transpose, scipy_fmt=fmt)``.
        """
@@ -2495,6 +3415,10 @@ class DGLHeteroGraph(object):
        return self.adjacency_matrix(transpose=transpose, scipy_fmt=fmt)

    def incidence_matrix(self, typestr, ctx=F.cpu(), etype=None):
+        """Alias of :func:`inc`"""
+        return self.inc(typestr, ctx, etype)
+
+    def inc(self, typestr, ctx=F.cpu(), etype=None):
        """Return the incidence matrix representation of edges with the given
        edge type.

@@ -2529,9 +3453,12 @@ class DGLHeteroGraph(object):
            Can be either ``in``, ``out`` or ``both``
        ctx : context, optional
            The context of returned incidence matrix. (Default: cpu)
-        etype : str, optional
-            The edge type. Can be omitted if there is only one edge type
-            in the graph.
+        etype : str or tuple of str, optional
+            The edge type for query, which can be an edge type (str) or a canonical edge type
+            (3-tuple of str). When an edge type appears in multiple canonical edge types, one
+            must use a canonical edge type. If the graph has multiple edge types, one must
+            specify the argument. Otherwise, it can be omitted, and the incidence matrix
+            of the graph's only edge type is returned.

        Returns
        -------
@@ -2541,18 +3468,22 @@ class DGLHeteroGraph(object):
        Examples
        --------

-        >>> g = dgl.graph(([0, 1], [0, 2]), 'user', 'follows')
-        >>> g.incidence_matrix('in')
+        The following example uses PyTorch backend.
+
+        >>> import dgl
+
+        >>> g = dgl.graph(([0, 1], [0, 2]))
+        >>> g.inc('in')
        tensor(indices=tensor([[0, 2],
                               [0, 1]]),
               values=tensor([1., 1.]),
               size=(3, 2), nnz=2, layout=torch.sparse_coo)
-        >>> g.incidence_matrix('out')
+        >>> g.inc('out')
        tensor(indices=tensor([[0, 1],
                               [0, 1]]),
               values=tensor([1., 1.]),
               size=(3, 2), nnz=2, layout=torch.sparse_coo)
-        >>> g.incidence_matrix('both')
+        >>> g.inc('both')
        tensor(indices=tensor([[1, 2],
                               [1, 1]]),
               values=tensor([-1.,  1.]),
@@ -2561,9 +3492,6 @@ class DGLHeteroGraph(object):
        etid = self.get_etype_id(etype)
        return self._graph.incidence_matrix(etid, typestr, ctx)[0]

-    # Alias of ``incidence_matrix``
-    inc = incidence_matrix
-
    #################################################################
    # Features
    #################################################################
@@ -2571,29 +3499,44 @@ class DGLHeteroGraph(object):
    def node_attr_schemes(self, ntype=None):
        """Return the node feature schemes for the specified type.

-        Each feature scheme is a named tuple that stores the shape and data type
-        of the node feature.
+        The scheme of a feature describes its shape and data type.

        Parameters
        ----------
        ntype : str, optional
-            The node type. Can be omitted if there is only one node
-            type in the graph. Error will be raised otherwise.
-            (Default: None)
+            The node type for query. If the graph has multiple node types, one must
+            specify the argument. Otherwise, it can be omitted.

        Returns
        -------
-        dict of str to schemes
-            The schemes of node feature columns.
+        dict[str, Scheme]
+            A dictionary mapping a feature name to its associated feature scheme.

        Examples
        --------
-        The following uses PyTorch backend.
+        The following example uses PyTorch backend.
+
+        >>> import dgl
+        >>> import torch
+
+        Query for a homogeneous graph.

-        >>> g = dgl.graph(([0, 1], [0, 2]), 'user', 'follows')
-        >>> g.nodes['user'].data['h'] = torch.randn(3, 4)
+        >>> g = dgl.graph((torch.tensor([0, 1]), torch.tensor([1, 2])))
+        >>> g.ndata['h1'] = torch.randn(3, 1)
+        >>> g.ndata['h2'] = torch.randn(3, 2)
+        >>> g.node_attr_schemes()
+        {'h1': Scheme(shape=(1,), dtype=torch.float32),
+         'h2': Scheme(shape=(2,), dtype=torch.float32)}
+
+        Query for a heterogeneous graph of multiple node types.
+
+        >>> g = dgl.heterograph({('user', 'plays', 'game'):
+        ...                      (torch.tensor([1, 2]), torch.tensor([3, 4]))})
+        >>> g.nodes['user'].data['h1'] = torch.randn(3, 1)
+        >>> g.nodes['user'].data['h2'] = torch.randn(3, 2)
        >>> g.node_attr_schemes('user')
-        {'h': Scheme(shape=(4,), dtype=torch.float32)}
+        {'h1': Scheme(shape=(1,), dtype=torch.float32),
+         'h2': Scheme(shape=(2,), dtype=torch.float32)}

        See Also
        --------
@@ -2604,28 +3547,48 @@ class DGLHeteroGraph(object):
    def edge_attr_schemes(self, etype=None):
        """Return the edge feature schemes for the specified type.

-        Each feature scheme is a named tuple that stores the shape and data type
-        of the edge feature.
+        The scheme of a feature describes its shape and data type.

        Parameters
        ----------
        etype : str or tuple of str, optional
-            The edge type. Can be omitted if there is only one edge type
-            in the graph. (Default: None)
+            The edge type for query, which can be an edge type (str) or a canonical edge type
+            (3-tuple of str). When an edge type appears in multiple canonical edge types, one
+            must use a canonical edge type. If the graph has multiple edge types, one must
+            specify the argument. Otherwise, it can be omitted.

        Returns
        -------
-        dict of str to schemes
-            The schemes of edge feature columns.
+        dict[str, Scheme]
+            A dictionary mapping a feature name to its associated feature scheme.

        Examples
        --------
-        The following uses PyTorch backend.
+        The following example uses PyTorch backend.
+
+        >>> import dgl
+        >>> import torch
+
+        Query for a homogeneous graph.

-        >>> g = dgl.bipartite(([0, 1, 1, 2], [0, 0, 2, 1]), 'user', 'plays', 'game')
-        >>> g.edges['user', 'plays', 'game'].data['h'] = torch.randn(4, 4)
-        >>> g.edge_attr_schemes(('user', 'plays', 'game'))
-        {'h': Scheme(shape=(4,), dtype=torch.float32)}
+        >>> g = dgl.graph((torch.tensor([0, 1]), torch.tensor([1, 2])))
+        >>> g.edata['h1'] = torch.randn(2, 1)
+        >>> g.edata['h2'] = torch.randn(2, 2)
+        >>> g.edge_attr_schemes()
+        {'h1': Scheme(shape=(1,), dtype=torch.float32),
+         'h2': Scheme(shape=(2,), dtype=torch.float32)}
+
+        Query for a heterogeneous graph of multiple edge types.
+
+        >>> g = dgl.heterograph({('user', 'plays', 'game'):
+        ...                      (torch.tensor([1, 2]), torch.tensor([3, 4])),
+        ...                      ('user', 'follows', 'user'):
+        ...                      (torch.tensor([3, 4]), torch.tensor([5, 6]))})
+        >>> g.edges['plays'].data['h1'] = torch.randn(2, 1)
+        >>> g.edges['plays'].data['h2'] = torch.randn(2, 2)
+        >>> g.edge_attr_schemes('plays')
+        {'h1': Scheme(shape=(1,), dtype=torch.float32),
+         'h2': Scheme(shape=(2,), dtype=torch.float32)}

        See Also
        --------
@@ -2634,67 +3597,180 @@ class DGLHeteroGraph(object):
        return self._edge_frames[self.get_etype_id(etype)].schemes

    def set_n_initializer(self, initializer, field=None, ntype=None):
-        """Set the initializer for empty node features.
-
-        Initializer is a callable that returns a tensor given the shape, data type
-        and device context.
+        """Set the initializer for node features.

-        When a subset of the nodes are assigned a new feature, initializer is
-        used to create feature for the rest of the nodes.
+        When only part of the nodes have a feature (e.g. new nodes are added,
+        features are set for a subset of nodes), the initializer initializes
+        features for the rest of the nodes.

        Parameters
        ----------
        initializer : callable
-            The initializer, mapping (shape, data type, context) to tensor.
+            A function of signature ``func(shape, dtype, ctx, id_range) -> Tensor``.
+            The tensor will be the initialized features. The arguments are:
+
+            - ``shape``: The shape of the tensor to return, which is a tuple of int.
+              The first dimension is the number of nodes for feature initialization.
+            - ``dtype``: The data type of the tensor to return, which is a
+              framework-specific data type object.
+            - ``ctx``: The device of the tensor to return, which is a framework-specific
+              device object.
+            - ``id_range``: The start and end ID of the nodes for feature initialization,
+              which is a slice.
        field : str, optional
-            The feature field name. Default is to set an initializer for all the
-            feature fields.
+            The name of the feature that the initializer applies to. If not given, the
+            initializer applies to all features.
        ntype : str, optional
-            The node type. Can be omitted if there is only one node
-            type in the graph. Error will be raised otherwise.
-            (Default: None)
+            The type of the nodes that the initializer applies to. If the graph has
+            multiple node types, one must specify the argument. Otherwise, it can
+            be omitted.

-        Note
+        Notes
        -----
-        User defined initializer must follow the signature of
-        :func:`dgl.init.base_initializer() <dgl.init.base_initializer>`
+        Without setting a node feature initializer, zero tensors are generated
+        for nodes without a feature.

-        See Also
+        Examples
        --------
-        set_e_initializer
+
+        The following example uses PyTorch backend.
+
+        >>> import dgl
+        >>> import torch
+
+        Define a function for initializer.
+
+        >>> def init_feats(shape, dtype, device, id_range):
+        ...     return torch.ones(shape, dtype=dtype, device=device)
+
+        An example for a homogeneous graph.
+
+        >>> g = dgl.graph((torch.tensor([0]), torch.tensor([1])))
+        >>> g.ndata['h1'] = torch.zeros(2, 2)
+        >>> g.ndata['h2'] = torch.ones(2, 1)
+        >>> # Apply the initializer to feature 'h2' only.
+        >>> g.set_n_initializer(init_feats, field='h2')
+        >>> g.add_nodes(1)
+        >>> print(g.ndata['h1'])
+        tensor([[0., 0.],
+                [0., 0.],
+                [0., 0.]])
+        >>> print(g.ndata['h2'])
+        tensor([[1.], [1.], [1.]])
+
+        An example for a heterogeneous graph of multiple node types.
+
+        >>> g = dgl.heterograph({
+        ...     ('user', 'plays', 'game'): (torch.tensor([0, 1, 1, 2]),
+        ...                                 torch.tensor([0, 0, 1, 1])),
+        ...     ('developer', 'develops', 'game'): (torch.tensor([0, 1]),
+        ...                                         torch.tensor([0, 1]))
+        ...     })
+        >>> g.nodes['user'].data['h'] = torch.zeros(3, 2)
+        >>> g.nodes['game'].data['w'] = torch.ones(2, 2)
+        >>> g.set_n_initializer(init_feats, ntype='game')
+        >>> g.add_nodes(1, ntype='user')
+        >>> # Initializer not set for 'user', use zero tensors by default
+        >>> g.nodes['user'].data['h']
+        tensor([[0., 0.],
+                [0., 0.],
+                [0., 0.],
+                [0., 0.]])
+        >>> # Initializer set for 'game'
+        >>> g.add_nodes(1, ntype='game')
+        >>> g.nodes['game'].data['w']
+        tensor([[1., 1.],
+                [1., 1.],
+                [1., 1.]])
        """
        ntid = self.get_ntype_id(ntype)
        self._node_frames[ntid].set_initializer(initializer, field)

    def set_e_initializer(self, initializer, field=None, etype=None):
-        """Set the initializer for empty edge features.
+        """Set the initializer for edge features.

-        Initializer is a callable that returns a tensor given the shape, data
-        type and device context.
-
-        When a subset of the edges are assigned a new feature, initializer is
-        used to create feature for rest of the edges.
+        When only part of the edges have a feature (e.g. new edges are added,
+        features are set for a subset of edges), the initializer initializes
+        features for the rest of the edges.

        Parameters
        ----------
        initializer : callable
-            The initializer, mapping (shape, data type, context) to tensor.
+            A function of signature ``func(shape, dtype, ctx, id_range) -> Tensor``.
+            The tensor will be the initialized features. The arguments are:
+
+            - ``shape``: The shape of the tensor to return, which is a tuple of int.
+              The first dimension is the number of edges for feature initialization.
+            - ``dtype``: The data type of the tensor to return, which is a
+              framework-specific data type object.
+            - ``ctx``: The device of the tensor to return, which is a framework-specific
+              device object.
+            - ``id_range``: The start and end ID of the edges for feature initialization,
+              which is a slice.
        field : str, optional
-            The feature field name. Default is set an initializer for all the
-            feature fields.
+            The name of the feature that the initializer applies to. If not given, the
+            initializer applies to all features.
        etype : str or tuple of str, optional
-            The edge type. Can be omitted if there is only one edge type
-            in the graph. Error will be raised otherwise.
-            (Default: None)
+            The edge type for query, which can be an edge type (str) or a canonical edge type
+            (3-tuple of str). When an edge type appears in multiple canonical edge types, one
+            must use a canonical edge type. If the graph has multiple edge types, one must
+            specify the argument. Otherwise, it can be omitted.

-        Note
+        Notes
        -----
-        User defined initializer must follow the signature of
-        :func:`dgl.init.base_initializer() <dgl.init.base_initializer>`
+        Without setting an edge feature initializer, zero tensors are generated
+        for edges without a feature.

-        See Also
+        Examples
        --------
-        set_n_initializer
+
+        The following example uses PyTorch backend.
+
+        >>> import dgl
+        >>> import torch
+
+        Define a function for initializer.
+
+        >>> def init_feats(shape, dtype, device, id_range):
+        ...     return torch.ones(shape, dtype=dtype, device=device)
+
+        An example for a homogeneous graph.
+
+        >>> g = dgl.graph((torch.tensor([0]), torch.tensor([1])))
+        >>> g.edata['h1'] = torch.zeros(1, 2)
+        >>> g.edata['h2'] = torch.ones(1, 1)
+        >>> # Apply the initializer to feature 'h2' only.
+        >>> g.set_e_initializer(init_feats, field='h2')
+        >>> g.add_edges(torch.tensor([1]), torch.tensor([1]))
+        >>> print(g.edata['h1'])
+        tensor([[0., 0.],
+                [0., 0.]])
+        >>> print(g.edata['h2'])
+        tensor([[1.], [1.]])
+
+        An example for a heterogeneous graph of multiple edge types.
+
+        >>> g = dgl.heterograph({
+        ...     ('user', 'plays', 'game'): (torch.tensor([0, 1]),
+        ...                                 torch.tensor([0, 0])),
+        ...     ('developer', 'develops', 'game'): (torch.tensor([0, 1]),
+        ...                                         torch.tensor([0, 1]))
+        ...     })
+        >>> g.edges['plays'].data['h'] = torch.zeros(2, 2)
+        >>> g.edges['develops'].data['w'] = torch.ones(2, 2)
+        >>> g.set_e_initializer(init_feats, etype='plays')
+        >>> # Initializer not set for 'develops', use zero tensors by default
+        >>> g.add_edges(torch.tensor([1]), torch.tensor([1]), etype='develops')
+        >>> g.edges['develops'].data['w']
+        tensor([[1., 1.],
+                [1., 1.],
+                [0., 0.]])
+        >>> # Initializer set for 'plays'
+        >>> g.add_edges(torch.tensor([1]), torch.tensor([1]), etype='plays')
+        >>> g.edges['plays'].data['h']
+        tensor([[0., 0.],
+                [0., 0.],
+                [1., 1.]])
        """
        etid = self.get_etype_id(etype)
        self._edge_frames[etid].set_initializer(initializer, field)
@@ -2897,7 +3973,7 @@ class DGLHeteroGraph(object):

        Examples
        --------
-        >>> g = dgl.graph(([0, 1], [1, 2]), 'user', 'follows')
+        >>> g = dgl.heterograph({('user', 'follows', 'user'): ([0, 1], [1, 2])})
        >>> g.nodes['user'].data['h'] = torch.ones(3, 5)
        >>> g.apply_nodes(lambda nodes: {'h': nodes.data['h'] * 2}, ntype='user')
        >>> g.nodes['user'].data['h']
@@ -2942,7 +4018,7 @@ class DGLHeteroGraph(object):

        Examples
        --------
-        >>> g = dgl.bipartite(([0, 1, 1, 2], [0, 0, 2, 1]), 'user', 'plays', 'game')
+        >>> g = dgl.heterograph({('user', 'plays', 'game'): ([0, 1, 1, 2], [0, 0, 2, 1])})
        >>> g.edges[('user', 'plays', 'game')].data['h'] = torch.ones(4, 5)
        >>> g.apply_edges(lambda edges: {'h': edges.data['h'] * 2})
        >>> g.edges[('user', 'plays', 'game')].data['h']
@@ -3021,12 +4097,13 @@ class DGLHeteroGraph(object):
        >>> import dgl.function as fn
        >>> import torch

-        >>> follows_g = dgl.graph(([0, 1], [1, 2]), 'user', 'follows')
-        >>> plays_g = dgl.bipartite(([0, 1, 1, 2], [0, 0, 1, 1]), 'user', 'plays', 'game')
-        >>> g = dgl.hetero_from_relations([follows_g, plays_g])
+        >>> g = dgl.heterograph({
+        ...     ('user', 'follows', 'user'): ([0, 1], [1, 2]),
+        ...     ('user', 'plays', 'game'): ([0, 1, 1, 2], [0, 0, 1, 1])
+        ... })
        >>> g.nodes['user'].data['h'] = torch.tensor([[0.], [1.], [2.]])
        >>> g.send_and_recv(g['follows'].edges(), fn.copy_src('h', 'm'),
-        >>>                 fn.sum('m', 'h'), etype='follows')
+        ...                 fn.sum('m', 'h'), etype='follows')
        >>> g.nodes['user'].data['h']
        tensor([[0.],
                [0.],
@@ -3045,7 +4122,8 @@ class DGLHeteroGraph(object):
            return
        u, v = self.find_edges(eid, etype=etype)
        # call message passing onsubgraph
-        ndata = core.message_passing(_create_compute_graph(self, u, v, eid),
+        g = self if etype is None else self[etype]
+        ndata = core.message_passing(_create_compute_graph(g, u, v, eid),
                                     message_func, reduce_func, apply_node_func)
        dstnodes = F.unique(v)
        self._set_n_repr(dtid, dstnodes, ndata)
@@ -3106,9 +4184,10 @@ class DGLHeteroGraph(object):

        Instantiate a heterograph.

-        >>> follows_g = dgl.graph(([0, 1], [1, 2]), 'user', 'follows')
-        >>> plays_g = dgl.bipartite(([0, 2], [0, 1]), 'user', 'plays', 'game')
-        >>> g = dgl.hetero_from_relations([follows_g, plays_g])
+        >>> g = dgl.heterograph({
+        ...     ('user', 'follows', 'user'): ([0, 1], [1, 2]),
+        ...     ('user', 'plays', 'game'): ([0, 2], [0, 1])
+        ... })
        >>> g.nodes['user'].data['h'] = torch.tensor([[0.], [1.], [2.]])

        Pull.
@@ -3131,7 +4210,7 @@ class DGLHeteroGraph(object):
        g = self if etype is None else self[etype]
        # call message passing on subgraph
        src, dst, eid = g.in_edges(v, form='all')
-        ndata = core.message_passing(_create_compute_graph(self, src, dst, eid, v),
+        ndata = core.message_passing(_create_compute_graph(g, src, dst, eid, v),
                                     message_func, reduce_func, apply_node_func)
        self._set_n_repr(dtid, v, ndata)

@@ -3182,7 +4261,7 @@ class DGLHeteroGraph(object):

        Instantiate a heterograph.

-        >>> g = dgl.graph(([0, 0], [1, 2]), 'user', 'follows')
+        >>> g = dgl.heterograph({('user', 'follows', 'user'): ([0, 0], [1, 2])})
        >>> g.nodes['user'].data['h'] = torch.tensor([[0.], [1.], [2.]])

        Push.
@@ -3240,7 +4319,7 @@ class DGLHeteroGraph(object):

        Instantiate a heterograph.

-        >>> g = dgl.graph(([0, 1, 2], [1, 2, 2]), 'user', 'follows')
+        >>> g = dgl.heterograph({('user', 'follows', 'user'): ([0, 1, 2], [1, 2, 2])})

        Update all.

@@ -3301,18 +4380,19 @@ class DGLHeteroGraph(object):

        Instantiate a heterograph.

-        >>> g1 = dgl.graph(([0, 1], [1, 1]), 'user', 'follows')
-        >>> g2 = dgl.bipartite(([0], [1]), 'game', 'attracts', 'user')
-        >>> g = dgl.hetero_from_relations([g1, g2])
+        >>> g = dgl.heterograph({
+        ...     ('user', 'follows', 'user'): ([0, 1], [1, 1]),
+        ...     ('game', 'attracts', 'user'): ([0], [1])
+        ... })
        >>> g.nodes['user'].data['h'] = torch.tensor([[1.], [2.]])
        >>> g.nodes['game'].data['h'] = torch.tensor([[1.]])

        Update all.

        >>> g.multi_update_all(
-        >>>     {'follows': (fn.copy_src('h', 'm'), fn.sum('m', 'h')),
-        >>>      'attracts': (fn.copy_src('h', 'm'), fn.sum('m', 'h'))},
-        >>> "sum")
+        ...     {'follows': (fn.copy_src('h', 'm'), fn.sum('m', 'h')),
+        ...      'attracts': (fn.copy_src('h', 'm'), fn.sum('m', 'h'))},
+        ... "sum")
        >>> g.nodes['user'].data['h']
        tensor([[0.],
                [4.]])
@@ -3327,7 +4407,8 @@ class DGLHeteroGraph(object):
                raise DGLError('Invalid arguments for edge type "{}". Should be '
                               '(msg_func, reduce_func, [apply_node_func])'.format(etype))
            mfunc, rfunc, afunc = args
-            all_out[dtid].append(core.message_passing(self[etype], mfunc, rfunc, afunc))
+            g = self if etype is None else self[etype]
+            all_out[dtid].append(core.message_passing(g, mfunc, rfunc, afunc))
            merge_order[dtid].append(etid)  # use edge type id as merge order hint
        for dtid, frames in all_out.items():
            # merge by cross_reducer
@@ -3381,10 +4462,10 @@ class DGLHeteroGraph(object):

        Instantiate a heterogrph and perform multiple rounds of message passing.

-        >>> g = dgl.graph(([0, 1, 2, 3], [2, 3, 4, 4]), 'user', 'follows')
+        >>> g = dgl.heterograph({('user', 'follows', 'user'): ([0, 1, 2, 3], [2, 3, 4, 4])})
        >>> g.nodes['user'].data['h'] = torch.tensor([[1.], [2.], [3.], [4.], [5.]])
        >>> g['follows'].prop_nodes([[2, 3], [4]], fn.copy_src('h', 'm'),
-        >>>                         fn.sum('m', 'h'), etype='follows')
+        ...                         fn.sum('m', 'h'), etype='follows')
        tensor([[1.],
                [2.],
                [1.],
@@ -3439,10 +4520,10 @@ class DGLHeteroGraph(object):

        Instantiate a heterogrph and perform multiple rounds of message passing.

-        >>> g = dgl.graph(([0, 1, 2, 3], [2, 3, 4, 4]), 'user', 'follows')
+        >>> g = dgl.heterograph({('user', 'follows', 'user'): ([0, 1, 2, 3], [2, 3, 4, 4])})
        >>> g.nodes['user'].data['h'] = torch.tensor([[1.], [2.], [3.], [4.], [5.]])
        >>> g['follows'].prop_edges([[0, 1], [2, 3]], fn.copy_src('h', 'm'),
-        >>>                         fn.sum('m', 'h'), etype='follows')
+        ...                         fn.sum('m', 'h'), etype='follows')
        >>> g.nodes['user'].data['h']
        tensor([[1.],
                [2.],
@@ -3463,38 +4544,78 @@ class DGLHeteroGraph(object):
    #################################################################

    def filter_nodes(self, predicate, nodes=ALL, ntype=None):
-        """Return a tensor of node IDs with the given node type that satisfy
+        """Return the IDs of the nodes with the given node type that satisfy
        the given predicate.

        Parameters
        ----------
        predicate : callable
-            A function of signature ``func(nodes) -> tensor``.
-            ``nodes`` are :class:`NodeBatch` objects as in :mod:`~dgl.udf`.
-            The ``tensor`` returned should be a 1-D boolean tensor with
+            A function of signature ``func(nodes) -> Tensor``.
+            ``nodes`` are :class:`dgl.NodeBatch` objects.
+            Its output tensor should be a 1D boolean tensor with
            each element indicating whether the corresponding node in
            the batch satisfies the predicate.
-        nodes : int, iterable or tensor of ints
-            The nodes to filter on. Default value is all the nodes.
+        nodes : node ID(s), optional
+            The node(s) for query. The allowed formats are:
+
+            - Tensor: A 1D tensor that contains the node(s) for query, whose data type
+              and device should be the same as the :py:attr:`idtype` and device of the graph.
+            - iterable[int] : Similar to the tensor, but stores node IDs in a sequence
+              (e.g. list, tuple, numpy.ndarray).
+
+            By default, it considers all nodes.
        ntype : str, optional
-            The node type. Can be omitted if there is only one node type
-            in the graph. (Default: None)
+            The node type for query. If the graph has multiple node types, one must
+            specify the argument. Otherwise, it can be omitted.

        Returns
        -------
        tensor
-            Node ids indicating the nodes that satisfy the predicate.
+            A 1D tensor that contains the ID(s) of the node(s) that satisfy the predicate.

        Examples
        --------
-        >>> import torch
+
+        The following example uses PyTorch backend.
+
        >>> import dgl
-        >>> import dgl.function as fn
-        >>> g = dgl.graph([], 'user', 'follows', num_nodes=4)
-        >>> g.nodes['user'].data['h'] = torch.tensor([[0.], [1.], [1.], [0.]])
-        >>> g.filter_nodes(lambda nodes: (nodes.data['h'] == 1.).squeeze(1), ntype='user')
+        >>> import torch
+
+        Define a predicate function.
+
+        >>> def nodes_with_feature_one(nodes):
+        ...     # Whether a node has feature 1
+        ...     return (nodes.data['h'] == 1.).squeeze(1)
+
+        Filter nodes for a homogeneous graph.
+
+        >>> g = dgl.graph((torch.tensor([0, 1, 2]), torch.tensor([1, 2, 3])))
+        >>> g.ndata['h'] = torch.tensor([[0.], [1.], [1.], [0.]])
+        >>> print(g.filter_nodes(nodes_with_feature_one))
+        tensor([1, 2])
+
+        Filter on nodes with IDs 0 and 1
+
+        >>> print(g.filter_nodes(nodes_with_feature_one, nodes=torch.tensor([0, 1])))
+        tensor([1])
+
+        Filter nodes for a heterogeneous graph.
+
+        >>> g = dgl.heterograph({
+        ...     ('user', 'plays', 'game'): (torch.tensor([0, 1, 1, 2]),
+        ...                                 torch.tensor([0, 0, 1, 1]))})
+        >>> g.nodes['user'].data['h'] = torch.tensor([[0.], [1.], [1.]])
+        >>> g.nodes['game'].data['h'] = torch.tensor([[0.], [1.]])
+        >>> # Filter for 'user' nodes
+        >>> print(g.filter_nodes(nodes_with_feature_one, ntype='user'))
        tensor([1, 2])
        """
+        if is_all(nodes):
+            nodes = self.nodes(ntype)
+        v = utils.prepare_tensor(self, nodes, 'nodes')
+        if F.as_scalar(F.sum(self.has_nodes(v, ntype=ntype), dim=0)) != len(v):
+            raise DGLError('v contains invalid node IDs')
+
        with self.local_scope():
            self.apply_nodes(lambda nbatch: {'_mask' : predicate(nbatch)}, nodes, ntype)
            ntype = self.ntypes[0] if ntype is None else ntype
@@ -3502,43 +4623,105 @@ class DGLHeteroGraph(object):
            if is_all(nodes):
                return F.nonzero_1d(mask)
            else:
-                v = utils.prepare_tensor(self, nodes, 'nodes')
                return F.boolean_mask(v, F.gather_row(mask, v))

    def filter_edges(self, predicate, edges=ALL, etype=None):
-        """Return a tensor of edge IDs with the given edge type that satisfy
+        """Return the IDs of the edges with the given edge type that satisfy
        the given predicate.

        Parameters
        ----------
        predicate : callable
-            A function of signature ``func(edges) -> tensor``.
-            ``edges`` are :class:`EdgeBatch` objects as in :mod:`~dgl.udf`.
-            The ``tensor`` returned should be a 1-D boolean tensor with
+            A function of signature ``func(edges) -> Tensor``.
+            ``edges`` are :class:`dgl.EdgeBatch` objects.
+            Its output tensor should be a 1D boolean tensor with
            each element indicating whether the corresponding edge in
            the batch satisfies the predicate.
-        edges : valid edges type
-            Edges on which to apply ``func``. See :func:`send` for valid
-            edges type. Default value is all the edges.
-        etype : str, optional
-            The edge type. Can be omitted if there is only one edge type
-            in the graph. (Default: None)
+        edges : edge ID(s) or edge end nodes, optional
+            The edge(s) for query. The allowed formats are:
+
+            - Tensor: A 1D tensor that contains the IDs of the edge(s) for query, whose data
+              type and device should be the same as the :py:attr:`idtype` and device of the graph.
+            - iterable[int]: Similar to the tensor, but stores edge IDs in a sequence
+              (e.g. list, tuple, numpy.ndarray).
+            - (Tensor, Tensor): A 2-tuple of the source and destination nodes of multiple
+              edges for query. Each tensor is a 1D tensor containing node IDs. DGL calls this
+              format "tuple of node-tensors". The data type and device of the tensors should
+              be the same as the :py:attr:`idtype` and device of the graph.
+            - (iterable[int], iterable[int]): Similar to the tuple of node-tensors format,
+              but stores node IDs in two sequences (e.g. list, tuple, numpy.ndarray).
+
+            By default, it considers all edges.
+        etype : str or tuple of str, optional
+            The edge type for query, which can be an edge type (str) or a canonical edge type
+            (3-tuple of str). When an edge type appears in multiple canonical edge types, one
+            must use a canonical edge type. If the graph has multiple edge types, one must
+            specify the argument. Otherwise, it can be omitted.

        Returns
        -------
        tensor
-            Edge ids indicating the edges that satisfy the predicate.
+            A 1D tensor that contains the ID(s) of the edge(s) that satisfy the predicate.

        Examples
        --------
-        >>> import torch
+
+        The following example uses PyTorch backend.
+
        >>> import dgl
-        >>> import dgl.function as fn
-        >>> g = dgl.graph(([0, 0, 1, 2], [0, 1, 2, 3]), 'user', 'follows')
-        >>> g.edges['follows'].data['h'] = torch.tensor([[0.], [1.], [1.], [0.]])
-        >>> g.filter_edges(lambda edges: (edges.data['h'] == 1.).squeeze(1), etype='follows')
+        >>> import torch
+
+        Define a predicate function.
+
+        >>> def edges_with_feature_one(edges):
+        ...     # Whether an edge has feature 1
+        ...     return (edges.data['h'] == 1.).squeeze(1)
+
+        Filter edges for a homogeneous graph.
+
+        >>> g = dgl.graph((torch.tensor([0, 1, 2]), torch.tensor([1, 2, 3])))
+        >>> g.edata['h'] = torch.tensor([[0.], [1.], [1.]])
+        >>> print(g.filter_edges(edges_with_feature_one))
+        tensor([1, 2])
+
+        Filter on edges with IDs 0 and 1
+
+        >>> print(g.filter_edges(edges_with_feature_one, edges=torch.tensor([0, 1])))
+        tensor([1])
+
+        Filter edges for a heterogeneous graph.
+
+        >>> g = dgl.heterograph({
+        ...     ('user', 'plays', 'game'): (torch.tensor([0, 1, 1, 2]),
+        ...                                 torch.tensor([0, 0, 1, 1])),
+        ...     ('user', 'follows', 'user'): (torch.tensor([0, 1]), torch.tensor([1, 2]))})
+        >>> g.edges['plays'].data['h'] = torch.tensor([[0.], [1.], [1.], [0.]])
+        >>> # Filter for 'plays' edges
+        >>> print(g.filter_edges(edges_with_feature_one, etype='plays'))
        tensor([1, 2])
        """
+        if is_all(edges):
+            pass
+        elif isinstance(edges, tuple):
+            u, v = edges
+            srctype, _, dsttype = self.to_canonical_etype(etype)
+            u = utils.prepare_tensor(self, u, 'u')
+            if F.as_scalar(F.sum(self.has_nodes(u, ntype=srctype), dim=0)) != len(u):
+                raise DGLError('edges[0] contains invalid node IDs')
+            v = utils.prepare_tensor(self, v, 'v')
+            if F.as_scalar(F.sum(self.has_nodes(v, ntype=dsttype), dim=0)) != len(v):
+                raise DGLError('edges[1] contains invalid node IDs')
+        elif isinstance(edges, Iterable) or F.is_tensor(edges):
+            edges = utils.prepare_tensor(self, edges, 'edges')
+            if len(edges) > 0:  # min/max reductions are undefined on an empty ID tensor
+                min_eid = F.as_scalar(F.min(edges, 0))
+                max_eid = F.as_scalar(F.max(edges, 0))
+                if min_eid < 0 or max_eid >= self.num_edges(etype):
+                    bad_eid = min_eid if min_eid < 0 else max_eid
+                    raise DGLError('Invalid edge ID {:d}'.format(bad_eid))
+        else:
+            raise ValueError('Unsupported type of edges:', type(edges))
+
        with self.local_scope():
            self.apply_edges(lambda ebatch: {'_mask' : predicate(ebatch)}, edges, etype)
            etype = self.canonical_etypes[0] if etype is None else etype
@@ -3554,53 +4737,77 @@ class DGLHeteroGraph(object):

    @property
    def device(self):
-        """Get the device context of this graph.
+        """Get the device of the graph.
+
+        Returns
+        -------
+        device context
+            The device of the graph, which should be a framework-specific device object
+            (e.g., ``torch.device``).

        Examples
        --------
        The following example uses PyTorch backend.

-        >>> g = dgl.bipartite(([0, 1, 1, 2], [0, 0, 2, 1]), 'user', 'plays', 'game')
+        >>> import dgl
+        >>> import torch
+
+        Create a homogeneous graph for demonstration.
+
+        >>> g = dgl.graph((torch.tensor([0, 1]), torch.tensor([1, 2])))
        >>> print(g.device)
        device(type='cpu')
-        >>> g = g.to('cuda:0')
-        >>> print(g.device)
-        device(type='cuda', index=0)

-        Returns
-        -------
-        Device context object
+        The case of heterogeneous graphs is the same.
        """
        return F.to_backend_ctx(self._graph.ctx)

    def to(self, device, **kwargs):  # pylint: disable=invalid-name
        """Move ndata, edata and graph structure to the targeted device (cpu/gpu).

+        If the graph is already on the specified device, the function directly returns it.
+        Otherwise, it returns a cloned graph on the specified device.
+
        Parameters
        ----------
        device : Framework-specific device context object
-            The context to move data to.
+            The context to move data to (e.g., ``torch.device``).
        kwargs : Key-word arguments.
            Key-word arguments fed to the framework copy function.

        Returns
        -------
-        g : DGLHeteroGraph
-          Moved DGLHeteroGraph of the targeted mode.
+        DGLGraph
+            The graph on the specified device.

        Examples
        --------
        The following example uses PyTorch backend.

+        >>> import dgl
        >>> import torch
-        >>> g = dgl.bipartite(([0, 1, 1, 2], [0, 0, 2, 1]), 'user', 'plays', 'game')
-        >>> g.nodes['user'].data['h'] = torch.tensor([[0.], [1.], [2.]])
-        >>> g.edges['plays'].data['h'] = torch.tensor([[0.], [1.], [2.], [3.]])
+
+        >>> g = dgl.graph((torch.tensor([1, 0]), torch.tensor([1, 2])))
+        >>> g.ndata['h'] = torch.ones(3, 1)
+        >>> g.edata['h'] = torch.zeros(2, 2)
        >>> g1 = g.to(torch.device('cuda:0'))
        >>> print(g1.device)
        device(type='cuda', index=0)
+        >>> print(g1.ndata['h'].device)
+        device(type='cuda', index=0)
+        >>> print(g1.nodes().device)
+        device(type='cuda', index=0)
+
+        The original graph is still on CPU.
+
        >>> print(g.device)
        device(type='cpu')
+        >>> print(g.ndata['h'].device)
+        device(type='cpu')
+        >>> print(g.nodes().device)
+        device(type='cpu')
+
+        The case of heterogeneous graphs is the same.
        """
        if device is None or self.device == device:
            return self
@@ -3680,62 +4887,72 @@ class DGLHeteroGraph(object):
        return ret

    def local_var(self):
-        """Return a heterograph object that can be used in a local function scope.
+        """Return a graph object for usage in a local function scope.

        The returned graph object shares the feature data and graph structure of this graph.
        However, any out-place mutation to the feature data will not reflect to this graph,
-        thus making it easier to use in a function scope.
+        thus making it easier to use in a function scope (e.g. forward computation of a model).

        If set, the local graph object will use same initializers for node features and
        edge features.

        Returns
        -------
-        DGLHeteroGraph
-            The graph object that can be used as a local variable.
+        DGLGraph
+            The graph object for a local variable.

        Notes
        -----
-        Internally, the returned graph shares the same feature tensors, but construct a new
-        dictionary structure (aka. Frame) so adding/removing feature tensors from the returned
-        graph will not reflect to the original graph. However, inplace operations do change
-        the shared tensor values, so will be reflected to the original graph. This function
-        also has little overhead when the number of feature tensors in this graph is small.
+        Inplace operations do reflect to the original graph. This function also has little
+        overhead when the number of feature tensors in this graph is small.

        Examples
        --------
+
        The following example uses PyTorch backend.

-        Avoid accidentally overriding existing feature data. This is quite common when
-        implementing a NN module:
+        >>> import dgl
+        >>> import torch
+
+        Create a function for computation on graphs.

        >>> def foo(g):
-        >>>     g = g.local_var()
-        >>>     g.edata['h'] = torch.ones((g.number_of_edges(), 3))
-        >>>     return g.edata['h']
-        >>>
-        >>> g = dgl.bipartite(([0, 1, 1], [0, 0, 2]), 'user', 'plays', 'game')
-        >>> g.edata['h'] = torch.zeros((g.number_of_edges(), 3))
-        >>> newh = foo(g)        # get tensor of all ones
+        ...     g = g.local_var()
+        ...     g.edata['h'] = torch.ones((g.num_edges(), 3))
+        ...     g.edata['h2'] = torch.ones((g.num_edges(), 3))
+        ...     return g.edata['h']
+
+        ``local_var`` avoids changing the graph features when exiting the function.
+
+        >>> g = dgl.graph((torch.tensor([0, 1, 1]), torch.tensor([0, 0, 2])))
+        >>> g.edata['h'] = torch.zeros((g.num_edges(), 3))
+        >>> newh = foo(g)
        >>> print(g.edata['h'])  # still get tensor of all zeros
+        tensor([[0., 0., 0.],
+                [0., 0., 0.],
+                [0., 0., 0.]])
+        >>> 'h2' in g.edata      # new feature set in the function scope is not found
+        False

-        Automatically garbage collect locally-defined tensors without the need to manually
-        ``pop`` the tensors.
+        In-place operations will still reflect to the original graph.

        >>> def foo(g):
-        >>>     g = g.local_var()
-        >>>     # This 'h' feature will stay local and be GCed when the function exits
-        >>>     g.edata['h'] = torch.ones((g.number_of_edges(), 3))
-        >>>     return g.edata['h']
-        >>>
-        >>> g = dgl.bipartite(([0, 1, 1], [0, 0, 2]), 'user', 'plays', 'game')
-        >>> h = foo(g)
-        >>> print('h' in g.edata)
-        False
+        ...     g = g.local_var()
+        ...     # in-place operation
+        ...     g.edata['h'] += 1
+        ...     return g.edata['h']
+
+        >>> g = dgl.graph((torch.tensor([0, 1, 1]), torch.tensor([0, 0, 2])))
+        >>> g.edata['h'] = torch.zeros((g.num_edges(), 1))
+        >>> newh = foo(g)
+        >>> print(g.edata['h'])  # the result changes
+        tensor([[1.],
+                [1.],
+                [1.]])

        See Also
        --------
-        local_var
+        local_scope
        """
        ret = copy.copy(self)
        ret._node_frames = [fr.clone() for fr in self._node_frames]
@@ -3744,44 +4961,63 @@ class DGLHeteroGraph(object):

    @contextmanager
    def local_scope(self):
-        """Enter a local scope context for this graph.
+        """Enter a local scope context for the graph.

        By entering a local scope, any out-place mutation to the feature data will
-        not reflect to the original graph, thus making it easier to use in a function scope.
+        not be reflected in the original graph, thus making it easier to use in a function scope
+        (e.g. forward computation of a model).

        If set, the local scope will use same initializers for node features and
        edge features.

+        Notes
+        -----
+        In-place operations are still reflected in the original graph. This function also has
+        little overhead when the number of feature tensors in this graph is small.
+
        Examples
        --------
+
        The following example uses PyTorch backend.

-        Avoid accidentally overriding existing feature data. This is quite common when
-        implementing a NN module:
+        >>> import dgl
+        >>> import torch
+
+        Create a function for computation on graphs.

        >>> def foo(g):
-        >>>     with g.local_scope():
-        >>>         g.edata['h'] = torch.ones((g.number_of_edges(), 3))
-        >>>         return g.edata['h']
-        >>>
-        >>> g = dgl.bipartite(([0, 1, 1], [0, 0, 2]), 'user', 'plays', 'game')
-        >>> g.edata['h'] = torch.zeros((g.number_of_edges(), 3))
-        >>> newh = foo(g)        # get tensor of all ones
+        ...     with g.local_scope():
+        ...         g.edata['h'] = torch.ones((g.num_edges(), 3))
+        ...         g.edata['h2'] = torch.ones((g.num_edges(), 3))
+        ...         return g.edata['h']
+
+        ``local_scope`` avoids changing the graph features when exiting the function.
+
+        >>> g = dgl.graph((torch.tensor([0, 1, 1]), torch.tensor([0, 0, 2])))
+        >>> g.edata['h'] = torch.zeros((g.num_edges(), 3))
+        >>> newh = foo(g)
        >>> print(g.edata['h'])  # still get tensor of all zeros
+        tensor([[0., 0., 0.],
+                [0., 0., 0.],
+                [0., 0., 0.]])
+        >>> 'h2' in g.edata      # new feature set in the function scope is not found
+        False

-        Automatically garbage collect locally-defined tensors without the need to manually
-        ``pop`` the tensors.
+        In-place operations are still reflected in the original graph.

        >>> def foo(g):
-        >>>     with g.local_scope():
-        >>>         # This 'h' feature will stay local and be GCed when the function exits
-        >>>         g.edata['h'] = torch.ones((g.number_of_edges(), 3))
-        >>>         return g.edata['h']
-        >>>
-        >>> g = dgl.bipartite(([0, 1, 1], [0, 0, 2]), 'user', 'plays', 'game')
-        >>> h = foo(g)
-        >>> print('h' in g.edata)
-        False
+        ...     with g.local_scope():
+        ...         # in-place operation
+        ...         g.edata['h'] += 1
+        ...         return g.edata['h']
+
+        >>> g = dgl.graph((torch.tensor([0, 1, 1]), torch.tensor([0, 0, 2])))
+        >>> g.edata['h'] = torch.zeros((g.num_edges(), 1))
+        >>> newh = foo(g)
+        >>> print(g.edata['h'])  # the result changes
+        tensor([[1.],
+                [1.],
+                [1.]])

        See Also
        --------
@@ -3795,10 +5031,6 @@ class DGLHeteroGraph(object):
        self._node_frames = old_nframes
        self._edge_frames = old_eframes

-    def is_homogeneous(self):
-        """Return if the graph is homogeneous."""
-        return len(self.ntypes) == 1 and len(self.etypes) == 1
-
    def formats(self, formats=None):
        r"""Get a cloned graph with the specified sparse format(s) or query
        for the usage status of sparse formats
@@ -3835,7 +5067,7 @@ class DGLHeteroGraph(object):

        **Homographs or Heterographs with A Single Edge Type**

-        >>> g = dgl.graph([(0, 2), (0, 3), (1, 2)])
+        >>> g = dgl.graph(([0, 0, 1], [2, 3, 2]))
        >>> g.ndata['h'] = torch.ones(4, 1)
        >>> # Check status of format usage
        >>> g.formats()
@@ -3855,11 +5087,11 @@ class DGLHeteroGraph(object):
        **Heterographs with Multiple Edge Types**

        >>> g = dgl.heterograph({
-        >>>     ('user', 'plays', 'game'): (torch.tensor([0, 1, 1, 2]),
-        >>>                                 torch.tensor([0, 0, 1, 1])),
-        >>>     ('developer', 'develops', 'game'): (torch.tensor([0, 1]),
-        >>>                                         torch.tensor([0, 1]))
-        >>>     })
+        ...     ('user', 'plays', 'game'): (torch.tensor([0, 1, 1, 2]),
+        ...                                 torch.tensor([0, 0, 1, 1])),
+        ...     ('developer', 'develops', 'game'): (torch.tensor([0, 1]),
+        ...                                         torch.tensor([0, 1]))
+        ...     })
        >>> g.formats()
        {'created': ['coo'], 'not created': ['csr', 'csc']}
        >>> # Get a clone of the graph with 'csr' format
@@ -3894,7 +5126,7 @@ class DGLHeteroGraph(object):

        **Homographs or Heterographs with A Single Edge Type**

-        >>> g = dgl.graph([(0, 2), (0, 3), (1, 2)])
+        >>> g = dgl.graph(([0, 0, 1], [2, 3, 2]))
        >>> g.format()
        {'created': ['coo'], 'not created': ['csr', 'csc']}
        >>> g.create_format_()
@@ -3904,11 +5136,11 @@ class DGLHeteroGraph(object):
        **Heterographs with Multiple Edge Types**

        >>> g = dgl.heterograph({
-        >>>     ('user', 'plays', 'game'): (torch.tensor([0, 1, 1, 2]),
-        >>>                                 torch.tensor([0, 0, 1, 1])),
-        >>>     ('developer', 'develops', 'game'): (torch.tensor([0, 1]),
-        >>>                                         torch.tensor([0, 1]))
-        >>>     })
+        ...     ('user', 'plays', 'game'): (torch.tensor([0, 1, 1, 2]),
+        ...                                 torch.tensor([0, 0, 1, 1])),
+        ...     ('developer', 'develops', 'game'): (torch.tensor([0, 1]),
+        ...                                         torch.tensor([0, 1]))
+        ...     })
        >>> g.format()
        {'created': ['coo'], 'not created': ['csr', 'csc']}
        >>> g.create_format_()
@@ -3934,8 +5166,7 @@ class DGLHeteroGraph(object):
        """
        if idtype is None:
            return self
-        if not idtype in (F.int32, F.int64):
-            raise DGLError("ID type must be int32 or int64, but got {}.".format(idtype))
+        utils.check_valid_idtype(idtype)
        if self.idtype == idtype:
            return self
        bits = 32 if idtype == F.int32 else 64
@@ -3974,51 +5205,102 @@ class DGLHeteroGraph(object):


    def long(self):
-        """Cast this graph to use int64 IDs.
+        """Cast the graph to one with idtype int64.

-        Features are copied (shallow copy) to the new graph.
+        If the graph already has idtype int64, the function directly returns it. Otherwise,
+        it returns a cloned graph of idtype int64 with features copied (shallow copy).

        Returns
        -------
-        DGLHeteroGraph
-            The graph object
+        DGLGraph
+            The graph of idtype int64.

        Examples
        --------

-        >>> g = dgl.bipartite(([0, 1, 1], [0, 0, 2]), 'user', 'plays', 'game',
-        >>>                   idtype=torch.int32)
-        >>> g_long = g.long() # Convert g to int64 indexed, not changing the original `g`
+        The following example uses PyTorch backend.
+
+        >>> import dgl
+        >>> import torch
+
+        Create a graph of idtype int32.
+
+        >>> # (0, 1), (0, 2), (1, 2)
+        >>> g = dgl.graph((torch.tensor([0, 0, 1]).int(), torch.tensor([1, 2, 2]).int()))
+        >>> g.ndata['feat'] = torch.ones(3, 1)
+        >>> g.idtype
+        torch.int32
+
+        Cast the graph to one of idtype int64.
+
+        >>> # A cloned graph with an idtype of int64
+        >>> g_long = g.long()
+        >>> g_long.idtype
+        torch.int64
+        >>> # The idtype of the original graph does not change.
+        >>> g.idtype
+        torch.int32
+        >>> g_long.edges()
+        (tensor([0, 0, 1]), tensor([1, 2, 2]))
+        >>> g_long.ndata
+        {'feat': tensor([[1.],
+                         [1.],
+                         [1.]])}

        See Also
        --------
        int
        idtype
-        astype
        """
        return self.astype(F.int64)

    def int(self):
-        """Return a heterograph object use int32 as index dtype,
-        with the ndata and edata as the original object
+        """Cast the graph to one with idtype int32.
+
+        If the graph already has idtype int32, the function directly returns it. Otherwise,
+        it returns a cloned graph of idtype int32 with features copied (shallow copy).

        Returns
        -------
-        DGLHeteroGraph
-            The graph object
+        DGLGraph
+            The graph of idtype int32.

        Examples
        --------

-        >>> g = dgl.bipartite(([0, 1, 1], [0, 0, 2]), 'user', 'plays', 'game',
-        >>>                   idtype=torch.int64)
-        >>> g_int = g.int() # Convert g to int32 indexed, not changing the original `g`
+        The following example uses PyTorch backend.
+
+        >>> import dgl
+        >>> import torch
+
+        Create a graph of idtype int64.
+
+        >>> # (0, 1), (0, 2), (1, 2)
+        >>> g = dgl.graph((torch.tensor([0, 0, 1]), torch.tensor([1, 2, 2])))
+        >>> g.ndata['feat'] = torch.ones(3, 1)
+        >>> g.idtype
+        torch.int64
+
+        Cast the graph to one of idtype int32.
+
+        >>> # A cloned graph with an idtype of int32
+        >>> g_int = g.int()
+        >>> g_int.idtype
+        torch.int32
+        >>> # The idtype of the original graph does not change.
+        >>> g.idtype
+        torch.int64
+        >>> g_int.edges()
+        (tensor([0, 0, 1], dtype=torch.int32), tensor([1, 2, 2], dtype=torch.int32))
+        >>> g_int.ndata
+        {'feat': tensor([[1.],
+                         [1.],
+                         [1.]])}

        See Also
        --------
        long
        idtype
-        astype
        """
        return self.astype(F.int32)

@@ -4280,7 +5562,7 @@ def reduce_dict_data(frames, reducer, order=None):
        ret[k] = merger(flist)
    return ret

-def combine_frames(frames, ids):
+def combine_frames(frames, ids, col_names=None):
    """Merge the frames into one frame, taking the common columns.

    Return None if there is no common columns.
@@ -4291,6 +5573,8 @@ def combine_frames(frames, ids):
        List of frames
    ids : List[int]
        List of frame IDs
+    col_names : List[str], optional
+        Column names to consider. If not given, it considers all columns.

    Returns
    -------
@@ -4298,7 +5582,10 @@ def combine_frames(frames, ids):
        The resulting frame
    """
    # find common columns and check if their schemes match
-    schemes = {key: scheme for key, scheme in frames[ids[0]].schemes.items()}
+    if col_names is None:
+        schemes = {key: scheme for key, scheme in frames[ids[0]].schemes.items()}
+    else:
+        schemes = {key: frames[ids[0]].schemes[key] for key in col_names}
    for frame_id in ids:
        frame = frames[frame_id]
        for key, scheme in list(schemes.items()):

--- a/python/dgl/heterograph_index.py
+++ b/python/dgl/heterograph_index.py
@@ -547,6 +547,9 @@ class HeteroGraphIndex(ObjectBase):
        """
        if order is None:
            order = ""
+        elif order not in ['srcdst', 'eid']:
+            raise DGLError("Expect order to be one of None, 'srcdst', 'eid', "
+                           "got {}".format(order))
        edge_array = _CAPI_DGLHeteroEdges(self, int(etype), order)
        src = F.from_dgl_nd(edge_array(0))
        dst = F.from_dgl_nd(edge_array(1))

--- a/python/dgl/nn/mxnet/conv/gatedgraphconv.py
+++ b/python/dgl/nn/mxnet/conv/gatedgraphconv.py
@@ -76,8 +76,9 @@ class GatedGraphConv(nn.Block):
            is the output feature size.
        """
        with graph.local_scope():
-            assert graph.is_homogeneous(), \
-                "not a homograph; convert it with to_homo and pass in the edge type as argument"
+            assert graph.is_homogeneous, \
+                "not a homogeneous graph; convert it with to_homogeneous " \
+                "and pass in the edge type as argument"
            zero_pad = nd.zeros((feat.shape[0], self._out_feats - feat.shape[1]),
                                ctx=feat.context)
            feat = nd.concat(feat, zero_pad, dim=-1)

--- a/python/dgl/nn/mxnet/conv/relgraphconv.py
+++ b/python/dgl/nn/mxnet/conv/relgraphconv.py
@@ -229,8 +229,9 @@ class RelGraphConv(gluon.Block):
        mx.ndarray.NDArray
            New node features.
        """
-        assert g.is_homogeneous(), \
-            "not a homograph; convert it with to_homo and pass in the edge type as argument"
+        assert g.is_homogeneous, \
+            "not a homogeneous graph; convert it with to_homogeneous " \
+            "and pass in the edge type as argument"
        with g.local_scope():
            g.ndata['h'] = x
            g.edata['type'] = etypes