Unverified commit 22167f72, authored by Minjie Wang, committed by GitHub

[Refactor] Enable new kernel in all message passing APIs (#1953)

* WIP: frame refactor

* new frame

* simple update_all builtin

* move all subgraph routines into the same file

* sddmm & spmm schedule; node & edge udf

* degree bucketing

* some tricky 0deg corner cases

* bug in frame append

* merge test_hetero_basics and test_basics

* some code rearrange

* fix test_heterograph

* add mean spmm

* enable all builtin combinations

* pass gpu test

* pass pytorch tests

* wip

* fix some pt debugging code

* fix bug in mxnet backward

* pass all mxnet utests

* passed tf tests

* docstring

* lint

* lint

* fix broadcasting bugs

* add warning and clamp for mean reducer

* add test for zero-degree mean

* address comments

* lint

* small fix
parent 5d5436ba
@@ -3,6 +3,7 @@ from itertools import product
 import sys

 from ..backend import gsddmm as gsddmm_internal
+from .. import backend as F

 __all__ = ['gsddmm', 'copy_u', 'copy_v']

@@ -41,6 +42,21 @@ def gsddmm(g, op, lhs_data, rhs_data, lhs_target='u', rhs_target='v'):
     tensor
         The result tensor.
     """
+    if op not in ['copy_lhs', 'copy_rhs']:
+        # Expand dims so that there will be no broadcasting issues with
+        # different numbers of dimensions. For example, given two valid
+        # broadcastable shapes (N, 3, 1) and (E, 5, 3, 4), change them to
+        # (N, 1, 3, 1) and (E, 5, 3, 4).
+        lhs_shape = F.shape(lhs_data)
+        rhs_shape = F.shape(rhs_data)
+        if len(lhs_shape) != len(rhs_shape):
+            max_ndims = max(len(lhs_shape), len(rhs_shape))
+            lhs_pad_ndims = max_ndims - len(lhs_shape)
+            rhs_pad_ndims = max_ndims - len(rhs_shape)
+            new_lhs_shape = (lhs_shape[0],) + (1,) * lhs_pad_ndims + lhs_shape[1:]
+            new_rhs_shape = (rhs_shape[0],) + (1,) * rhs_pad_ndims + rhs_shape[1:]
+            lhs_data = F.reshape(lhs_data, new_lhs_shape)
+            rhs_data = F.reshape(rhs_data, new_rhs_shape)
     return gsddmm_internal(
         g._graph, op, lhs_data, rhs_data, lhs_target, rhs_target)
......
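The padding step above can be checked in isolation. A minimal sketch of the same shape arithmetic in plain Python (function name hypothetical; only the leading node/edge axis is kept in place):

    def pad_shapes(lhs_shape, rhs_shape):
        # Insert singleton dims right after the leading (N or E) axis so
        # both operands end up with the same rank, mirroring gsddmm above.
        max_ndims = max(len(lhs_shape), len(rhs_shape))
        lhs_pad = max_ndims - len(lhs_shape)
        rhs_pad = max_ndims - len(rhs_shape)
        new_lhs = (lhs_shape[0],) + (1,) * lhs_pad + lhs_shape[1:]
        new_rhs = (rhs_shape[0],) + (1,) * rhs_pad + rhs_shape[1:]
        return new_lhs, new_rhs

    # (N, 3, 1) vs (E, 5, 3, 4) -> (N, 1, 3, 1) vs (E, 5, 3, 4)
    print(pad_shapes((10, 3, 1), (20, 5, 3, 4)))
    # ((10, 1, 3, 1), (20, 5, 3, 4))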
"""dgl spmm operator module.""" """dgl spmm operator module."""
import sys import sys
from ..base import dgl_warning
from ..backend import gspmm as gspmm_internal from ..backend import gspmm as gspmm_internal
from .. import backend as F
__all__ = ['gspmm'] __all__ = ['gspmm']
...@@ -30,7 +33,7 @@ def gspmm(g, op, reduce_op, lhs_data, rhs_data): ...@@ -30,7 +33,7 @@ def gspmm(g, op, reduce_op, lhs_data, rhs_data):
The binary op's name, could be ``add``, ``sub``, ``mul``, ``div``, The binary op's name, could be ``add``, ``sub``, ``mul``, ``div``,
``copy_lhs``, ``copy_rhs``. ``copy_lhs``, ``copy_rhs``.
reduce_op : str reduce_op : str
Reduce operator, could be ``sum``, ``max``, ``min``. Reduce operator, could be ``sum``, ``max``, ``min``, ``mean``.
lhs_data : tensor or None lhs_data : tensor or None
The left operand, could be None if it's not required by the op. The left operand, could be None if it's not required by the op.
rhs_data : tensor or None rhs_data : tensor or None
...@@ -41,7 +44,32 @@ def gspmm(g, op, reduce_op, lhs_data, rhs_data): ...@@ -41,7 +44,32 @@ def gspmm(g, op, reduce_op, lhs_data, rhs_data):
tensor tensor
The result tensor. The result tensor.
""" """
return gspmm_internal(g._graph, op, reduce_op, lhs_data, rhs_data) if op not in ['copy_lhs', 'copy_rhs']:
# Expand dims so that there will be no broadcasting issues with different
# number of dimensions. For example, given two shapes (N, 3, 1), (E, 5, 3, 4)
# that are valid broadcastable shapes, change them to (N, 1, 3, 1) and
# (E, 5, 3, 4)
lhs_shape = F.shape(lhs_data)
rhs_shape = F.shape(rhs_data)
if len(lhs_shape) != len(rhs_shape):
max_ndims = max(len(lhs_shape), len(rhs_shape))
lhs_pad_ndims = max_ndims - len(lhs_shape)
rhs_pad_ndims = max_ndims - len(rhs_shape)
new_lhs_shape = (lhs_shape[0],) + (1,) * lhs_pad_ndims + lhs_shape[1:]
new_rhs_shape = (rhs_shape[0],) + (1,) * rhs_pad_ndims + rhs_shape[1:]
lhs_data = F.reshape(lhs_data, new_lhs_shape)
rhs_data = F.reshape(rhs_data, new_rhs_shape)
if reduce_op == 'mean':
ret = gspmm_internal(g._graph, op, 'sum', lhs_data, rhs_data)
ret_shape = F.shape(ret)
deg = g.in_degrees()
if F.as_scalar(F.min(deg, dim=0)) == 0:
dgl_warning('Zero-degree nodes encountered in mean reducer. Setting the mean to 0.')
deg = F.astype(F.clamp(deg, 1, g.number_of_edges()), F.dtype(ret))
deg_shape = (ret_shape[0],) + (1,) * (len(ret_shape) - 1)
return ret / F.reshape(deg, deg_shape)
else:
return gspmm_internal(g._graph, op, reduce_op, lhs_data, rhs_data)
 def _gen_spmm_func(binary_op, reduce_op):

@@ -130,9 +158,14 @@ def _gen_copy_reduce_func(binary_op, reduce_op):

 def _register_spmm_func():
-    """Register spmm functions"""
+    """Register spmm functions
+
+    - Binary operation plus reduction between u and e: u_[]_e_[]
+    - Copy u plus reduction: copy_u_[]
+    - Copy e plus reduction: copy_e_[]
+    """
     for binary_op in ["add", "sub", "mul", "div", "copy_u", "copy_e"]:
-        for reduce_op in ["sum", "max", "min"]:
+        for reduce_op in ["sum", "max", "min", "mean"]:
             if binary_op.startswith("copy"):
                 func = _gen_copy_reduce_func(binary_op, reduce_op)
             else:
......
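With ``mean`` added to the reducer loop, the registration generates builtins such as ``u_mul_e_mean``, ``copy_u_mean``, and ``copy_e_mean``, following the naming scheme in the docstring. A hedged usage sketch (graph and feature values hypothetical):

    import dgl
    import torch
    from dgl.ops import copy_u_mean  # generated by the loop above

    g = dgl.graph(([0, 1, 2], [1, 1, 2]))  # node 0 has no in-edges
    x = torch.randn(3, 4)
    h = copy_u_mean(g, x)  # per-node mean of incoming source features
    print(h.shape)  # torch.Size([3, 4]); node 0 gets 0 with a zero-degree warning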
@@ -6,7 +6,7 @@ from ..base import DGLError, EID
 from ..heterograph import DGLHeteroGraph
 from .. import ndarray as nd
 from .. import utils
-from .. import transform
+from .. import subgraph as subg
 from .dataloader import BlockSampler, assign_block_eids

 __all__ = [
@@ -300,7 +300,7 @@ class MultiLayerNeighborSampler(BlockSampler):
     def sample_frontier(self, block_id, g, seed_nodes, *args, **kwargs):
         fanout = self.fanouts[block_id]
         if fanout is None:
-            frontier = transform.in_subgraph(g, seed_nodes)
+            frontier = subg.in_subgraph(g, seed_nodes)
         else:
             frontier = sample_neighbors(g, seed_nodes, fanout, replace=self.replace)
         return frontier
......
"""Functions for extracting subgraphs.
The module only contains functions for extracting subgraphs deterministically.
For stochastic subgraph extraction, please see functions under :mod:`dgl.sampling`.
"""
from collections.abc import Mapping
from ._ffi.function import _init_api
from .base import DGLError
from . import backend as F
from . import graph_index
from . import heterograph_index
from . import ndarray as nd
from .heterograph import DGLHeteroGraph
from . import utils
__all__ = ['node_subgraph', 'edge_subgraph', 'node_type_subgraph', 'edge_type_subgraph',
'in_subgraph', 'out_subgraph']
def node_subgraph(graph, nodes):
"""Return the subgraph induced on given nodes.
The metagraph of the returned subgraph is the same as the parent graph.
Features are copied from the original graph.
Parameters
----------
graph : DGLGraph
The graph to extract subgraphs from.
nodes : list or dict[str->list or iterable]
A dictionary mapping node types to node ID array for constructing
subgraph. All nodes must exist in the graph.
If the graph only has one node type, one can just specify a list,
tensor, or any iterable of node IDs instead.
The node ID array can be either an integer tensor or a bool tensor.
When a bool tensor is used, it is automatically converted to
an integer tensor using the semantics of np.where(nodes_idx == True).
Note: when using a bool tensor, only backend (torch, tensorflow, mxnet)
tensors are supported.
Returns
-------
G : DGLHeteroGraph
The subgraph.
The nodes and edges in the subgraph are relabeled using consecutive
integers from 0.
One can retrieve the mapping from subgraph node/edge ID to parent
node/edge ID via ``dgl.NID`` and ``dgl.EID`` node/edge features of the
subgraph.
Examples
--------
The following example uses PyTorch backend.
Instantiate a heterograph.
>>> plays_g = dgl.bipartite(([0, 1, 1, 2], [0, 0, 2, 1]), 'user', 'plays', 'game')
>>> follows_g = dgl.graph(([0, 1, 1], [1, 2, 2]), 'user', 'follows')
>>> g = dgl.hetero_from_relations([plays_g, follows_g])
>>> # Set node features
>>> g.nodes['user'].data['h'] = torch.tensor([[0.], [1.], [2.]])
Get subgraphs.
>>> g.subgraph({'user': [4, 5]})
An error occurs as these nodes do not exist.
>>> sub_g = g.subgraph({'user': [1, 2]})
>>> print(sub_g)
Graph(num_nodes={'user': 2, 'game': 0},
num_edges={('user', 'plays', 'game'): 0, ('user', 'follows', 'user'): 2},
metagraph=[('user', 'game'), ('user', 'user')])
Get subgraphs using boolean mask tensor.
>>> sub_g = g.subgraph({'user': th.tensor([False, True, True])})
>>> print(sub_g)
Graph(num_nodes={'user': 2, 'game': 0},
num_edges={('user', 'plays', 'game'): 0, ('user', 'follows', 'user'): 2},
metagraph=[('user', 'game'), ('user', 'user')])
Get the original node/edge indices.
>>> sub_g['follows'].ndata[dgl.NID] # Get the node indices in the raw graph
tensor([1, 2])
>>> sub_g['follows'].edata[dgl.EID] # Get the edge indices in the raw graph
tensor([1, 2])
Get the copied node features.
>>> sub_g.nodes['user'].data['h']
tensor([[1.],
[2.]])
>>> sub_g.nodes['user'].data['h'] += 1
>>> g.nodes['user'].data['h'] # Features are not shared.
tensor([[0.],
[1.],
[2.]])
See Also
--------
edge_subgraph
"""
if graph.is_block:
raise DGLError('Extracting subgraph from a block graph is not allowed.')
if not isinstance(nodes, Mapping):
assert len(graph.ntypes) == 1, \
'need a dict of node type and IDs for graph with multiple node types'
nodes = {graph.ntypes[0]: nodes}
def _process_nodes(ntype, v):
if F.is_tensor(v) and F.dtype(v) == F.bool:
return F.astype(F.nonzero_1d(F.copy_to(v, graph.device)), graph.idtype)
else:
return utils.prepare_tensor(graph, v, 'nodes["{}"]'.format(ntype))
induced_nodes = [_process_nodes(ntype, nodes.get(ntype, [])) for ntype in graph.ntypes]
sgi = graph._graph.node_subgraph(induced_nodes)
induced_edges = sgi.induced_edges
return _create_hetero_subgraph(graph, sgi, induced_nodes, induced_edges)
DGLHeteroGraph.subgraph = node_subgraph
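For bool masks, ``_process_nodes`` above converts to integer IDs with ``np.where``-style semantics before slicing. A minimal sketch of the equivalent conversion, assuming a PyTorch backend:

    import torch

    mask = torch.tensor([False, True, True])
    ids = torch.nonzero(mask, as_tuple=False).squeeze(1)
    print(ids)  # tensor([1, 2]) -- same selection as g.subgraph({'user': [1, 2]})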
def edge_subgraph(graph, edges, preserve_nodes=False):
"""Return the subgraph induced on given edges.
The metagraph of the returned subgraph is the same as the parent graph.
Features are copied from the original graph.
Parameters
----------
graph : DGLGraph
The graph to extract subgraphs from.
edges : dict[(str, str, str), Tensor]
A dictionary mapping edge types to edge ID array for constructing
subgraph. All edges must exist in the graph.
The edge types are characterized by triplets of
``(src type, etype, dst type)``.
If the graph only has one edge type, one can just specify a list,
tensor, or any iterable of edge IDs instead.
The edge ID array can be either an integer tensor or a bool tensor.
When a bool tensor is used, it is automatically converted to
an integer tensor using the semantics of np.where(edges_idx == True).
Note: when using a bool tensor, only backend (torch, tensorflow, mxnet)
tensors are supported.
preserve_nodes : bool
Whether to preserve all nodes or not. If false, all nodes
without edges will be removed. (Default: False)
Returns
-------
G : DGLHeteroGraph
The subgraph.
The nodes and edges are relabeled using consecutive integers from 0.
One can retrieve the mapping from subgraph node/edge ID to parent
node/edge ID via ``dgl.NID`` and ``dgl.EID`` node/edge features of the
subgraph.
Examples
--------
The following example uses PyTorch backend.
Instantiate a heterograph.
>>> plays_g = dgl.bipartite(([0, 1, 1, 2], [0, 0, 2, 1]), 'user', 'plays', 'game')
>>> follows_g = dgl.graph(([0, 1, 1], [1, 2, 2]), 'user', 'follows')
>>> g = dgl.hetero_from_relations([plays_g, follows_g])
>>> # Set edge features
>>> g.edges['follows'].data['h'] = torch.tensor([[0.], [1.], [2.]])
Get subgraphs.
>>> g.edge_subgraph({('user', 'follows', 'user'): [5, 6]})
An error occurs as these edges do not exist.
>>> sub_g = g.edge_subgraph({('user', 'follows', 'user'): [1, 2],
>>> ('user', 'plays', 'game'): [2]})
>>> print(sub_g)
Graph(num_nodes={'user': 2, 'game': 1},
num_edges={('user', 'plays', 'game'): 1, ('user', 'follows', 'user'): 2},
metagraph=[('user', 'game'), ('user', 'user')])
Get subgraphs using boolean mask tensor.
>>> sub_g = g.edge_subgraph({('user', 'follows', 'user'): th.tensor([False, True, True]),
>>> ('user', 'plays', 'game'): th.tensor([False, False, True, False])})
>>> sub_g
Graph(num_nodes={'user': 2, 'game': 1},
num_edges={('user', 'plays', 'game'): 1, ('user', 'follows', 'user'): 2},
metagraph=[('user', 'game'), ('user', 'user')])
Get the original node/edge indices.
>>> sub_g['follows'].ndata[dgl.NID] # Get the node indices in the raw graph
tensor([1, 2])
>>> sub_g['plays'].edata[dgl.EID] # Get the edge indices in the raw graph
tensor([2])
Get the copied edge features.
>>> sub_g.edges['follows'].data['h']
tensor([[1.],
[2.]])
>>> sub_g.edges['follows'].data['h'] += 1
>>> g.edges['follows'].data['h'] # Features are not shared.
tensor([[0.],
[1.],
[2.]])
See Also
--------
subgraph
"""
if graph.is_block:
raise DGLError('Extracting subgraph from a block graph is not allowed.')
if not isinstance(edges, Mapping):
assert len(graph.canonical_etypes) == 1, \
'need a dict of edge type and IDs for graph with multiple edge types'
edges = {graph.canonical_etypes[0]: edges}
def _process_edges(etype, e):
if F.is_tensor(e) and F.dtype(e) == F.bool:
return F.astype(F.nonzero_1d(F.copy_to(e, graph.device)), graph.idtype)
else:
return utils.prepare_tensor(graph, e, 'edges["{}"]'.format(etype))
edges = {graph.to_canonical_etype(etype): e for etype, e in edges.items()}
induced_edges = [
_process_edges(cetype, edges.get(cetype, []))
for cetype in graph.canonical_etypes]
sgi = graph._graph.edge_subgraph(induced_edges, preserve_nodes)
induced_nodes = sgi.induced_nodes
return _create_hetero_subgraph(graph, sgi, induced_nodes, induced_edges)
DGLHeteroGraph.edge_subgraph = edge_subgraph
def in_subgraph(g, nodes):
"""Extract the subgraph containing only the in edges of the given nodes.
The subgraph keeps the same type schema and the cardinality of the original one.
Node/edge features are not preserved. The original IDs
the extracted edges are stored as the `dgl.EID` feature in the returned graph.
Parameters
----------
g : DGLHeteroGraph
Full graph structure.
nodes : tensor or dict
The nodes whose in edges to extract. The allowed types are
a dictionary mapping node types to node ID tensors, or simply a node ID tensor if
the given graph g has only one type of nodes.
Returns
-------
DGLHeteroGraph
The subgraph.
"""
if g.is_block:
raise DGLError('Extracting subgraph of a block graph is not allowed.')
if not isinstance(nodes, dict):
if len(g.ntypes) > 1:
raise DGLError("Must specify node type when the graph is not homogeneous.")
nodes = {g.ntypes[0] : nodes}
nodes = utils.prepare_tensor_dict(g, nodes, 'nodes')
nodes_all_types = []
for ntype in g.ntypes:
if ntype in nodes:
nodes_all_types.append(F.to_dgl_nd(nodes[ntype]))
else:
nodes_all_types.append(nd.NULL[g._idtype_str])
sgi = _CAPI_DGLInSubgraph(g._graph, nodes_all_types)
induced_edges = sgi.induced_edges
return _create_hetero_subgraph(g, sgi, None, induced_edges)
DGLHeteroGraph.in_subgraph = in_subgraph
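A hedged usage sketch for ``in_subgraph`` on a homogeneous graph (edge list hypothetical):

    import dgl
    import torch

    g = dgl.graph(([0, 1, 2, 3], [1, 2, 3, 0]))  # a 4-node directed cycle
    sg = dgl.in_subgraph(g, torch.tensor([1, 2]))
    # Only edges entering nodes 1 and 2 survive; the node set is unchanged.
    print(sg.number_of_edges())  # 2
    print(sg.edata[dgl.EID])     # original IDs of the kept edges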
def out_subgraph(g, nodes):
"""Extract the subgraph containing only the out edges of the given nodes.
The subgraph keeps the same type schema and the cardinality of the original one.
Node/edge features are not preserved. The original IDs
the extracted edges are stored as the `dgl.EID` feature in the returned graph.
Parameters
----------
g : DGLHeteroGraph
Full graph structure.
nodes : tensor or dict
The nodes whose out edges to extract. The allowed types are
a dictionary mapping node types to node ID tensors, or simply a node ID tensor if
the given graph g has only one type of nodes.
Returns
-------
DGLHeteroGraph
The subgraph.
"""
if g.is_block:
raise DGLError('Extracting subgraph of a block graph is not allowed.')
if not isinstance(nodes, dict):
if len(g.ntypes) > 1:
raise DGLError("Must specify node type when the graph is not homogeneous.")
nodes = {g.ntypes[0] : nodes}
nodes = utils.prepare_tensor_dict(g, nodes, 'nodes')
nodes_all_types = []
for ntype in g.ntypes:
if ntype in nodes:
nodes_all_types.append(F.to_dgl_nd(nodes[ntype]))
else:
nodes_all_types.append(nd.NULL[g._idtype_str])
sgi = _CAPI_DGLOutSubgraph(g._graph, nodes_all_types)
induced_edges = sgi.induced_edges
return _create_hetero_subgraph(g, sgi, None, induced_edges)
DGLHeteroGraph.out_subgraph = out_subgraph
def node_type_subgraph(graph, ntypes):
"""Return the subgraph induced on given node types.
The metagraph of the returned subgraph is the subgraph of the original
metagraph induced from the node types.
Features are shared with the original graph.
Parameters
----------
graph : DGLGraph
The graph to extract subgraphs from.
ntypes : list[str]
The node types
Returns
-------
G : DGLHeteroGraph
The subgraph.
Examples
--------
The following example uses PyTorch backend.
Instantiate a heterograph.
>>> plays_g = dgl.bipartite(([0, 1, 1, 2], [0, 0, 2, 1]), 'user', 'plays', 'game')
>>> follows_g = dgl.graph(([0, 1, 1], [1, 2, 2]), 'user', 'follows')
>>> g = dgl.hetero_from_relations([plays_g, follows_g])
>>> # Set node features
>>> g.nodes['user'].data['h'] = torch.tensor([[0.], [1.], [2.]])
Get subgraphs.
>>> sub_g = g.node_type_subgraph(['user'])
>>> print(sub_g)
Graph(num_nodes=3, num_edges=3,
ndata_schemes={'h': Scheme(shape=(1,), dtype=torch.float32)}
edata_schemes={})
Get the shared node features.
>>> sub_g.nodes['user'].data['h']
tensor([[0.],
[1.],
[2.]])
>>> sub_g.nodes['user'].data['h'] += 1
>>> g.nodes['user'].data['h'] # Features are shared.
tensor([[1.],
[2.],
[3.]])
See Also
--------
edge_type_subgraph
"""
ntid = [graph.get_ntype_id(ntype) for ntype in ntypes]
stids, dtids, etids = graph._graph.metagraph.edges('eid')
stids, dtids, etids = stids.tonumpy(), dtids.tonumpy(), etids.tonumpy()
etypes = []
for stid, dtid, etid in zip(stids, dtids, etids):
if stid in ntid and dtid in ntid:
etypes.append(graph.canonical_etypes[etid])
return edge_type_subgraph(graph, etypes)
DGLHeteroGraph.node_type_subgraph = node_type_subgraph
def edge_type_subgraph(graph, etypes):
"""Return the subgraph induced on given edge types.
The metagraph of the returned subgraph is the subgraph of the original metagraph
induced from the edge types.
Features are shared with the original graph.
Parameters
----------
graph : DGLGraph
The graph to extract subgraphs from.
etypes : list[str or tuple]
The edge types
Returns
-------
G : DGLHeteroGraph
The subgraph.
Examples
--------
The following example uses PyTorch backend.
Instantiate a heterograph.
>>> plays_g = dgl.bipartite(([0, 1, 1, 2], [0, 0, 2, 1]), 'user', 'plays', 'game')
>>> follows_g = dgl.graph(([0, 1, 1], [1, 2, 2]), 'user', 'follows')
>>> g = dgl.hetero_from_relations([plays_g, follows_g])
>>> # Set edge features
>>> g.edges['follows'].data['h'] = torch.tensor([[0.], [1.], [2.]])
Get subgraphs.
>>> sub_g = g.edge_type_subgraph(['follows'])
>>> print(sub_g)
Graph(num_nodes=3, num_edges=3,
ndata_schemes={}
edata_schemes={'h': Scheme(shape=(1,), dtype=torch.float32)})
Get the shared edge features.
>>> sub_g.edges['follows'].data['h']
tensor([[0.],
[1.],
[2.]])
>>> sub_g.edges['follows'].data['h'] += 1
>>> g.edges['follows'].data['h'] # Features are shared.
tensor([[1.],
[2.],
[3.]])
See Also
--------
node_type_subgraph
"""
etype_ids = [graph.get_etype_id(etype) for etype in etypes]
# meta graph is homograph, still using int64
meta_src, meta_dst, _ = graph._graph.metagraph.find_edges(utils.toindex(etype_ids, "int64"))
rel_graphs = [graph._graph.get_relation_graph(i) for i in etype_ids]
meta_src = meta_src.tonumpy()
meta_dst = meta_dst.tonumpy()
ntypes_invmap = {n: i for i, n in enumerate(set(meta_src) | set(meta_dst))}
mapped_meta_src = [ntypes_invmap[v] for v in meta_src]
mapped_meta_dst = [ntypes_invmap[v] for v in meta_dst]
node_frames = [graph._node_frames[i] for i in ntypes_invmap]
edge_frames = [graph._edge_frames[i] for i in etype_ids]
induced_ntypes = [graph._ntypes[i] for i in ntypes_invmap]
induced_etypes = [graph._etypes[i] for i in etype_ids] # get the "name" of edge type
num_nodes_per_induced_type = [graph.number_of_nodes(ntype) for ntype in induced_ntypes]
metagraph = graph_index.from_edge_list((mapped_meta_src, mapped_meta_dst), True)
# num_nodes_per_type should be int64
hgidx = heterograph_index.create_heterograph_from_relations(
metagraph, rel_graphs, utils.toindex(num_nodes_per_induced_type, "int64"))
hg = DGLHeteroGraph(hgidx, induced_ntypes, induced_etypes, node_frames, edge_frames)
return hg
DGLHeteroGraph.edge_type_subgraph = edge_type_subgraph
#################### Internal functions ####################
def _create_hetero_subgraph(parent, sgi, induced_nodes, induced_edges):
"""Internal function to create a subgraph.
Parameters
----------
parent : DGLGraph
The parent DGLGraph.
sgi : HeteroSubgraphIndex
Subgraph object returned by CAPI.
induced_nodes : list[Tensor] or None
Induced node IDs. Will be stored as the dgl.NID ndata unless it
is None, which means the induced node IDs are the same as the parent node IDs.
induced_edges : list[Tensor] or None
Induced edge IDs. Will be stored as the dgl.EID edata unless it
is None, which means the induced edge IDs are the same as the parent edge IDs.
Returns
-------
DGLGraph
Graph
"""
node_frames, edge_frames = utils.extract_subframes(parent, induced_nodes, induced_edges)
hsg = DGLHeteroGraph(sgi.graph, parent.ntypes, parent.etypes,
node_frames, edge_frames)
return hsg
_init_api("dgl.subgraph")
@@ -63,6 +63,36 @@ def prepare_tensor_dict(g, data, name):
    return {key : prepare_tensor(g, val, '{}["{}"]'.format(name, key))
            for key, val in data.items()}
def parse_edges_arg_to_eid(g, edges, etid, argname='edges'):
"""Parse the :attr:`edges` argument and return an edge ID tensor.
The resulting edge ID tensor has the same ID type and device as :attr:`g`.
Parameters
----------
g : DGLGraph
Graph
edges : pair of Tensors, Tensor, or iterable[int]
Argument for specifying edges.
etid : int
Edge type ID.
argname : str, optional
Argument name.
Returns
-------
Tensor
Edge ID tensor
"""
if isinstance(edges, tuple):
u, v = edges
u = prepare_tensor(g, u, '{}[0]'.format(argname))
v = prepare_tensor(g, v, '{}[1]'.format(argname))
eid = g.edge_ids(u, v, etype=g.canonical_etypes[etid])
else:
eid = prepare_tensor(g, edges, argname)
return eid
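A sketch of the two accepted argument forms (the helper is internal, so the import path is an assumption; graph values hypothetical):

    import dgl
    import torch
    from dgl.utils import parse_edges_arg_to_eid  # assumed re-export

    g = dgl.graph(([0, 1, 2], [1, 2, 0]))
    # Form 1: an explicit edge ID tensor passes through prepare_tensor.
    eid = parse_edges_arg_to_eid(g, torch.tensor([0, 2]), etid=0)
    # Form 2: a (u, v) pair is resolved to edge IDs via g.edge_ids.
    eid2 = parse_edges_arg_to_eid(g, (torch.tensor([1]), torch.tensor([2])), etid=0)
    print(eid, eid2)  # tensor([0, 2]) tensor([1])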
def check_all_same_idtype(glist, name):
    """Check all the graphs have the same idtype."""
    if len(glist) == 0:
......
@@ -648,7 +648,7 @@ DGL_REGISTER_GLOBAL("heterograph_index._CAPI_DGLHeteroGetFormatGraph")
   *rv = HeteroGraphRef(hgptr);
 });

-DGL_REGISTER_GLOBAL("transform._CAPI_DGLInSubgraph")
+DGL_REGISTER_GLOBAL("subgraph._CAPI_DGLInSubgraph")
 .set_body([] (DGLArgs args, DGLRetValue *rv) {
   HeteroGraphRef hg = args[0];
   const auto& nodes = ListValueToVector<IdArray>(args[1]);
@@ -657,7 +657,7 @@ DGL_REGISTER_GLOBAL("transform._CAPI_DGLInSubgraph")
   *rv = HeteroGraphRef(ret);
 });

-DGL_REGISTER_GLOBAL("transform._CAPI_DGLOutSubgraph")
+DGL_REGISTER_GLOBAL("subgraph._CAPI_DGLOutSubgraph")
 .set_body([] (DGLArgs args, DGLRetValue *rv) {
   HeteroGraphRef hg = args[0];
   const auto& nodes = ListValueToVector<IdArray>(args[1]);
......
@@ -2,7 +2,6 @@ import networkx as nx
 import scipy.sparse as ssp
 import dgl
 import dgl.contrib as contrib
-from dgl.frame import Frame, FrameRef, Column
 from dgl.graph_index import create_graph_index
 from dgl.utils import toindex
 import backend as F
@@ -128,24 +127,6 @@ def test_pickling_graph_index():
     assert F.array_equal(dst_idx.tousertensor(), dst_idx2.tousertensor())

-def test_pickling_frame():
-    x = F.randn((3, 7))
-    y = F.randn((3, 5))
-    c = Column(x)
-    c2 = _reconstruct_pickle(c)
-    assert F.allclose(c.data, c2.data)
-    fr = Frame({'x': x, 'y': y})
-    fr2 = _reconstruct_pickle(fr)
-    assert F.allclose(fr2['x'].data, x)
-    assert F.allclose(fr2['y'].data, y)
-    fr = Frame()

 def _global_message_func(nodes):
     return {'x': nodes.data['x']}
......
@@ -2,7 +2,6 @@ import networkx as nx
 import scipy.sparse as ssp
 import dgl
 import dgl.contrib as contrib
-from dgl.frame import Frame, FrameRef, Column
 from dgl.graph_index import create_graph_index
 from dgl.utils import toindex
 import backend as F
......