Unverified commit 56ffb650, authored by peizhou001, committed by GitHub

[API Deprecation] Deprecate contrib module (#5114)

parent 436de3d1
# -*- coding: utf-8 -*-
#
# setup.py
#
# Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
"""
KG Sparse embedding
"""
import os
import numpy as np
import mxnet as mx
from mxnet import gluon
from mxnet import ndarray as nd
from .score_fun import *
from .. import *
def logsigmoid(val):
max_elem = nd.maximum(0., -val)
z = nd.exp(-max_elem) + nd.exp(-val - max_elem)
return -(max_elem + nd.log(z))
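# Note: this is the numerically stable form of log(sigmoid(val)):
# log(1 / (1 + exp(-val))) = -(max(0, -val) + log(exp(-max(0, -val)) + exp(-val - max(0, -val))))
# which avoids overflowing exp(-val) for large negative inputs.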
get_device = lambda args : mx.gpu(args.gpu[0]) if args.gpu[0] >= 0 else mx.cpu()
norm = lambda x, p: nd.sum(nd.abs(x) ** p)
get_scalar = lambda x: x.detach().asscalar()
reshape = lambda arr, x, y: arr.reshape(x, y)
cuda = lambda arr, gpu: arr.as_in_context(mx.gpu(gpu))
class ExternalEmbedding:
"""Sparse Embedding for Knowledge Graph
It is used to store both entity embeddings and relation embeddings.
Parameters
----------
args :
Global configs.
num : int
Number of embeddings.
dim : int
Embedding dimension size.
ctx : mx.ctx
Device context to store the embedding.
"""
def __init__(self, args, num, dim, ctx):
self.gpu = args.gpu
self.args = args
self.trace = []
self.emb = nd.empty((num, dim), dtype=np.float32, ctx=ctx)
self.state_sum = nd.zeros((self.emb.shape[0]), dtype=np.float32, ctx=ctx)
self.state_step = 0
def init(self, emb_init):
"""Initializing the embeddings.
Parameters
----------
emb_init : float
The initial embedding range is [-emb_init, emb_init].
"""
nd.random.uniform(-emb_init, emb_init,
shape=self.emb.shape, dtype=self.emb.dtype,
ctx=self.emb.context, out=self.emb)
def share_memory(self):
# TODO(zhengda) fix this later
pass
def __call__(self, idx, gpu_id=-1, trace=True):
""" Return sliced tensor.
Parameters
----------
idx : nd.NDArray
Slicing index
gpu_id : int
Which gpu to put sliced data in.
trace : bool
If True, trace the computation. This is required in training.
If False, do not trace the computation.
Default: True
"""
if self.emb.context != idx.context:
idx = idx.as_in_context(self.emb.context)
data = nd.take(self.emb, idx)
if gpu_id >= 0:
data = data.as_in_context(mx.gpu(gpu_id))
data.attach_grad()
if trace:
self.trace.append((idx, data))
return data
def update(self, gpu_id=-1):
""" Update embeddings in a sparse manner
Sparse embeddings are updated in mini batches. We maintain gradient states for
each embedding so they can be updated separately.
Parameters
----------
gpu_id : int
Which gpu to use to accelerate the calculation. If -1 is provided, the CPU is used.
"""
self.state_step += 1
for idx, data in self.trace:
grad = data.grad
clr = self.args.lr
#clr = self.args.lr / (1 + (self.state_step - 1) * group['lr_decay'])
# the update is non-linear so indices must be unique
grad_indices = idx
grad_values = grad
grad_sum = (grad_values * grad_values).mean(1)
ctx = self.state_sum.context
if ctx != grad_indices.context:
grad_indices = grad_indices.as_in_context(ctx)
if ctx != grad_sum.context:
grad_sum = grad_sum.as_in_context(ctx)
self.state_sum[grad_indices] += grad_sum
std = self.state_sum[grad_indices] # _sparse_mask
if gpu_id >= 0:
std = std.as_in_context(mx.gpu(gpu_id))
std_values = nd.expand_dims(nd.sqrt(std) + 1e-10, 1)
tmp = (-clr * grad_values / std_values)
if tmp.context != ctx:
tmp = tmp.as_in_context(ctx)
# TODO(zhengda) the overhead is here.
self.emb[grad_indices] = mx.nd.take(self.emb, grad_indices) + tmp
self.trace = []
def curr_emb(self):
"""Return embeddings in trace.
"""
data = [data for _, data in self.trace]
return nd.concat(*data, dim=0)
def save(self, path, name):
"""Save embeddings.
Parameters
----------
path : str
Directory to save the embedding.
name : str
Embedding name.
"""
emb_fname = os.path.join(path, name+'.npy')
np.save(emb_fname, self.emb.asnumpy())
def load(self, path, name):
"""Load embeddings.
Parameters
----------
path : str
Directory to load the embedding.
name : str
Embedding name.
"""
emb_fname = os.path.join(path, name+'.npy')
self.emb = nd.array(np.load(emb_fname))
# -*- coding: utf-8 -*-
#
# setup.py
#
# Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# -*- coding: utf-8 -*-
#
# setup.py
#
# Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import torch as th
import torch.nn as nn
import torch.nn.functional as functional
import torch.nn.init as INIT
import numpy as np
def batched_l2_dist(a, b):
a_squared = a.norm(dim=-1).pow(2)
b_squared = b.norm(dim=-1).pow(2)
squared_res = th.baddbmm(
b_squared.unsqueeze(-2), a, b.transpose(-2, -1), alpha=-2
).add_(a_squared.unsqueeze(-1))
res = squared_res.clamp_min_(1e-30).sqrt_()
return res
def batched_l1_dist(a, b):
res = th.cdist(a, b, p=1)
return res
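# batched_l2_dist expands ||a - b||^2 = ||a||^2 - 2 a.b + ||b||^2 in a single
# baddbmm call (the b_squared input is added with beta=1, alpha=-2 scales
# a @ b^T), then clamps at 1e-30 before the sqrt so the gradient stays finite
# when the distance is zero.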
class TransEScore(nn.Module):
"""TransE score function
Paper link: https://papers.nips.cc/paper/5071-translating-embeddings-for-modeling-multi-relational-data
"""
def __init__(self, gamma, dist_func='l2'):
super(TransEScore, self).__init__()
self.gamma = gamma
if dist_func == 'l1':
self.neg_dist_func = batched_l1_dist
self.dist_ord = 1
else: # default use l2
self.neg_dist_func = batched_l2_dist
self.dist_ord = 2
def edge_func(self, edges):
head = edges.src['emb']
tail = edges.dst['emb']
rel = edges.data['emb']
score = head + rel - tail
return {'score': self.gamma - th.norm(score, p=self.dist_ord, dim=-1)}
def prepare(self, g, gpu_id, trace=False):
pass
def create_neg_prepare(self, neg_head):
def fn(rel_id, num_chunks, head, tail, gpu_id, trace=False):
return head, tail
return fn
def forward(self, g):
g.apply_edges(lambda edges: self.edge_func(edges))
def update(self, gpu_id=-1):
pass
def reset_parameters(self):
pass
def save(self, path, name):
pass
def load(self, path, name):
pass
def create_neg(self, neg_head):
gamma = self.gamma
if neg_head:
def fn(heads, relations, tails, num_chunks, chunk_size, neg_sample_size):
hidden_dim = heads.shape[1]
heads = heads.reshape(num_chunks, neg_sample_size, hidden_dim)
tails = tails - relations
tails = tails.reshape(num_chunks, chunk_size, hidden_dim)
return gamma - self.neg_dist_func(tails, heads)
return fn
else:
def fn(heads, relations, tails, num_chunks, chunk_size, neg_sample_size):
hidden_dim = heads.shape[1]
heads = heads + relations
heads = heads.reshape(num_chunks, chunk_size, hidden_dim)
tails = tails.reshape(num_chunks, neg_sample_size, hidden_dim)
return gamma - self.neg_dist_func(heads, tails)
return fn
class TransRScore(nn.Module):
"""TransR score function
Paper link: https://www.aaai.org/ocs/index.php/AAAI/AAAI15/paper/download/9571/9523
"""
def __init__(self, gamma, projection_emb, relation_dim, entity_dim):
super(TransRScore, self).__init__()
self.gamma = gamma
self.projection_emb = projection_emb
self.relation_dim = relation_dim
self.entity_dim = entity_dim
def edge_func(self, edges):
head = edges.data['head_emb']
tail = edges.data['tail_emb']
rel = edges.data['emb']
score = head + rel - tail
return {'score': self.gamma - th.norm(score, p=1, dim=-1)}
def prepare(self, g, gpu_id, trace=False):
head_ids, tail_ids = g.all_edges(order='eid')
projection = self.projection_emb(g.edata['id'], gpu_id, trace)
projection = projection.reshape(-1, self.entity_dim, self.relation_dim)
g.edata['head_emb'] = th.einsum('ab,abc->ac', g.ndata['emb'][head_ids], projection)
g.edata['tail_emb'] = th.einsum('ab,abc->ac', g.ndata['emb'][tail_ids], projection)
def create_neg_prepare(self, neg_head):
if neg_head:
def fn(rel_id, num_chunks, head, tail, gpu_id, trace=False):
# pos node, project to its relation
projection = self.projection_emb(rel_id, gpu_id, trace)
projection = projection.reshape(num_chunks, -1, self.entity_dim, self.relation_dim)
tail = tail.reshape(num_chunks, -1, 1, self.entity_dim)
tail = th.matmul(tail, projection)
tail = tail.reshape(num_chunks, -1, self.relation_dim)
# neg node, each project to all relations
head = head.reshape(num_chunks, 1, -1, self.entity_dim)
# (num_chunks, num_rel, num_neg_nodes, rel_dim)
head = th.matmul(head, projection)
return head, tail
return fn
else:
def fn(rel_id, num_chunks, head, tail, gpu_id, trace=False):
# pos node, project to its relation
projection = self.projection_emb(rel_id, gpu_id, trace)
projection = projection.reshape(num_chunks, -1, self.entity_dim, self.relation_dim)
head = head.reshape(num_chunks, -1, 1, self.entity_dim)
head = th.matmul(head, projection)
head = head.reshape(num_chunks, -1, self.relation_dim)
# neg node, each project to all relations
tail = tail.reshape(num_chunks, 1, -1, self.entity_dim)
# (num_chunks, num_rel, num_neg_nodes, rel_dim)
tail = th.matmul(tail, projection)
return head, tail
return fn
def forward(self, g):
g.apply_edges(lambda edges: self.edge_func(edges))
def reset_parameters(self):
self.projection_emb.init(1.0)
def update(self, gpu_id=-1):
self.projection_emb.update(gpu_id)
def save(self, path, name):
self.projection_emb.save(path, name+'projection')
def load(self, path, name):
self.projection_emb.load(path, name+'projection')
def prepare_local_emb(self, projection_emb):
self.global_projection_emb = self.projection_emb
self.projection_emb = projection_emb
def prepare_cross_rels(self, cross_rels):
self.projection_emb.setup_cross_rels(cross_rels, self.global_projection_emb)
def writeback_local_emb(self, idx):
self.global_projection_emb.emb[idx] = self.projection_emb.emb.cpu()[idx]
def load_local_emb(self, projection_emb):
device = projection_emb.emb.device
projection_emb.emb = self.projection_emb.emb.to(device)
self.projection_emb = projection_emb
def share_memory(self):
self.projection_emb.share_memory()
def create_neg(self, neg_head):
gamma = self.gamma
if neg_head:
def fn(heads, relations, tails, num_chunks, chunk_size, neg_sample_size):
relations = relations.reshape(num_chunks, -1, self.relation_dim)
tails = tails - relations
tails = tails.reshape(num_chunks, -1, 1, self.relation_dim)
score = heads - tails
return gamma - th.norm(score, p=1, dim=-1)
return fn
else:
def fn(heads, relations, tails, num_chunks, chunk_size, neg_sample_size):
relations = relations.reshape(num_chunks, -1, self.relation_dim)
heads = heads - relations
heads = heads.reshape(num_chunks, -1, 1, self.relation_dim)
score = heads - tails
return gamma - th.norm(score, p=1, dim=-1)
return fn
class DistMultScore(nn.Module):
"""DistMult score function
Paper link: https://arxiv.org/abs/1412.6575
"""
def __init__(self):
super(DistMultScore, self).__init__()
def edge_func(self, edges):
head = edges.src['emb']
tail = edges.dst['emb']
rel = edges.data['emb']
score = head * rel * tail
# TODO: check if there exists minus sign and if gamma should be used here(jin)
return {'score': th.sum(score, dim=-1)}
def prepare(self, g, gpu_id, trace=False):
pass
def create_neg_prepare(self, neg_head):
def fn(rel_id, num_chunks, head, tail, gpu_id, trace=False):
return head, tail
return fn
def update(self, gpu_id=-1):
pass
def reset_parameters(self):
pass
def save(self, path, name):
pass
def load(self, path, name):
pass
def forward(self, g):
g.apply_edges(lambda edges: self.edge_func(edges))
def create_neg(self, neg_head):
if neg_head:
def fn(heads, relations, tails, num_chunks, chunk_size, neg_sample_size):
hidden_dim = heads.shape[1]
heads = heads.reshape(num_chunks, neg_sample_size, hidden_dim)
heads = th.transpose(heads, 1, 2)
tmp = (tails * relations).reshape(num_chunks, chunk_size, hidden_dim)
return th.bmm(tmp, heads)
return fn
else:
def fn(heads, relations, tails, num_chunks, chunk_size, neg_sample_size):
hidden_dim = tails.shape[1]
tails = tails.reshape(num_chunks, neg_sample_size, hidden_dim)
tails = th.transpose(tails, 1, 2)
tmp = (heads * relations).reshape(num_chunks, chunk_size, hidden_dim)
return th.bmm(tmp, tails)
return fn
class ComplExScore(nn.Module):
"""ComplEx score function
Paper link: https://arxiv.org/abs/1606.06357
"""
def __init__(self):
super(ComplExScore, self).__init__()
def edge_func(self, edges):
real_head, img_head = th.chunk(edges.src['emb'], 2, dim=-1)
real_tail, img_tail = th.chunk(edges.dst['emb'], 2, dim=-1)
real_rel, img_rel = th.chunk(edges.data['emb'], 2, dim=-1)
score = real_head * real_tail * real_rel \
+ img_head * img_tail * real_rel \
+ real_head * img_tail * img_rel \
- img_head * real_tail * img_rel
# TODO: check if there exists minus sign and if gamma should be used here(jin)
return {'score': th.sum(score, -1)}
def prepare(self, g, gpu_id, trace=False):
pass
def create_neg_prepare(self, neg_head):
def fn(rel_id, num_chunks, head, tail, gpu_id, trace=False):
return head, tail
return fn
def update(self, gpu_id=-1):
pass
def reset_parameters(self):
pass
def save(self, path, name):
pass
def load(self, path, name):
pass
def forward(self, g):
g.apply_edges(lambda edges: self.edge_func(edges))
def create_neg(self, neg_head):
if neg_head:
def fn(heads, relations, tails, num_chunks, chunk_size, neg_sample_size):
hidden_dim = heads.shape[1]
emb_real = tails[..., :hidden_dim // 2]
emb_imag = tails[..., hidden_dim // 2:]
rel_real = relations[..., :hidden_dim // 2]
rel_imag = relations[..., hidden_dim // 2:]
real = emb_real * rel_real + emb_imag * rel_imag
imag = -emb_real * rel_imag + emb_imag * rel_real
emb_complex = th.cat((real, imag), dim=-1)
tmp = emb_complex.reshape(num_chunks, chunk_size, hidden_dim)
heads = heads.reshape(num_chunks, neg_sample_size, hidden_dim)
heads = th.transpose(heads, 1, 2)
return th.bmm(tmp, heads)
return fn
else:
def fn(heads, relations, tails, num_chunks, chunk_size, neg_sample_size):
hidden_dim = heads.shape[1]
emb_real = heads[..., :hidden_dim // 2]
emb_imag = heads[..., hidden_dim // 2:]
rel_real = relations[..., :hidden_dim // 2]
rel_imag = relations[..., hidden_dim // 2:]
real = emb_real * rel_real - emb_imag * rel_imag
imag = emb_real * rel_imag + emb_imag * rel_real
emb_complex = th.cat((real, imag), dim=-1)
tmp = emb_complex.reshape(num_chunks, chunk_size, hidden_dim)
tails = tails.reshape(num_chunks, neg_sample_size, hidden_dim)
tails = th.transpose(tails, 1, 2)
return th.bmm(tmp, tails)
return fn
class RESCALScore(nn.Module):
"""RESCAL score function
Paper link: http://www.icml-2011.org/papers/438_icmlpaper.pdf
"""
def __init__(self, relation_dim, entity_dim):
super(RESCALScore, self).__init__()
self.relation_dim = relation_dim
self.entity_dim = entity_dim
def edge_func(self, edges):
head = edges.src['emb']
tail = edges.dst['emb'].unsqueeze(-1)
rel = edges.data['emb']
rel = rel.view(-1, self.relation_dim, self.entity_dim)
score = head * th.matmul(rel, tail).squeeze(-1)
# TODO: check if use self.gamma
return {'score': th.sum(score, dim=-1)}
# return {'score': self.gamma - th.norm(score, p=1, dim=-1)}
def prepare(self, g, gpu_id, trace=False):
pass
def create_neg_prepare(self, neg_head):
def fn(rel_id, num_chunks, head, tail, gpu_id, trace=False):
return head, tail
return fn
def update(self, gpu_id=-1):
pass
def reset_parameters(self):
pass
def save(self, path, name):
pass
def load(self, path, name):
pass
def forward(self, g):
g.apply_edges(lambda edges: self.edge_func(edges))
def create_neg(self, neg_head):
if neg_head:
def fn(heads, relations, tails, num_chunks, chunk_size, neg_sample_size):
hidden_dim = heads.shape[1]
heads = heads.reshape(num_chunks, neg_sample_size, hidden_dim)
heads = th.transpose(heads, 1, 2)
tails = tails.unsqueeze(-1)
relations = relations.view(-1, self.relation_dim, self.entity_dim)
tmp = th.matmul(relations, tails).squeeze(-1)
tmp = tmp.reshape(num_chunks, chunk_size, hidden_dim)
return th.bmm(tmp, heads)
return fn
else:
def fn(heads, relations, tails, num_chunks, chunk_size, neg_sample_size):
hidden_dim = heads.shape[1]
tails = tails.reshape(num_chunks, neg_sample_size, hidden_dim)
tails = th.transpose(tails, 1, 2)
heads = heads.unsqueeze(-1)
relations = relations.view(-1, self.relation_dim, self.entity_dim)
tmp = th.matmul(relations, heads).squeeze(-1)
tmp = tmp.reshape(num_chunks, chunk_size, hidden_dim)
return th.bmm(tmp, tails)
return fn
class RotatEScore(nn.Module):
"""RotatE score function
Paper link: https://arxiv.org/abs/1902.10197
"""
def __init__(self, gamma, emb_init):
super(RotatEScore, self).__init__()
self.gamma = gamma
self.emb_init = emb_init
def edge_func(self, edges):
re_head, im_head = th.chunk(edges.src['emb'], 2, dim=-1)
re_tail, im_tail = th.chunk(edges.dst['emb'], 2, dim=-1)
phase_rel = edges.data['emb'] / (self.emb_init / np.pi)
re_rel, im_rel = th.cos(phase_rel), th.sin(phase_rel)
re_score = re_head * re_rel - im_head * im_rel
im_score = re_head * im_rel + im_head * re_rel
re_score = re_score - re_tail
im_score = im_score - im_tail
score = th.stack([re_score, im_score], dim=0)
score = score.norm(dim=0)
return {'score': self.gamma - score.sum(-1)}
def update(self, gpu_id=-1):
pass
def reset_parameters(self):
pass
def save(self, path, name):
pass
def load(self, path, name):
pass
def forward(self, g):
g.apply_edges(lambda edges: self.edge_func(edges))
def create_neg_prepare(self, neg_head):
def fn(rel_id, num_chunks, head, tail, gpu_id, trace=False):
return head, tail
return fn
def prepare(self, g, gpu_id, trace=False):
pass
def create_neg(self, neg_head):
gamma = self.gamma
emb_init = self.emb_init
if neg_head:
def fn(heads, relations, tails, num_chunks, chunk_size, neg_sample_size):
hidden_dim = heads.shape[1]
emb_real = tails[..., :hidden_dim // 2]
emb_imag = tails[..., hidden_dim // 2:]
phase_rel = relations / (emb_init / np.pi)
rel_real, rel_imag = th.cos(phase_rel), th.sin(phase_rel)
real = emb_real * rel_real + emb_imag * rel_imag
imag = -emb_real * rel_imag + emb_imag * rel_real
emb_complex = th.cat((real, imag), dim=-1)
tmp = emb_complex.reshape(num_chunks, chunk_size, 1, hidden_dim)
heads = heads.reshape(num_chunks, 1, neg_sample_size, hidden_dim)
score = tmp - heads
score = th.stack([score[..., :hidden_dim // 2],
score[..., hidden_dim // 2:]], dim=-1).norm(dim=-1)
return gamma - score.sum(-1)
return fn
else:
def fn(heads, relations, tails, num_chunks, chunk_size, neg_sample_size):
hidden_dim = heads.shape[1]
emb_real = heads[..., :hidden_dim // 2]
emb_imag = heads[..., hidden_dim // 2:]
phase_rel = relations / (emb_init / np.pi)
rel_real, rel_imag = th.cos(phase_rel), th.sin(phase_rel)
real = emb_real * rel_real - emb_imag * rel_imag
imag = emb_real * rel_imag + emb_imag * rel_real
emb_complex = th.cat((real, imag), dim=-1)
tmp = emb_complex.reshape(num_chunks, chunk_size, 1, hidden_dim)
tails = tails.reshape(num_chunks, 1, neg_sample_size, hidden_dim)
score = tmp - tails
score = th.stack([score[..., :hidden_dim // 2],
score[..., hidden_dim // 2:]], dim=-1).norm(dim=-1)
return gamma - score.sum(-1)
return fn
# -*- coding: utf-8 -*-
#
# setup.py
#
# Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
"""
KG Sparse embedding
"""
import os
import numpy as np
import torch as th
import torch.nn as nn
import torch.nn.functional as functional
import torch.nn.init as INIT
import torch.multiprocessing as mp
from torch.multiprocessing import Queue
from _thread import start_new_thread
import traceback
from functools import wraps
from .. import *
logsigmoid = functional.logsigmoid
def get_device(args):
return th.device('cpu') if args.gpu[0] < 0 else th.device('cuda:' + str(args.gpu[0]))
norm = lambda x, p: x.norm(p=p)**p
get_scalar = lambda x: x.detach().item()
reshape = lambda arr, x, y: arr.view(x, y)
cuda = lambda arr, gpu: arr.cuda(gpu)
def thread_wrapped_func(func):
"""Wrapped func for torch.multiprocessing.Process.
With this wrapper, OMP threads can be used in subprocesses;
otherwise OMP_NUM_THREADS=1 is mandatory.
How to use:
@thread_wrapped_func
def func_to_wrap(args ...):
"""
@wraps(func)
def decorated_function(*args, **kwargs):
queue = Queue()
def _queue_result():
exception, trace, res = None, None, None
try:
res = func(*args, **kwargs)
except Exception as e:
exception = e
trace = traceback.format_exc()
queue.put((res, exception, trace))
start_new_thread(_queue_result, ())
result, exception, trace = queue.get()
if exception is None:
return result
else:
assert isinstance(exception, Exception)
raise exception.__class__(trace)
return decorated_function
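# A minimal usage sketch (the worker below is illustrative, not part of this file):
#
#   @thread_wrapped_func
#   def train_worker(rank, args):
#       ...  # runs in a fresh thread inside the subprocess, so OMP threading works
#
#   proc = mp.Process(target=train_worker, args=(0, args))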
@thread_wrapped_func
def async_update(args, emb, queue):
"""Asynchronous embedding update for entity embeddings.
How it works:
1. The trainer process pushes entity embedding update requests into the queue.
2. The async_update process pulls requests from the queue, computes
the gradient state and the update, and writes them into the entity embeddings.
Parameters
----------
args :
Global configs.
emb : ExternalEmbedding
The entity embeddings.
queue:
The request queue.
"""
th.set_num_threads(args.num_thread)
while True:
(grad_indices, grad_values, gpu_id) = queue.get()
clr = emb.args.lr
if grad_indices is None:
return
with th.no_grad():
grad_sum = (grad_values * grad_values).mean(1)
device = emb.state_sum.device
if device != grad_indices.device:
grad_indices = grad_indices.to(device)
if device != grad_sum.device:
grad_sum = grad_sum.to(device)
emb.state_sum.index_add_(0, grad_indices, grad_sum)
std = emb.state_sum[grad_indices] # _sparse_mask
if gpu_id >= 0:
std = std.cuda(gpu_id)
std_values = std.sqrt_().add_(1e-10).unsqueeze(1)
tmp = (-clr * grad_values / std_values)
if tmp.device != device:
tmp = tmp.to(device)
emb.emb.index_add_(0, grad_indices, tmp)
class ExternalEmbedding:
"""Sparse Embedding for Knowledge Graph
It is used to store both entity embeddings and relation embeddings.
Parameters
----------
args :
Global configs.
num : int
Number of embeddings.
dim : int
Embedding dimension size.
device : th.device
Device to store the embedding.
"""
def __init__(self, args, num, dim, device):
self.gpu = args.gpu
self.args = args
self.num = num
self.trace = []
self.emb = th.empty(num, dim, dtype=th.float32, device=device)
self.state_sum = self.emb.new().resize_(self.emb.size(0)).zero_()
self.state_step = 0
self.has_cross_rel = False
# queue used by asynchronous update
self.async_q = None
# asynchronous update process
self.async_p = None
def init(self, emb_init):
"""Initializing the embeddings.
Parameters
----------
emb_init : float
The initial embedding range is [-emb_init, emb_init].
"""
INIT.uniform_(self.emb, -emb_init, emb_init)
INIT.zeros_(self.state_sum)
def setup_cross_rels(self, cross_rels, global_emb):
cpu_bitmap = th.zeros((self.num,), dtype=th.bool)
for i, rel in enumerate(cross_rels):
cpu_bitmap[rel] = 1
self.cpu_bitmap = cpu_bitmap
self.has_cross_rel = True
self.global_emb = global_emb
def get_noncross_idx(self, idx):
cpu_mask = self.cpu_bitmap[idx]
gpu_mask = ~cpu_mask
return idx[gpu_mask]
def share_memory(self):
Use torch.Tensor.share_memory_() to allow cross-process tensor access
"""
self.emb.share_memory_()
self.state_sum.share_memory_()
def __call__(self, idx, gpu_id=-1, trace=True):
""" Return sliced tensor.
Parameters
----------
idx : th.tensor
Slicing index
gpu_id : int
Which gpu to put sliced data in.
trace : bool
If True, trace the computation. This is required in training.
If False, do not trace the computation.
Default: True
"""
if self.has_cross_rel:
cpu_idx = idx.cpu()
cpu_mask = self.cpu_bitmap[cpu_idx]
cpu_idx = cpu_idx[cpu_mask]
cpu_idx = th.unique(cpu_idx)
if cpu_idx.shape[0] != 0:
cpu_emb = self.global_emb.emb[cpu_idx]
self.emb[cpu_idx] = cpu_emb.cuda(gpu_id)
s = self.emb[idx]
if gpu_id >= 0:
s = s.cuda(gpu_id)
# During the training, we need to trace the computation.
# In this case, we need to record the computation path and compute the gradients.
if trace:
data = s.clone().detach().requires_grad_(True)
self.trace.append((idx, data))
else:
data = s
return data
def update(self, gpu_id=-1):
""" Update embeddings in a sparse manner
Sparse embeddings are updated in mini batches. We maintain gradient states for
each embedding so they can be updated separately.
Parameters
----------
gpu_id : int
Which gpu to use to accelerate the calculation. If -1 is provided, the CPU is used.
"""
self.state_step += 1
with th.no_grad():
for idx, data in self.trace:
grad = data.grad.data
clr = self.args.lr
#clr = self.args.lr / (1 + (self.state_step - 1) * group['lr_decay'])
# the update is non-linear so indices must be unique
grad_indices = idx
grad_values = grad
if self.async_q is not None:
grad_indices.share_memory_()
grad_values.share_memory_()
self.async_q.put((grad_indices, grad_values, gpu_id))
else:
grad_sum = (grad_values * grad_values).mean(1)
device = self.state_sum.device
if device != grad_indices.device:
grad_indices = grad_indices.to(device)
if device != grad_sum.device:
grad_sum = grad_sum.to(device)
if self.has_cross_rel:
cpu_mask = self.cpu_bitmap[grad_indices]
cpu_idx = grad_indices[cpu_mask]
if cpu_idx.shape[0] > 0:
cpu_grad = grad_values[cpu_mask]
cpu_sum = grad_sum[cpu_mask].cpu()
cpu_idx = cpu_idx.cpu()
self.global_emb.state_sum.index_add_(0, cpu_idx, cpu_sum)
std = self.global_emb.state_sum[cpu_idx]
if gpu_id >= 0:
std = std.cuda(gpu_id)
std_values = std.sqrt_().add_(1e-10).unsqueeze(1)
tmp = (-clr * cpu_grad / std_values)
tmp = tmp.cpu()
self.global_emb.emb.index_add_(0, cpu_idx, tmp)
self.state_sum.index_add_(0, grad_indices, grad_sum)
std = self.state_sum[grad_indices] # _sparse_mask
if gpu_id >= 0:
std = std.cuda(gpu_id)
std_values = std.sqrt_().add_(1e-10).unsqueeze(1)
tmp = (-clr * grad_values / std_values)
if tmp.device != device:
tmp = tmp.to(device)
# TODO(zhengda) the overhead is here.
self.emb.index_add_(0, grad_indices, tmp)
self.trace = []
def create_async_update(self):
"""Set up the async update subprocess.
"""
self.async_q = Queue(1)
self.async_p = mp.Process(target=async_update, args=(self.args, self, self.async_q))
self.async_p.start()
def finish_async_update(self):
"""Notify the async update subprocess to quit.
"""
self.async_q.put((None, None, None))
self.async_p.join()
def curr_emb(self):
"""Return embeddings in trace.
"""
data = [data for _, data in self.trace]
return th.cat(data, 0)
def save(self, path, name):
"""Save embeddings.
Parameters
----------
path : str
Directory to save the embedding.
name : str
Embedding name.
"""
file_name = os.path.join(path, name+'.npy')
np.save(file_name, self.emb.cpu().detach().numpy())
def load(self, path, name):
"""Load embeddings.
Parameters
----------
path : str
Directory to load the embedding.
name : str
Embedding name.
"""
file_name = os.path.join(path, name+'.npy')
self.emb = th.Tensor(np.load(file_name))
# -*- coding: utf-8 -*-
#
# setup.py
#
# Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
from dataloader import get_dataset
import scipy as sp
import scipy.sparse  # make sure sp.sparse is loaded; 'import scipy' alone does not guarantee it
import numpy as np
import argparse
import os
import dgl
from dgl import backend as F
from dgl.data.utils import load_graphs, save_graphs
def write_txt_graph(path, file_name, part_dict, total_nodes):
partition_book = [0] * total_nodes
for part_id in part_dict:
print('write graph %d...' % part_id)
# Get (h,r,t) triples
partition_path = path + str(part_id)
if not os.path.exists(partition_path):
os.mkdir(partition_path)
triple_file = os.path.join(partition_path, file_name)
f = open(triple_file, 'w')
graph = part_dict[part_id]
src, dst = graph.all_edges(form='uv', order='eid')
rel = graph.edata['tid']
assert len(src) == len(rel)
src = F.asnumpy(src)
dst = F.asnumpy(dst)
rel = F.asnumpy(rel)
for i in range(len(src)):
f.write(str(src[i])+'\t'+str(rel[i])+'\t'+str(dst[i])+'\n')
f.close()
# Get local2global
l2g_file = os.path.join(partition_path, 'local_to_global.txt')
f = open(l2g_file, 'w')
pid = F.asnumpy(graph.parent_nid)
for i in range(len(pid)):
f.write(str(pid[i])+'\n')
f.close()
# Update partition_book
partition = F.asnumpy(graph.ndata['part_id'])
for i in range(len(pid)):
partition_book[pid[i]] = partition[i]
# Write partition_book.txt
for part_id in part_dict:
partition_path = path + str(part_id)
pb_file = os.path.join(partition_path, 'partition_book.txt')
f = open(pb_file, 'w')
for i in range(len(partition_book)):
f.write(str(partition_book[i])+'\n')
f.close()
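# For each partition p, write_txt_graph produces, under '<path><p>/':
#   train.txt            one 'head\trel\ttail' triple per line (local node ids)
#   local_to_global.txt  the local-to-global node id mapping, one id per line
#   partition_book.txt   for every global node id, the partition that owns it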
def main():
parser = argparse.ArgumentParser(description='Partition a knowledge graph')
parser.add_argument('--data_path', type=str, default='data',
help='root path of all datasets')
parser.add_argument('--dataset', type=str, default='FB15k',
help='dataset name, under data_path')
parser.add_argument('--data_files', type=str, default=None, nargs='+',
help='a list of data files, e.g. entity relation train valid test')
parser.add_argument('--format', type=str, default='built_in',
help='the format of the dataset, it can be built_in, '\
'raw_udd_{htr} or udd_{htr}')
parser.add_argument('-k', '--num-parts', required=True, type=int,
help='The number of partitions')
args = parser.parse_args()
num_parts = args.num_parts
print('load dataset..')
# load dataset and samplers
dataset = get_dataset(args.data_path, args.dataset, args.format, args.data_files)
print('construct graph...')
src, etype_id, dst = dataset.train
coo = sp.sparse.coo_matrix((np.ones(len(src)), (src, dst)),
shape=[dataset.n_entities, dataset.n_entities])
g = dgl.DGLGraph(coo, readonly=True, multigraph=True, sort_csr=True)
g.edata['tid'] = F.tensor(etype_id, F.int64)
print('partition graph...')
part_dict = dgl.transforms.metis_partition(g, num_parts, 1)
tot_num_inner_edges = 0
for part_id in part_dict:
part = part_dict[part_id]
num_inner_nodes = len(np.nonzero(F.asnumpy(part.ndata['inner_node']))[0])
num_inner_edges = len(np.nonzero(F.asnumpy(part.edata['inner_edge']))[0])
print('part {} has {} nodes and {} edges. {} nodes and {} edges are inside the partition'.format(
part_id, part.number_of_nodes(), part.number_of_edges(),
num_inner_nodes, num_inner_edges))
tot_num_inner_edges += num_inner_edges
part.copy_from_parent()
print('write graph to txt file...')
txt_file_graph = os.path.join(args.data_path, args.dataset)
txt_file_graph = os.path.join(txt_file_graph, 'partition_')
write_txt_graph(txt_file_graph, 'train.txt', part_dict, g.number_of_nodes())
print('there are {} edges in the graph and {} edge cuts for {} partitions.'.format(
g.number_of_edges(), g.number_of_edges() - tot_num_inner_edges, len(part_dict)))
if __name__ == '__main__':
main()
# -*- coding: utf-8 -*-
#
# setup.py
#
# Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import os
import scipy as sp
import scipy.sparse  # make sure sp.sparse is loaded; 'import scipy' alone does not guarantee it
import dgl
import numpy as np
import dgl.backend as F
backend = os.environ.get('DGLBACKEND', 'pytorch')
if backend.lower() == 'mxnet':
import mxnet as mx
mx.random.seed(42)
np.random.seed(42)
from models.mxnet.score_fun import *
from models.mxnet.tensor_models import ExternalEmbedding
else:
import torch as th
th.manual_seed(42)
np.random.seed(42)
from models.pytorch.score_fun import *
from models.pytorch.tensor_models import ExternalEmbedding
from models.general_models import KEModel
from dataloader.sampler import create_neg_subgraph
class dotdict(dict):
"""dot.notation access to dictionary attributes"""
__getattr__ = dict.get
__setattr__ = dict.__setitem__
__delattr__ = dict.__delitem__
def generate_rand_graph(n, func_name):
arr = (sp.sparse.random(n, n, density=0.1, format='coo') != 0).astype(np.int64)
g = dgl.DGLGraph(arr, readonly=True)
num_rels = 10
entity_emb = F.uniform((g.number_of_nodes(), 10), F.float32, F.cpu(), 0, 1)
if func_name == 'RotatE':
entity_emb = F.uniform((g.number_of_nodes(), 20), F.float32, F.cpu(), 0, 1)
rel_emb = F.uniform((num_rels, 10), F.float32, F.cpu(), -1, 1)
if func_name == 'RESCAL':
rel_emb = F.uniform((num_rels, 10*10), F.float32, F.cpu(), 0, 1)
g.ndata['id'] = F.arange(0, g.number_of_nodes())
rel_ids = np.random.randint(0, num_rels, g.number_of_edges(), dtype=np.int64)
g.edata['id'] = F.tensor(rel_ids, F.int64)
# TransR has an additional projection_emb
if (func_name == 'TransR'):
args = {'gpu':-1, 'lr':0.1}
args = dotdict(args)
projection_emb = ExternalEmbedding(args, 10, 10 * 10, F.cpu())
return g, entity_emb, rel_emb, (12.0, projection_emb, 10, 10)
elif (func_name == 'TransE'):
return g, entity_emb, rel_emb, (12.0,)  # trailing comma: make this a real tuple
elif (func_name == 'TransE_l1'):
return g, entity_emb, rel_emb, (12.0, 'l1')
elif (func_name == 'TransE_l2'):
return g, entity_emb, rel_emb, (12.0, 'l2')
elif (func_name == 'RESCAL'):
return g, entity_emb, rel_emb, (10, 10)
elif (func_name == 'RotatE'):
return g, entity_emb, rel_emb, (12.0, 1.0)
else:
return g, entity_emb, rel_emb, None
ke_score_funcs = {'TransE': TransEScore,
'TransE_l1': TransEScore,
'TransE_l2': TransEScore,
'DistMult': DistMultScore,
'ComplEx': ComplExScore,
'RESCAL': RESCALScore,
'TransR': TransRScore,
'RotatE': RotatEScore}
class BaseKEModel:
def __init__(self, score_func, entity_emb, rel_emb):
self.score_func = score_func
self.head_neg_score = self.score_func.create_neg(True)
self.tail_neg_score = self.score_func.create_neg(False)
self.head_neg_prepare = self.score_func.create_neg_prepare(True)
self.tail_neg_prepare = self.score_func.create_neg_prepare(False)
self.entity_emb = entity_emb
self.rel_emb = rel_emb
# init score_func specific data if needed
self.score_func.reset_parameters()
def predict_score(self, g):
g.ndata['emb'] = self.entity_emb[g.ndata['id']]
g.edata['emb'] = self.rel_emb[g.edata['id']]
self.score_func.prepare(g, -1, False)
self.score_func(g)
return g.edata['score']
def predict_neg_score(self, pos_g, neg_g):
pos_g.ndata['emb'] = self.entity_emb[pos_g.ndata['id']]
pos_g.edata['emb'] = self.rel_emb[pos_g.edata['id']]
neg_g.ndata['emb'] = self.entity_emb[neg_g.ndata['id']]
neg_g.edata['emb'] = self.rel_emb[neg_g.edata['id']]
num_chunks = neg_g.num_chunks
chunk_size = neg_g.chunk_size
neg_sample_size = neg_g.neg_sample_size
if neg_g.neg_head:
neg_head_ids = neg_g.ndata['id'][neg_g.head_nid]
neg_head = self.entity_emb[neg_head_ids]
_, tail_ids = pos_g.all_edges(order='eid')
tail = pos_g.ndata['emb'][tail_ids]
rel = pos_g.edata['emb']
neg_head, tail = self.head_neg_prepare(pos_g.edata['id'], num_chunks, neg_head, tail, -1, False)
neg_score = self.head_neg_score(neg_head, rel, tail,
num_chunks, chunk_size, neg_sample_size)
else:
neg_tail_ids = neg_g.ndata['id'][neg_g.tail_nid]
neg_tail = self.entity_emb[neg_tail_ids]
head_ids, _ = pos_g.all_edges(order='eid')
head = pos_g.ndata['emb'][head_ids]
rel = pos_g.edata['emb']
head, neg_tail = self.tail_neg_prepare(pos_g.edata['id'], num_chunks, head, neg_tail, -1, False)
neg_score = self.tail_neg_score(head, rel, neg_tail,
num_chunks, chunk_size, neg_sample_size)
return neg_score
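# predict_neg_score above scores negatives in chunks: each chunk of
# `chunk_size` positive edges shares the same `neg_sample_size` candidate
# heads (or tails), so the negative score has shape
# (num_chunks, chunk_size, neg_sample_size). check_score_func below verifies
# this chunked path against edge-by-edge scoring on an explicit negative graph.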
def check_score_func(func_name):
batch_size = 10
neg_sample_size = 10
g, entity_emb, rel_emb, args = generate_rand_graph(100, func_name)
hidden_dim = entity_emb.shape[1]
ke_score_func = ke_score_funcs[func_name]
if args is None:
ke_score_func = ke_score_func()
elif type(args) is tuple:
ke_score_func = ke_score_func(*list(args))
else:
ke_score_func = ke_score_func(args)
model = BaseKEModel(ke_score_func, entity_emb, rel_emb)
EdgeSampler = getattr(dgl.contrib.sampling, 'EdgeSampler')
sampler = EdgeSampler(g, batch_size=batch_size,
neg_sample_size=neg_sample_size,
negative_mode='chunk-head',
num_workers=1,
shuffle=False,
exclude_positive=False,
return_false_neg=False)
for pos_g, neg_g in sampler:
neg_g = create_neg_subgraph(pos_g,
neg_g,
neg_sample_size,
neg_sample_size,
True,
True,
g.number_of_nodes())
pos_g.copy_from_parent()
neg_g.copy_from_parent()
score1 = F.reshape(model.predict_score(neg_g), (batch_size, -1))
score2 = model.predict_neg_score(pos_g, neg_g)
score2 = F.reshape(score2, (batch_size, -1))
np.testing.assert_allclose(F.asnumpy(score1), F.asnumpy(score2),
rtol=1e-5, atol=1e-5)
def test_score_func_transe():
check_score_func('TransE')
check_score_func('TransE_l1')
check_score_func('TransE_l2')
def test_score_func_distmult():
check_score_func('DistMult')
def test_score_func_complex():
check_score_func('ComplEx')
def test_score_func_rescal():
check_score_func('RESCAL')
def test_score_func_transr():
check_score_func('TransR')
def test_score_func_rotate():
check_score_func('RotatE')
if __name__ == '__main__':
test_score_func_transe()
test_score_func_distmult()
test_score_func_complex()
test_score_func_rescal()
test_score_func_transr()
test_score_func_rotate()
# -*- coding: utf-8 -*-
#
# setup.py
#
# Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
from dataloader import EvalDataset, TrainDataset, NewBidirectionalOneShotIterator
from dataloader import get_dataset
import argparse
import os
import logging
import time
import json
from utils import get_compatible_batch_size
backend = os.environ.get('DGLBACKEND', 'pytorch')
if backend.lower() == 'mxnet':
import multiprocessing as mp
from train_mxnet import load_model
from train_mxnet import train
from train_mxnet import test
else:
import torch.multiprocessing as mp
from train_pytorch import load_model
from train_pytorch import train, train_mp
from train_pytorch import test, test_mp
class ArgParser(argparse.ArgumentParser):
def __init__(self):
super(ArgParser, self).__init__()
self.add_argument('--model_name', default='TransE',
choices=['TransE', 'TransE_l1', 'TransE_l2', 'TransR',
'RESCAL', 'DistMult', 'ComplEx', 'RotatE'],
help='model to use')
self.add_argument('--data_path', type=str, default='data',
help='root path of all datasets')
self.add_argument('--dataset', type=str, default='FB15k',
help='dataset name, under data_path')
self.add_argument('--format', type=str, default='built_in',
help='the format of the dataset, it can be built_in, '\
'raw_udd_{htr} or udd_{htr}')
self.add_argument('--data_files', type=str, default=None, nargs='+',
help='a list of data files, e.g. entity relation train valid test')
self.add_argument('--save_path', type=str, default='ckpts',
help='place to save models and logs')
self.add_argument('--save_emb', type=str, default=None,
help='save the embeddings in the specific location.')
self.add_argument('--max_step', type=int, default=80000,
help='number of training steps')
self.add_argument('--batch_size', type=int, default=1024,
help='batch size')
self.add_argument('--batch_size_eval', type=int, default=8,
help='batch size used for eval and test')
self.add_argument('--neg_sample_size', type=int, default=128,
help='negative sampling size')
self.add_argument('--neg_deg_sample', action='store_true',
help='negative sample proportional to vertex degree in the training')
self.add_argument('--neg_deg_sample_eval', action='store_true',
help='negative sampling proportional to vertex degree in the evaluation')
self.add_argument('--neg_sample_size_eval', type=int, default=-1,
help='negative sampling size for evaluation')
self.add_argument('--eval_percent', type=float, default=1,
help='sample some percentage for evaluation.')
self.add_argument('--hidden_dim', type=int, default=256,
help='hidden dim used by relation and entity')
self.add_argument('--lr', type=float, default=0.0001,
help='learning rate')
self.add_argument('-g', '--gamma', type=float, default=12.0,
help='margin value')
self.add_argument('--no_eval_filter', action='store_true',
help='do not filter positive edges among negative edges for evaluation')
self.add_argument('--gpu', type=int, default=[-1], nargs='+',
help='a list of active gpu ids, e.g. 0 1 2 4')
self.add_argument('--mix_cpu_gpu', action='store_true',
help='mix CPU and GPU training')
self.add_argument('-de', '--double_ent', action='store_true',
help='double entity dim for complex number')
self.add_argument('-dr', '--double_rel', action='store_true',
help='double relation dim for complex number')
self.add_argument('-log', '--log_interval', type=int, default=1000,
help='print training logs every x steps')
self.add_argument('--eval_interval', type=int, default=10000,
help='do evaluation after every x steps')
self.add_argument('-adv', '--neg_adversarial_sampling', action='store_true',
help='use negative adversarial sampling')
self.add_argument('-a', '--adversarial_temperature', default=1.0, type=float,
help='adversarial_temperature')
self.add_argument('--valid', action='store_true',
help='evaluate on the validation set during training')
self.add_argument('--test', action='store_true',
help='evaluate on the test set after training')
self.add_argument('-rc', '--regularization_coef', type=float, default=0.000002,
help='set value > 0.0 if regularization is used')
self.add_argument('-rn', '--regularization_norm', type=int, default=3,
help='norm used in regularization')
self.add_argument('--non_uni_weight', action='store_true',
help='use non-uniform weights when computing the loss')
self.add_argument('--pickle_graph', action='store_true',
help='pickle built graph, building a huge graph is slow.')
self.add_argument('--num_proc', type=int, default=1,
help='number of process used')
self.add_argument('--num_thread', type=int, default=1,
help='number of thread used')
self.add_argument('--rel_part', action='store_true',
help='enable relation partitioning')
self.add_argument('--soft_rel_part', action='store_true',
help='enable soft relation partition')
self.add_argument('--async_update', action='store_true',
help='allow async_update on node embedding')
self.add_argument('--force_sync_interval', type=int, default=-1,
help='We force a synchronization between processes every x steps')
def get_logger(args):
if not os.path.exists(args.save_path):
os.mkdir(args.save_path)
folder = '{}_{}_'.format(args.model_name, args.dataset)
n = len([x for x in os.listdir(args.save_path) if x.startswith(folder)])
folder += str(n)
args.save_path = os.path.join(args.save_path, folder)
if not os.path.exists(args.save_path):
os.makedirs(args.save_path)
log_file = os.path.join(args.save_path, 'train.log')
logging.basicConfig(
format='%(asctime)s %(levelname)-8s %(message)s',
level=logging.INFO,
datefmt='%Y-%m-%d %H:%M:%S',
filename=log_file,
filemode='w'
)
logger = logging.getLogger(__name__)
print("Logs are being recorded at: {}".format(log_file))
return logger
def run(args, logger):
init_time_start = time.time()
# load dataset and samplers
dataset = get_dataset(args.data_path, args.dataset, args.format, args.data_files)
if args.neg_sample_size_eval < 0:
args.neg_sample_size_eval = dataset.n_entities
args.batch_size = get_compatible_batch_size(args.batch_size, args.neg_sample_size)
args.batch_size_eval = get_compatible_batch_size(args.batch_size_eval, args.neg_sample_size_eval)
args.eval_filter = not args.no_eval_filter
if args.neg_deg_sample_eval:
assert not args.eval_filter, "if negative sampling based on degree, we can't filter positive edges."
train_data = TrainDataset(dataset, args, ranks=args.num_proc)
# if there is no cross-partition relation, we fall back to strict_rel_part
args.strict_rel_part = args.mix_cpu_gpu and (train_data.cross_part == False)
args.soft_rel_part = args.mix_cpu_gpu and args.soft_rel_part and train_data.cross_part
args.num_workers = 8 # fix num_worker to 8
if args.num_proc > 1:
train_samplers = []
for i in range(args.num_proc):
train_sampler_head = train_data.create_sampler(args.batch_size,
args.neg_sample_size,
args.neg_sample_size,
mode='head',
num_workers=args.num_workers,
shuffle=True,
exclude_positive=False,
rank=i)
train_sampler_tail = train_data.create_sampler(args.batch_size,
args.neg_sample_size,
args.neg_sample_size,
mode='tail',
num_workers=args.num_workers,
shuffle=True,
exclude_positive=False,
rank=i)
train_samplers.append(NewBidirectionalOneShotIterator(train_sampler_head, train_sampler_tail,
args.neg_sample_size, args.neg_sample_size,
True, dataset.n_entities))
train_sampler = NewBidirectionalOneShotIterator(train_sampler_head, train_sampler_tail,
args.neg_sample_size, args.neg_sample_size,
True, dataset.n_entities)
else: # This is used for debug
train_sampler_head = train_data.create_sampler(args.batch_size,
args.neg_sample_size,
args.neg_sample_size,
mode='head',
num_workers=args.num_workers,
shuffle=True,
exclude_positive=False)
train_sampler_tail = train_data.create_sampler(args.batch_size,
args.neg_sample_size,
args.neg_sample_size,
mode='tail',
num_workers=args.num_workers,
shuffle=True,
exclude_positive=False)
train_sampler = NewBidirectionalOneShotIterator(train_sampler_head, train_sampler_tail,
args.neg_sample_size, args.neg_sample_size,
True, dataset.n_entities)
if args.valid or args.test:
if len(args.gpu) > 1:
args.num_test_proc = args.num_proc if args.num_proc < len(args.gpu) else len(args.gpu)
else:
args.num_test_proc = args.num_proc
eval_dataset = EvalDataset(dataset, args)
if args.valid:
if args.num_proc > 1:
valid_sampler_heads = []
valid_sampler_tails = []
for i in range(args.num_proc):
valid_sampler_head = eval_dataset.create_sampler('valid', args.batch_size_eval,
args.neg_sample_size_eval,
args.neg_sample_size_eval,
args.eval_filter,
mode='chunk-head',
num_workers=args.num_workers,
rank=i, ranks=args.num_proc)
valid_sampler_tail = eval_dataset.create_sampler('valid', args.batch_size_eval,
args.neg_sample_size_eval,
args.neg_sample_size_eval,
args.eval_filter,
mode='chunk-tail',
num_workers=args.num_workers,
rank=i, ranks=args.num_proc)
valid_sampler_heads.append(valid_sampler_head)
valid_sampler_tails.append(valid_sampler_tail)
else: # This is used for debug
valid_sampler_head = eval_dataset.create_sampler('valid', args.batch_size_eval,
args.neg_sample_size_eval,
args.neg_sample_size_eval,
args.eval_filter,
mode='chunk-head',
num_workers=args.num_workers,
rank=0, ranks=1)
valid_sampler_tail = eval_dataset.create_sampler('valid', args.batch_size_eval,
args.neg_sample_size_eval,
args.neg_sample_size_eval,
args.eval_filter,
mode='chunk-tail',
num_workers=args.num_workers,
rank=0, ranks=1)
if args.test:
if args.num_test_proc > 1:
test_sampler_tails = []
test_sampler_heads = []
for i in range(args.num_test_proc):
test_sampler_head = eval_dataset.create_sampler('test', args.batch_size_eval,
args.neg_sample_size_eval,
args.neg_sample_size_eval,
args.eval_filter,
mode='chunk-head',
num_workers=args.num_workers,
rank=i, ranks=args.num_test_proc)
test_sampler_tail = eval_dataset.create_sampler('test', args.batch_size_eval,
args.neg_sample_size_eval,
args.neg_sample_size_eval,
args.eval_filter,
mode='chunk-tail',
num_workers=args.num_workers,
rank=i, ranks=args.num_test_proc)
test_sampler_heads.append(test_sampler_head)
test_sampler_tails.append(test_sampler_tail)
else:
test_sampler_head = eval_dataset.create_sampler('test', args.batch_size_eval,
args.neg_sample_size_eval,
args.neg_sample_size_eval,
args.eval_filter,
mode='chunk-head',
num_workers=args.num_workers,
rank=0, ranks=1)
test_sampler_tail = eval_dataset.create_sampler('test', args.batch_size_eval,
args.neg_sample_size_eval,
args.neg_sample_size_eval,
args.eval_filter,
mode='chunk-tail',
num_workers=args.num_workers,
rank=0, ranks=1)
# load model
model = load_model(logger, args, dataset.n_entities, dataset.n_relations)
if args.num_proc > 1 or args.async_update:
model.share_memory()
# We need to free all memory referenced by dataset.
eval_dataset = None
dataset = None
print('Total initialize time {:.3f} seconds'.format(time.time() - init_time_start))
# train
start = time.time()
rel_parts = train_data.rel_parts if args.strict_rel_part or args.soft_rel_part else None
cross_rels = train_data.cross_rels if args.soft_rel_part else None
if args.num_proc > 1:
procs = []
barrier = mp.Barrier(args.num_proc)
for i in range(args.num_proc):
valid_sampler = [valid_sampler_heads[i], valid_sampler_tails[i]] if args.valid else None
proc = mp.Process(target=train_mp, args=(args,
model,
train_samplers[i],
valid_sampler,
i,
rel_parts,
cross_rels,
barrier))
procs.append(proc)
proc.start()
for proc in procs:
proc.join()
else:
valid_samplers = [valid_sampler_head, valid_sampler_tail] if args.valid else None
train(args, model, train_sampler, valid_samplers, rel_parts=rel_parts)
print('training takes {} seconds'.format(time.time() - start))
if args.save_emb is not None:
if not os.path.exists(args.save_emb):
os.mkdir(args.save_emb)
model.save_emb(args.save_emb, args.dataset)
# We need to save the model configurations as well.
conf_file = os.path.join(args.save_emb, 'config.json')
with open(conf_file, 'w') as outfile:
json.dump({'dataset': args.dataset,
'model': args.model_name,
'emb_size': args.hidden_dim,
'max_train_step': args.max_step,
'batch_size': args.batch_size,
'neg_sample_size': args.neg_sample_size,
'lr': args.lr,
'gamma': args.gamma,
'double_ent': args.double_ent,
'double_rel': args.double_rel,
'neg_adversarial_sampling': args.neg_adversarial_sampling,
'adversarial_temperature': args.adversarial_temperature,
'regularization_coef': args.regularization_coef,
'regularization_norm': args.regularization_norm},
outfile, indent=4)
# test
if args.test:
start = time.time()
if args.num_test_proc > 1:
queue = mp.Queue(args.num_test_proc)
procs = []
for i in range(args.num_test_proc):
proc = mp.Process(target=test_mp, args=(args,
model,
[test_sampler_heads[i], test_sampler_tails[i]],
i,
'Test',
queue))
procs.append(proc)
proc.start()
total_metrics = {}
metrics = {}
logs = []
for i in range(args.num_test_proc):
log = queue.get()
logs = logs + log
for metric in logs[0].keys():
metrics[metric] = sum([log[metric] for log in logs]) / len(logs)
for k, v in metrics.items():
print('Test average {} : {}'.format(k, v))
for proc in procs:
proc.join()
else:
test(args, model, [test_sampler_head, test_sampler_tail])
print('testing takes {:.3f} seconds'.format(time.time() - start))
if __name__ == '__main__':
args = ArgParser().parse_args()
logger = get_logger(args)
run(args, logger)
# -*- coding: utf-8 -*-
#
# setup.py
#
# Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
from models import KEModel
import mxnet as mx
from mxnet import gluon
from mxnet import ndarray as nd
import os
import logging
import time
import json
def load_model(logger, args, n_entities, n_relations, ckpt=None):
model = KEModel(args, args.model_name, n_entities, n_relations,
args.hidden_dim, args.gamma,
double_entity_emb=args.double_ent, double_relation_emb=args.double_rel)
if ckpt is not None:
assert False, "We do not support loading model emb for genernal Embedding"
logger.info('Load model {}'.format(args.model_name))
return model
def load_model_from_checkpoint(logger, args, n_entities, n_relations, ckpt_path):
model = load_model(logger, args, n_entities, n_relations)
model.load_emb(ckpt_path, args.dataset)
return model
def train(args, model, train_sampler, valid_samplers=None, rank=0, rel_parts=None, barrier=None):
assert args.num_proc <= 1, "MXNet KGE does not support multi-process now"
assert args.rel_part == False, "No need for relation partition in single process for MXNet KGE"
logs = []
for arg in vars(args):
logging.info('{:20}:{}'.format(arg, getattr(args, arg)))
if len(args.gpu) > 0:
gpu_id = args.gpu[rank % len(args.gpu)] if args.mix_cpu_gpu and args.num_proc > 1 else args.gpu[0]
else:
gpu_id = -1
if args.strict_rel_part:
model.prepare_relation(mx.gpu(gpu_id))
start = time.time()
for step in range(0, args.max_step):
pos_g, neg_g = next(train_sampler)
args.step = step
with mx.autograd.record():
loss, log = model.forward(pos_g, neg_g, gpu_id)
loss.backward()
logs.append(log)
model.update(gpu_id)
if step % args.log_interval == 0:
for k in logs[0].keys():
v = sum(l[k] for l in logs) / len(logs)
print('[Train]({}/{}) average {}: {}'.format(step, args.max_step, k, v))
logs = []
print(time.time() - start)
start = time.time()
if args.valid and step % args.eval_interval == 0 and step > 1 and valid_samplers is not None:
start = time.time()
test(args, model, valid_samplers, mode='Valid')
print('test:', time.time() - start)
if args.strict_rel_part:
model.writeback_relation(rank, rel_parts)
# clear cache
logs = []
def test(args, model, test_samplers, rank=0, mode='Test', queue=None):
assert args.num_proc <= 1, "MXNet KGE does not support multi-process now"
logs = []
if len(args.gpu) > 0:
gpu_id = args.gpu[rank % len(args.gpu)] if args.mix_cpu_gpu and args.num_proc > 1 else args.gpu[0]
else:
gpu_id = -1
if args.strict_rel_part:
model.load_relation(mx.gpu(gpu_id))
for sampler in test_samplers:
#print('Number of tests: ' + str(len(sampler)))
count = 0
for pos_g, neg_g in sampler:
model.forward_test(pos_g, neg_g, logs, gpu_id)
metrics = {}
if len(logs) > 0:
for metric in logs[0].keys():
metrics[metric] = sum([log[metric] for log in logs]) / len(logs)
for k, v in metrics.items():
print('{} average {}: {}'.format(mode, k, v))
for i in range(len(test_samplers)):
test_samplers[i] = test_samplers[i].reset()
# -*- coding: utf-8 -*-
#
# setup.py
#
# Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
from models import KEModel
import torch.multiprocessing as mp
from torch.utils.data import DataLoader
import torch.optim as optim
import torch as th
from distutils.version import LooseVersion
TH_VERSION = LooseVersion(th.__version__)
if TH_VERSION < LooseVersion("1.2"):
    raise Exception("DGL-KE requires PyTorch version >= 1.2")
from models.pytorch.tensor_models import thread_wrapped_func
import os
import logging
import time
from functools import wraps
import dgl
from dgl.contrib import KVClient
import dgl.backend as F
from dataloader import EvalDataset
from dataloader import get_dataset
class KGEClient(KVClient):
"""User-defined kvclient for DGL-KGE
"""
def _push_handler(self, name, ID, data, target):
"""Row-Sparse Adagrad updater
"""
        # names look like '<emb_name>-data-'; strip the 6-char '-data-' suffix
        original_name = name[0:-6]
state_sum = target[original_name+'_state-data-']
grad_sum = (data * data).mean(1)
state_sum.index_add_(0, ID, grad_sum)
std = state_sum[ID] # _sparse_mask
std_values = std.sqrt_().add_(1e-10).unsqueeze(1)
tmp = (-self.clr * data / std_values)
target[name].index_add_(0, ID, tmp)
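    # Annotation (not original code): the handler above is row-sparse Adagrad.
    # For each row i selected by ID it applies
    #     state[i] += mean_j(grad[i, j] ** 2)
    #     w[i]     -= clr * grad[i] / (sqrt(state[i]) + 1e-10)
    # so only the touched rows pay the update cost.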
def set_clr(self, learning_rate):
"""Set learning rate
"""
self.clr = learning_rate
    def set_local2global(self, l2g):
        """Set the local-to-global ID mapping."""
        self._l2g = l2g
    def get_local2global(self):
        """Return the local-to-global ID mapping."""
        return self._l2g
def connect_to_kvstore(args, entity_pb, relation_pb, l2g):
"""Create kvclient and connect to kvstore service
"""
server_namebook = dgl.contrib.read_ip_config(filename=args.ip_config)
my_client = KGEClient(server_namebook=server_namebook)
my_client.set_clr(args.lr)
my_client.connect()
if my_client.get_id() % args.num_client == 0:
my_client.set_partition_book(name='entity_emb', partition_book=entity_pb)
my_client.set_partition_book(name='relation_emb', partition_book=relation_pb)
else:
my_client.set_partition_book(name='entity_emb')
my_client.set_partition_book(name='relation_emb')
my_client.set_local2global(l2g)
return my_client
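# Typical call pattern (a sketch mirroring dist_train_test below; the args,
# entity_pb, relation_pb and l2g objects come from the distributed launcher):
#     client = connect_to_kvstore(args, entity_pb, relation_pb, l2g)
#     client.barrier()              # wait until every client has connected
#     ...                           # train, pushing/pulling embeddings
#     if client.get_id() == 0:
#         client.shut_down()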
def load_model(logger, args, n_entities, n_relations, ckpt=None):
model = KEModel(args, args.model_name, n_entities, n_relations,
args.hidden_dim, args.gamma,
double_entity_emb=args.double_ent, double_relation_emb=args.double_rel)
if ckpt is not None:
assert False, "We do not support loading model emb for genernal Embedding"
return model
def load_model_from_checkpoint(logger, args, n_entities, n_relations, ckpt_path):
model = load_model(logger, args, n_entities, n_relations)
model.load_emb(ckpt_path, args.dataset)
return model
def train(args, model, train_sampler, valid_samplers=None, rank=0, rel_parts=None, cross_rels=None, barrier=None, client=None):
logs = []
for arg in vars(args):
logging.info('{:20}:{}'.format(arg, getattr(args, arg)))
if len(args.gpu) > 0:
gpu_id = args.gpu[rank % len(args.gpu)] if args.mix_cpu_gpu and args.num_proc > 1 else args.gpu[0]
else:
gpu_id = -1
if args.async_update:
model.create_async_update()
if args.strict_rel_part or args.soft_rel_part:
model.prepare_relation(th.device('cuda:' + str(gpu_id)))
if args.soft_rel_part:
model.prepare_cross_rels(cross_rels)
train_start = start = time.time()
sample_time = 0
update_time = 0
forward_time = 0
backward_time = 0
for step in range(0, args.max_step):
start1 = time.time()
pos_g, neg_g = next(train_sampler)
sample_time += time.time() - start1
if client is not None:
model.pull_model(client, pos_g, neg_g)
start1 = time.time()
loss, log = model.forward(pos_g, neg_g, gpu_id)
forward_time += time.time() - start1
start1 = time.time()
loss.backward()
backward_time += time.time() - start1
start1 = time.time()
if client is not None:
model.push_gradient(client)
else:
model.update(gpu_id)
update_time += time.time() - start1
logs.append(log)
# force synchronize embedding across processes every X steps
if args.force_sync_interval > 0 and \
(step + 1) % args.force_sync_interval == 0:
barrier.wait()
if (step + 1) % args.log_interval == 0:
for k in logs[0].keys():
v = sum(l[k] for l in logs) / len(logs)
print('[{}][Train]({}/{}) average {}: {}'.format(rank, (step + 1), args.max_step, k, v))
logs = []
print('[{}][Train] {} steps take {:.3f} seconds'.format(rank, args.log_interval,
time.time() - start))
print('[{}]sample: {:.3f}, forward: {:.3f}, backward: {:.3f}, update: {:.3f}'.format(
rank, sample_time, forward_time, backward_time, update_time))
sample_time = 0
update_time = 0
forward_time = 0
backward_time = 0
start = time.time()
if args.valid and (step + 1) % args.eval_interval == 0 and step > 1 and valid_samplers is not None:
valid_start = time.time()
if args.strict_rel_part or args.soft_rel_part:
model.writeback_relation(rank, rel_parts)
# forced sync for validation
if barrier is not None:
barrier.wait()
test(args, model, valid_samplers, rank, mode='Valid')
                print('validation takes {:.3f} seconds'.format(time.time() - valid_start))
if args.soft_rel_part:
model.prepare_cross_rels(cross_rels)
if barrier is not None:
barrier.wait()
print('train {} takes {:.3f} seconds'.format(rank, time.time() - train_start))
if args.async_update:
model.finish_async_update()
if args.strict_rel_part or args.soft_rel_part:
model.writeback_relation(rank, rel_parts)
def test(args, model, test_samplers, rank=0, mode='Test', queue=None):
if len(args.gpu) > 0:
gpu_id = args.gpu[rank % len(args.gpu)] if args.mix_cpu_gpu and args.num_proc > 1 else args.gpu[0]
else:
gpu_id = -1
if args.strict_rel_part or args.soft_rel_part:
model.load_relation(th.device('cuda:' + str(gpu_id)))
with th.no_grad():
logs = []
for sampler in test_samplers:
for pos_g, neg_g in sampler:
model.forward_test(pos_g, neg_g, logs, gpu_id)
metrics = {}
if len(logs) > 0:
for metric in logs[0].keys():
metrics[metric] = sum([log[metric] for log in logs]) / len(logs)
if queue is not None:
queue.put(logs)
else:
for k, v in metrics.items():
print('[{}]{} average {}: {}'.format(rank, mode, k, v))
test_samplers[0] = test_samplers[0].reset()
test_samplers[1] = test_samplers[1].reset()
@thread_wrapped_func
def train_mp(args, model, train_sampler, valid_samplers=None, rank=0, rel_parts=None, cross_rels=None, barrier=None):
if args.num_proc > 1:
th.set_num_threads(args.num_thread)
train(args, model, train_sampler, valid_samplers, rank, rel_parts, cross_rels, barrier)
@thread_wrapped_func
def test_mp(args, model, test_samplers, rank=0, mode='Test', queue=None):
if args.num_proc > 1:
th.set_num_threads(args.num_thread)
test(args, model, test_samplers, rank, mode, queue)
@thread_wrapped_func
def dist_train_test(args, model, train_sampler, entity_pb, relation_pb, l2g, rank=0, rel_parts=None, cross_rels=None, barrier=None):
if args.num_proc > 1:
th.set_num_threads(args.num_thread)
client = connect_to_kvstore(args, entity_pb, relation_pb, l2g)
client.barrier()
train_time_start = time.time()
train(args, model, train_sampler, None, rank, rel_parts, cross_rels, barrier, client)
client.barrier()
print('Total train time {:.3f} seconds'.format(time.time() - train_time_start))
model = None
if client.get_id() % args.num_client == 0: # pull full model from kvstore
args.num_test_proc = args.num_client
dataset_full = get_dataset(args.data_path, args.dataset, args.format)
print('Full data n_entities: ' + str(dataset_full.n_entities))
print("Full data n_relations: " + str(dataset_full.n_relations))
model_test = load_model(None, args, dataset_full.n_entities, dataset_full.n_relations)
eval_dataset = EvalDataset(dataset_full, args)
if args.test:
model_test.share_memory()
if args.neg_sample_size_eval < 0:
args.neg_sample_size_eval = dataset_full.n_entities
args.eval_filter = not args.no_eval_filter
if args.neg_deg_sample_eval:
assert not args.eval_filter, "if negative sampling based on degree, we can't filter positive edges."
print("Pull relation_emb ...")
relation_id = F.arange(0, model_test.n_relations)
relation_data = client.pull(name='relation_emb', id_tensor=relation_id)
model_test.relation_emb.emb[relation_id] = relation_data
print("Pull entity_emb ... ")
# split model into 100 small parts
start = 0
percent = 0
entity_id = F.arange(0, model_test.n_entities)
count = int(model_test.n_entities / 100)
end = start + count
        while True:
            print("Pull %d / 100 ..." % percent)
            # clamp the final chunk; slicing with -1 would silently drop
            # the last entity row
            if end >= model_test.n_entities:
                end = model_test.n_entities
            tmp_id = entity_id[start:end]
            entity_data = client.pull(name='entity_emb', id_tensor=tmp_id)
            model_test.entity_emb.emb[tmp_id] = entity_data
            if end == model_test.n_entities:
                break
            start = end
            end += count
            percent += 1
if args.save_emb is not None:
if not os.path.exists(args.save_emb):
os.mkdir(args.save_emb)
            model_test.save_emb(args.save_emb, args.dataset)
if args.test:
args.num_thread = 1
test_sampler_tails = []
test_sampler_heads = []
for i in range(args.num_test_proc):
test_sampler_head = eval_dataset.create_sampler('test', args.batch_size_eval,
args.neg_sample_size_eval,
args.neg_sample_size_eval,
args.eval_filter,
mode='chunk-head',
num_workers=args.num_workers,
rank=i, ranks=args.num_test_proc)
test_sampler_tail = eval_dataset.create_sampler('test', args.batch_size_eval,
args.neg_sample_size_eval,
args.neg_sample_size_eval,
args.eval_filter,
mode='chunk-tail',
num_workers=args.num_workers,
rank=i, ranks=args.num_test_proc)
test_sampler_heads.append(test_sampler_head)
test_sampler_tails.append(test_sampler_tail)
eval_dataset = None
dataset_full = None
print("Run test, test processes: %d" % args.num_test_proc)
queue = mp.Queue(args.num_test_proc)
procs = []
for i in range(args.num_test_proc):
proc = mp.Process(target=test_mp, args=(args,
model_test,
[test_sampler_heads[i], test_sampler_tails[i]],
i,
'Test',
queue))
procs.append(proc)
proc.start()
total_metrics = {}
metrics = {}
logs = []
for i in range(args.num_test_proc):
log = queue.get()
logs = logs + log
for metric in logs[0].keys():
metrics[metric] = sum([log[metric] for log in logs]) / len(logs)
for k, v in metrics.items():
print('Test average {} : {}'.format(k, v))
for proc in procs:
proc.join()
if client.get_id() == 0:
client.shut_down()
# -*- coding: utf-8 -*-
#
# setup.py
#
# Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import math
def get_compatible_batch_size(batch_size, neg_sample_size):
if neg_sample_size < batch_size and batch_size % neg_sample_size != 0:
old_batch_size = batch_size
batch_size = int(math.ceil(batch_size / neg_sample_size) * neg_sample_size)
        print('batch size ({}) is incompatible with the negative sample size ({}); changing the batch size to {}'.format(
            old_batch_size, neg_sample_size, batch_size))
return batch_size
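# A minimal sanity check (sketch, not part of the original module):
if __name__ == '__main__':
    # 100 is not a multiple of 64, so it is rounded up to ceil(100/64)*64 = 128.
    assert get_compatible_batch_size(100, 64) == 128
    # An already-compatible batch size is returned unchanged.
    assert get_compatible_batch_size(128, 64) == 128
    # No change either when the negative sample size exceeds the batch size.
    assert get_compatible_batch_size(32, 64) == 32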
.. _apiunifiedtensor:
dgl.contrib.UnifiedTensor
=========================
.. automodule:: dgl.contrib
UnifiedTensor enables direct CPU memory access from the GPU.
This feature is especially useful when GPUs need to access sparse data structures stored in CPU memory, for example when node features do not fit in GPU memory.
Without this feature, sparsely structured data located in CPU memory must be gathered (or packed) before being transferred to GPU memory, because GPU DMA engines can only transfer data at a block granularity.
However, the gathering step wastes CPU cycles and increases the CPU-to-GPU data copy time.
The goal of UnifiedTensor is to skip this CPU gathering step by letting GPUs access even non-contiguous data in CPU memory.
At the hardware level, this capability is enabled by NVIDIA GPUs' unified virtual addressing (UVM) and zero-copy access features.
Those who wish to further extend the capability of UnifiedTensor may read the following paper (`link <https://arxiv.org/abs/2103.03330>`_), which explains the underlying mechanism of UnifiedTensor in detail.
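A minimal usage sketch (PyTorch backend assumed; the exact constructor and
indexing semantics are documented in the class reference below):

.. code:: python

    import torch as th
    import dgl

    feats = th.rand(1000000, 128)   # node features kept in CPU memory
    # Expose the CPU tensor to the GPU over unified virtual addressing.
    unified = dgl.contrib.UnifiedTensor(feats, device=th.device('cuda:0'))
    idx = th.randint(0, 1000000, (4096,), device='cuda:0')
    batch = unified[idx]            # GPU gathers rows directly, no CPU packing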
UnifiedTensor Class
-------------------
.. autoclass:: UnifiedTensor
:members: __getitem__
......@@ -15,6 +15,5 @@ API Reference
nn-mxnet
dgl.ops
dgl.sampling
dgl.contrib.UnifiedTensor
udf
transforms
......@@ -52,7 +52,6 @@ Welcome to Deep Graph Library Tutorials and Documentation
api/python/dgl.optim
api/python/dgl.sampling
api/python/dgl.multiprocessing
api/python/dgl.contrib.UnifiedTensor
api/python/transforms
api/python/udf
api/python/dgl.sparse_v0
......@@ -16,7 +16,6 @@ from mxnet import gluon
import mxnet.ndarray as F
import dgl
from dgl.nn.mxnet import RelGraphConv
from dgl.contrib.data import load_data
from functools import partial
from dgl.data.rdf import AIFBDataset, MUTAGDataset, BGSDataset, AMDataset
......@@ -18,7 +18,6 @@ from .logging import enable_verbose_logging
from .backend import load_backend, backend_name
from . import function
from . import contrib
from . import container
from . import distributed
from . import random
......@@ -53,6 +52,3 @@ from .frame import LazyFeature
from .utils import apply_each
from .global_config import is_libxsmm_enabled, use_libxsmm
from .mpops import *
from ._deprecate.graph import DGLGraph as DGLGraphStale
from ._deprecate.nodeflow import *
"""Columnar storage for DGLGraph."""
from __future__ import absolute_import
from collections import namedtuple
from collections.abc import MutableMapping
import numpy as np
from .. import backend as F
from ..base import DGLError, dgl_warning
from ..init import zero_initializer
from .. import utils
class Scheme(namedtuple('Scheme', ['shape', 'dtype'])):
"""The column scheme.
Parameters
----------
shape : tuple of int
The feature shape.
dtype : backend-specific type object
The feature data type.
"""
    # Pickling torch dtypes could be problematic; this is a workaround.
    # I also had to create the data_type_dict and reverse_data_type_dict
    # attributes just for this bug.
# I raised an issue in PyTorch bug tracker:
# https://github.com/pytorch/pytorch/issues/14057
def __reduce__(self):
state = (self.shape, F.reverse_data_type_dict[self.dtype])
return self._reconstruct_scheme, state
@classmethod
def _reconstruct_scheme(cls, shape, dtype_str):
dtype = F.data_type_dict[dtype_str]
return cls(shape, dtype)
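# Round-trip sketch of the workaround above (assumes a backend is loaded):
#     import pickle
#     s = Scheme((3,), F.data_type_dict['float32'])
#     assert pickle.loads(pickle.dumps(s)) == s   # dtype survives via its string name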
def infer_scheme(tensor):
"""Infer column scheme from the given tensor data.
Parameters
    ----------
tensor : Tensor
The tensor data.
Returns
-------
Scheme
The column scheme.
"""
return Scheme(tuple(F.shape(tensor)[1:]), F.dtype(tensor))
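# Example (sketch, PyTorch backend): a (5, 3) float tensor is a batch of five
# 3-d features, so infer_scheme(torch.zeros(5, 3)) gives
# Scheme(shape=(3,), dtype=torch.float32); the first (row) dimension is
# excluded from the scheme.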
class Column(object):
"""A column is a compact store of features of multiple nodes/edges.
Currently, we use one dense tensor to batch all the feature tensors
together (along the first dimension).
Parameters
----------
data : Tensor
The initial data of the column.
scheme : Scheme, optional
The scheme of the column. Will be inferred if not provided.
Attributes
----------
data : Tensor
The data of the column.
scheme : Scheme
The scheme of the column.
"""
def __init__(self, data, scheme=None):
self.data = data
self.scheme = scheme if scheme else infer_scheme(data)
def __len__(self):
"""The column length."""
return F.shape(self.data)[0]
@property
def shape(self):
"""Return the scheme shape (feature shape) of this column."""
return self.scheme.shape
def __getitem__(self, idx):
"""Return the feature data given the index.
Parameters
----------
idx : utils.Index
The index.
Returns
-------
Tensor
The feature data
"""
if idx.slice_data() is not None:
slc = idx.slice_data()
return F.narrow_row(self.data, slc.start, slc.stop)
else:
user_idx = idx.tousertensor(F.context(self.data))
return F.gather_row(self.data, user_idx)
def __setitem__(self, idx, feats):
"""Update the feature data given the index.
        The update is performed out-of-place so it can be used in autograd mode.
        For an in-place write, please use ``update``.
Parameters
----------
idx : utils.Index or slice
The index.
feats : Tensor
The new features.
"""
self.update(idx, feats, inplace=False)
def update(self, idx, feats, inplace):
"""Update the feature data given the index.
Parameters
----------
idx : utils.Index
The index.
feats : Tensor
The new features.
inplace : bool
If true, use inplace write.
"""
feat_scheme = infer_scheme(feats)
if feat_scheme != self.scheme:
raise DGLError("Cannot update column of scheme %s using feature of scheme %s."
% (feat_scheme, self.scheme))
if inplace:
idx = idx.tousertensor(F.context(self.data))
F.scatter_row_inplace(self.data, idx, feats)
elif idx.slice_data() is not None:
# for contiguous indices narrow+concat is usually faster than scatter row
slc = idx.slice_data()
parts = [feats]
if slc.start > 0:
parts.insert(0, F.narrow_row(self.data, 0, slc.start))
if slc.stop < len(self):
parts.append(F.narrow_row(self.data, slc.stop, len(self)))
self.data = F.cat(parts, dim=0)
else:
idx = idx.tousertensor(F.context(self.data))
self.data = F.scatter_row(self.data, idx, feats)
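    # Illustration (annotation, not original code): for a contiguous slice
    # idx = slice(2, 4) on a 6-row column, the slice branch in ``update`` above
    # rebuilds the data as
    #     cat([rows 0:2, feats, rows 4:6])
    # instead of scattering, which keeps the update out-of-place and
    # autograd-friendly.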
def extend(self, feats, feat_scheme=None):
"""Extend the feature data.
Parameters
----------
feats : Tensor
The new features.
feat_scheme : Scheme, optional
            The scheme of the new features. Will be inferred if not provided.
"""
if feat_scheme is None:
feat_scheme = infer_scheme(feats)
if feat_scheme != self.scheme:
raise DGLError("Cannot update column of scheme %s using feature of scheme %s."
% (feat_scheme, self.scheme))
feats = F.copy_to(feats, F.context(self.data))
self.data = F.cat([self.data, feats], dim=0)
def clone(self):
"""Return a deepcopy of this column."""
return Column(F.clone(self.data), self.scheme)
@staticmethod
def create(data):
"""Create a new column using the given data."""
if isinstance(data, Column):
return Column(data.data, data.scheme)
else:
return Column(data)
def __repr__(self):
return repr(self.data)
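# Column usage sketch (assumes a PyTorch-like DGL backend is loaded):
#     col = Column.create(torch.zeros(4, 2))              # 4 rows of 2-d features
#     col.update(utils.toindex([1, 3]), torch.ones(2, 2), inplace=False)
#     col.extend(torch.zeros(2, 2))                       # now len(col) == 6
# Out-of-place updates replace col.data with a new tensor, leaving earlier
# references to the old tensor untouched.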
class Frame(MutableMapping):
"""The columnar storage for node/edge features.
The frame is a dictionary from feature fields to feature columns.
All columns should have the same number of rows (i.e. the same first dimension).
Parameters
----------
data : dict-like, optional
        The frame data in dictionary. If the provided data is another frame,
        this frame will NOT share columns with the given frame, so any
        out-of-place update on one will not be reflected in the other.
        In-place updates, however, will be seen by both. This follows the
        semantics of Python containers.
num_rows : int, optional [default=0]
The number of rows in this frame. If ``data`` is provided and is not empty,
``num_rows`` will be ignored and inferred from the given data.
"""
def __init__(self, data=None, num_rows=0):
if data is None:
self._columns = dict()
self._num_rows = num_rows
else:
# Note that we always create a new column for the given data.
# This avoids two frames accidentally sharing the same column.
self._columns = {k : Column.create(v) for k, v in data.items()}
if isinstance(data, (Frame, FrameRef)):
self._num_rows = data.num_rows
elif len(self._columns) != 0:
self._num_rows = len(next(iter(self._columns.values())))
else:
self._num_rows = num_rows
# sanity check
for name, col in self._columns.items():
if len(col) != self._num_rows:
raise DGLError('Expected all columns to have same # rows (%d), '
'got %d on %r.' % (self._num_rows, len(col), name))
        # Initializer for empty values. Initializer is a callable.
        # If it is None, a warning will be raised on the first call and the
        # zero initializer will be used thereafter.
self._initializers = {} # per-column initializers
self._remote_init_builder = None
self._default_initializer = None
def _set_zero_default_initializer(self):
"""Set the default initializer to be zero initializer."""
self._default_initializer = zero_initializer
def get_initializer(self, column=None):
"""Get the initializer for empty values for the given column.
Parameters
----------
column : str
The column
Returns
-------
callable
The initializer
"""
return self._initializers.get(column, self._default_initializer)
def set_initializer(self, initializer, column=None):
"""Set the initializer for empty values, for a given column or all future
columns.
Initializer is a callable that returns a tensor given the shape and data type.
Parameters
----------
initializer : callable
The initializer.
column : str, optional
The column name
"""
if column is None:
self._default_initializer = initializer
else:
self._initializers[column] = initializer
def set_remote_init_builder(self, builder):
"""Set an initializer builder to create a remote initializer for a new column to a frame.
NOTE(minjie): This is a temporary solution. Will be replaced by KVStore in the future.
The builder is a callable that returns an initializer. The returned initializer
is also a callable that returns a tensor given a local tensor and tensor name.
Parameters
----------
builder : callable
The builder to construct a remote initializer.
"""
self._remote_init_builder = builder
def get_remote_initializer(self, name):
"""Get a remote initializer.
NOTE(minjie): This is a temporary solution. Will be replaced by KVStore in the future.
Parameters
----------
name : string
The column name.
"""
if self._remote_init_builder is None:
return None
if self.get_initializer(name) is None:
self._set_zero_default_initializer()
initializer = self.get_initializer(name)
return self._remote_init_builder(initializer, name)
@property
def schemes(self):
"""Return a dictionary of column name to column schemes."""
return {k : col.scheme for k, col in self._columns.items()}
@property
def num_columns(self):
"""Return the number of columns in this frame."""
return len(self._columns)
@property
def num_rows(self):
"""Return the number of rows in this frame."""
return self._num_rows
def __contains__(self, name):
"""Return true if the given column name exists."""
return name in self._columns
def __getitem__(self, name):
"""Return the column of the given name.
Parameters
----------
name : str
The column name.
Returns
-------
Column
The column.
"""
return self._columns[name]
def __setitem__(self, name, data):
"""Update the whole column.
Parameters
----------
name : str
The column name.
col : Column or data convertible to Column
The column data.
"""
self.update_column(name, data)
def __delitem__(self, name):
"""Delete the whole column.
Parameters
----------
name : str
The column name.
"""
del self._columns[name]
def add_column(self, name, scheme, ctx):
"""Add a new column to the frame.
The frame will be initialized by the initializer.
Parameters
----------
name : str
The column name.
scheme : Scheme
The column scheme.
ctx : DGLContext
The column context.
"""
if name in self:
dgl_warning('Column "%s" already exists. Ignore adding this column again.' % name)
return
# If the data is backed by a remote server, we need to move data
# to the remote server.
initializer = self.get_remote_initializer(name)
if initializer is not None:
init_data = initializer((self.num_rows,) + scheme.shape, scheme.dtype, ctx)
else:
if self.get_initializer(name) is None:
self._set_zero_default_initializer()
initializer = self.get_initializer(name)
init_data = initializer((self.num_rows,) + scheme.shape, scheme.dtype,
ctx, slice(0, self.num_rows))
self._columns[name] = Column(init_data, scheme)
def add_rows(self, num_rows):
"""Add blank rows to this frame.
For existing fields, the rows will be extended according to their
initializers.
Parameters
----------
num_rows : int
The number of new rows
"""
feat_placeholders = {}
for key, col in self._columns.items():
scheme = col.scheme
ctx = F.context(col.data)
if self.get_initializer(key) is None:
self._set_zero_default_initializer()
initializer = self.get_initializer(key)
new_data = initializer((num_rows,) + scheme.shape, scheme.dtype,
ctx, slice(self._num_rows, self._num_rows + num_rows))
feat_placeholders[key] = new_data
self._append(Frame(feat_placeholders))
self._num_rows += num_rows
def update_column(self, name, data):
"""Add or replace the column with the given name and data.
Parameters
----------
name : str
The column name.
data : Column or data convertible to Column
The column data.
"""
# If the data is backed by a remote server, we need to move data
# to the remote server.
initializer = self.get_remote_initializer(name)
if initializer is not None:
new_data = initializer(F.shape(data), F.dtype(data), F.context(data))
new_data[:] = data
data = new_data
col = Column.create(data)
if len(col) != self.num_rows:
raise DGLError('Expected data to have %d rows, got %d.' %
(self.num_rows, len(col)))
self._columns[name] = col
def _append(self, other):
assert self._remote_init_builder is None, \
"We don't support append if data in the frame is mapped from a remote server."
# NOTE: `other` can be empty.
if self.num_rows == 0:
            # if there are no rows in the current frame, append is
            # equivalent to directly updating the columns.
self._columns = {key: Column.create(data) for key, data in other.items()}
else:
# pad columns that are not provided in the other frame with initial values
for key, col in self.items():
if key in other:
continue
scheme = col.scheme
ctx = F.context(col.data)
if self.get_initializer(key) is None:
self._set_zero_default_initializer()
initializer = self.get_initializer(key)
new_data = initializer((other.num_rows,) + scheme.shape,
scheme.dtype, ctx,
slice(self._num_rows, self._num_rows + other.num_rows))
other[key] = new_data
# append other to self
for key, col in other.items():
if key not in self._columns:
# the column does not exist; init a new column
self.add_column(key, col.scheme, F.context(col.data))
self._columns[key].extend(col.data, col.scheme)
def append(self, other):
"""Append another frame's data into this frame.
If the current frame is empty, it will just use the columns of the
given frame. Otherwise, the given data should contain all the
column keys of this frame.
Parameters
----------
other : Frame or dict-like
The frame data to be appended.
"""
if not isinstance(other, Frame):
other = Frame(other)
self._append(other)
self._num_rows += other.num_rows
def clear(self):
"""Clear this frame. Remove all the columns."""
self._columns = {}
self._num_rows = 0
def __iter__(self):
"""Return an iterator of columns."""
return iter(self._columns)
def __len__(self):
"""Return the number of columns."""
return self.num_columns
def keys(self):
"""Return the keys."""
return self._columns.keys()
def values(self):
"""Return the values."""
return self._columns.values()
def clone(self):
"""Return a clone of this frame.
The clone frame does not share the underlying storage with this frame,
i.e., adding or removing columns will not be visible to each other. However,
they still share the tensor contents so any mutable operation on the column
tensor are visible to each other. Hence, the function does not allocate extra
tensor memory. Use :func:`~dgl.Frame.deepclone` for cloning
a frame that does not share any data.
Returns
-------
Frame
A cloned frame.
"""
newframe = Frame(self._columns, self._num_rows)
newframe._initializers = self._initializers
newframe._remote_init_builder = self._remote_init_builder
newframe._default_initializer = self._default_initializer
return newframe
def deepclone(self):
"""Return a deep clone of this frame.
        The cloned frame holds a copy of this frame's data, and any modification
        to the clone is not visible to this frame. The function allocates new
        tensors and copies the contents from this frame. Use
        :func:`~dgl.Frame.clone` for cloning a frame that does not
        allocate extra tensor memory.
Returns
-------
Frame
A deep-cloned frame.
"""
newframe = Frame({k : col.clone() for k, col in self._columns.items()}, self._num_rows)
newframe._initializers = self._initializers
newframe._remote_init_builder = self._remote_init_builder
newframe._default_initializer = self._default_initializer
return newframe
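# Clone semantics sketch (PyTorch backend assumed):
#     f = Frame({'h': torch.zeros(4, 2)})
#     c, d = f.clone(), f.deepclone()
#     f['h'].data[0, 0] = 1.      # mutate the shared tensor in place
#     c['h'].data[0, 0]           # 1. -- clone() shares tensor storage
#     d['h'].data[0, 0]           # 0. -- deepclone() copied the contents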
class FrameRef(MutableMapping):
"""Reference object to a frame on a subset of rows.
Parameters
----------
frame : Frame, optional
The underlying frame. If not given, the reference will point to a
new empty frame.
index : utils.Index, optional
The rows that are referenced in the underlying frame. If not given,
the whole frame is referenced. The index should be distinct (no
duplication is allowed).
"""
def __init__(self, frame=None, index=None):
self._frame = frame if frame is not None else Frame()
# TODO(minjie): check no duplication
assert index is None or isinstance(index, utils.Index)
if index is None:
self._index = utils.toindex(slice(0, self._frame.num_rows))
else:
self._index = index
@property
def schemes(self):
"""Return the frame schemes.
Returns
-------
dict of str to Scheme
The frame schemes.
"""
return self._frame.schemes
@property
def num_columns(self):
"""Return the number of columns in the referred frame."""
return self._frame.num_columns
@property
def num_rows(self):
"""Return the number of rows referred."""
return len(self._index)
def set_initializer(self, initializer, column=None):
"""Set the initializer for empty values.
Initializer is a callable that returns a tensor given the shape and data type.
Parameters
----------
initializer : callable
The initializer.
column : str, optional
The column name
"""
self._frame.set_initializer(initializer, column=column)
def set_remote_init_builder(self, builder):
"""Set an initializer builder to create a remote initializer for a new column to a frame.
NOTE(minjie): This is a temporary solution. Will be replaced by KVStore in the future.
The builder is a callable that returns an initializer. The returned initializer
is also a callable that returns a tensor given a local tensor and tensor name.
Parameters
----------
builder : callable
The builder to construct a remote initializer.
"""
self._frame.set_remote_init_builder(builder)
def get_initializer(self, column=None):
"""Get the initializer for empty values for the given column.
Parameters
----------
column : str
The column
Returns
-------
callable
The initializer
"""
return self._frame.get_initializer(column)
def __contains__(self, name):
"""Return whether the column name exists."""
return name in self._frame
def __iter__(self):
"""Return the iterator of the columns."""
return iter(self._frame)
def __len__(self):
"""Return the number of columns."""
return self.num_columns
def keys(self):
"""Return the keys."""
return self._frame.keys()
def values(self):
"""Return the values."""
return self._frame.values()
def __getitem__(self, key):
"""Get data from the frame.
If the provided key is string, the corresponding column data will be returned.
If the provided key is an index or a slice, the corresponding rows will be selected.
        The returned rows are stored in a lazy dictionary, so the actual selection
        only happens when an explicit column name is provided.
        Examples (using PyTorch)
------------------------
>>> # create a frame of two columns and five rows
>>> f = Frame({'c1' : torch.zeros([5, 2]), 'c2' : torch.ones([5, 2])})
>>> fr = FrameRef(f)
        >>> # select rows 1 and 2; the returned `rows` is a lazy dictionary.
>>> rows = fr[Index([1, 2])]
>>> rows['c1'] # only select rows for 'c1' column; 'c2' column is not sliced.
Parameters
----------
key : str or utils.Index
The key.
Returns
-------
        Tensor or lazy dict of tensors
Depends on whether it is a column selection or row selection.
"""
if not isinstance(key, (str, utils.Index)):
raise DGLError('Argument "key" must be either str or utils.Index type.')
if isinstance(key, str):
return self.select_column(key)
elif key.is_slice(0, self.num_rows):
# shortcut for selecting all the rows
return self
else:
return self.select_rows(key)
def select_column(self, name):
"""Return the column of the given name.
        If only part of the rows are referenced, fetching the whole column will
        also slice out just the referenced rows.
Parameters
----------
name : str
The column name.
Returns
-------
Tensor
The column data.
"""
col = self._frame[name]
if self.is_span_whole_column():
return col.data
else:
return col[self._index]
def select_rows(self, query):
"""Return the rows given the query.
Parameters
----------
query : utils.Index or slice
The rows to be selected.
Returns
-------
utils.LazyDict
The lazy dictionary from str to the selected data.
"""
rows = self._getrows(query)
return utils.LazyDict(lambda key: self._frame[key][rows], keys=self.keys())
def __setitem__(self, key, val):
"""Update the data in the frame. The update is done out-of-place.
Parameters
----------
key : str or utils.Index
The key.
val : Tensor or dict of tensors
The value.
See Also
--------
update
"""
self.update_data(key, val, inplace=False)
def update_data(self, key, val, inplace):
"""Update the data in the frame.
        If the provided key is a string, the corresponding column data will be updated.
        The provided value should be one tensor that has the same scheme and length
        as the column.
        If the provided key is an index, the corresponding rows will be updated. The
        value provided should be a dictionary of string to the data of each column.
        All updates are performed out-of-place to work with autograd. For in-place
        updates, use ``update_column`` or ``update_rows``.
Parameters
----------
key : str or utils.Index
The key.
val : Tensor or dict of tensors
The value.
inplace: bool
If True, update will be done in place
"""
if not isinstance(key, (str, utils.Index)):
raise DGLError('Argument "key" must be either str or utils.Index type.')
if isinstance(key, str):
self.update_column(key, val, inplace=inplace)
elif key.is_slice(0, self.num_rows):
# shortcut for updating all the rows
for colname, col in val.items():
self.update_column(colname, col, inplace=inplace)
else:
self.update_rows(key, val, inplace=inplace)
def update_column(self, name, data, inplace):
"""Update the column.
        If this frameref spans the whole column of the underlying frame, this is
        equivalent to updating the column of the frame.
        If this frameref only points to part of the rows, then updating the column
        here corresponds to updating part of the column in the frame. An error is
        raised if the given column name does not exist.
Parameters
----------
name : str
The column name.
data : Tensor
The update data.
inplace : bool
            True if the update is performed in place.
"""
if self.is_span_whole_column():
if self.num_columns == 0:
# the frame is empty
self._index = utils.toindex(slice(0, len(data)))
self._frame[name] = data
else:
if name not in self._frame:
ctx = F.context(data)
self._frame.add_column(name, infer_scheme(data), ctx)
fcol = self._frame[name]
fcol.update(self._index, data, inplace)
def add_rows(self, num_rows):
"""Add blank rows to the underlying frame.
For existing fields, the rows will be extended according to their
initializers.
Note: only available for FrameRef that spans the whole column. The row
span will extend to new rows. Other FrameRefs referencing the same
frame will not be affected.
Parameters
----------
num_rows : int
Number of rows to add
"""
if not self.is_span_whole_column():
raise RuntimeError('FrameRef not spanning whole column.')
self._frame.add_rows(num_rows)
if self._index.slice_data() is not None:
# the index is a slice
slc = self._index.slice_data()
self._index = utils.toindex(slice(slc.start, slc.stop + num_rows))
else:
selfidxdata = self._index.tousertensor()
newdata = F.arange(self.num_rows, self.num_rows + num_rows)
self._index = utils.toindex(F.cat([selfidxdata, newdata], dim=0))
def update_rows(self, query, data, inplace):
"""Update the rows.
If the provided data has new column, it will be added to the frame.
See Also
--------
``update_column``
Parameters
----------
query : utils.Index or slice
The rows to be updated.
data : dict-like
The row data.
inplace : bool
            True if the update is performed in place.
"""
rows = self._getrows(query)
for key, col in data.items():
if key not in self:
# add new column
tmpref = FrameRef(self._frame, rows)
tmpref.update_column(key, col, inplace)
else:
self._frame[key].update(rows, col, inplace)
def __delitem__(self, key):
"""Delete data in the frame.
If the provided key is a string, the corresponding column will be deleted.
If the provided key is an index object or a slice, the corresponding rows will
be deleted.
Please note that "deleted" rows are not really deleted, but simply removed
in the reference. As a result, if two FrameRefs point to the same Frame, deleting
from one ref will not reflect on the other. However, deleting columns is real.
Parameters
----------
key : str or utils.Index
The key.
"""
if not isinstance(key, (str, utils.Index)):
raise DGLError('Argument "key" must be either str or utils.Index type.')
if isinstance(key, str):
del self._frame[key]
else:
self.delete_rows(key)
def delete_rows(self, query):
"""Delete rows.
Please note that "deleted" rows are not really deleted, but simply removed
in the reference. As a result, if two FrameRefs point to the same Frame, deleting
from one ref will not reflect on the other. By contrast, deleting columns is real.
Parameters
----------
query : utils.Index
The rows to be deleted.
"""
query = query.tonumpy()
index = self._index.tonumpy()
self._index = utils.toindex(np.delete(index, query))
def append(self, other):
"""Append another frame into this one.
Parameters
----------
other : dict of str to tensor
The data to be appended.
"""
old_nrows = self._frame.num_rows
self._frame.append(other)
new_nrows = self._frame.num_rows
# update index
if (self._index.slice_data() is not None
and self._index.slice_data().stop == old_nrows):
# Self index is a slice and index.stop is equal to the size of the
# underlying frame. Can still use a slice for the new index.
oldstart = self._index.slice_data().start
self._index = utils.toindex(slice(oldstart, new_nrows))
else:
# convert it to user tensor and concat
selfidxdata = self._index.tousertensor()
newdata = F.arange(old_nrows, new_nrows)
self._index = utils.toindex(F.cat([selfidxdata, newdata], dim=0))
def clear(self):
"""Clear the frame."""
self._frame.clear()
self._index = utils.toindex(slice(0, 0))
def is_contiguous(self):
"""Return whether this refers to a contiguous range of rows."""
# NOTE: this check could have false negatives
return self._index.slice_data() is not None
def is_span_whole_column(self):
"""Return whether this refers to all the rows."""
return self.is_contiguous() and self.num_rows == self._frame.num_rows
def clone(self):
"""Return a new reference to a clone of the underlying frame.
Returns
-------
FrameRef
A cloned frame reference.
See Also
--------
dgl.Frame.clone
"""
return FrameRef(self._frame.clone(), self._index)
def deepclone(self):
"""Return a new reference to a deep clone of the underlying frame.
Returns
-------
FrameRef
A deep-cloned frame reference.
See Also
--------
dgl.Frame.deepclone
"""
return FrameRef(self._frame.deepclone(), self._index)
def _getrows(self, query):
"""Internal function to convert from the local row ids to the row ids of the frame.
Parameters
----------
query : utils.Index
The query index.
Returns
-------
utils.Index
The actual index to the underlying frame.
"""
return self._index.get_items(query)
def frame_like(other, num_rows=None):
"""Create an empty frame that has the same initializer as the given one.
Parameters
----------
other : Frame
The given frame.
num_rows : int
The number of rows of the new one. If None, use other.num_rows
(Default: None)
Returns
-------
Frame
The new frame.
"""
num_rows = other.num_rows if num_rows is None else num_rows
newf = Frame(num_rows=num_rows)
    # set global initializer
if other.get_initializer() is None:
other._set_zero_default_initializer()
sync_frame_initializer(newf, other)
return newf
def sync_frame_initializer(new_frame, reference_frame):
"""Set the initializers of the new_frame to be the same as the reference_frame,
for both the default initializer and per-column initializers.
Parameters
----------
new_frame : Frame
The frame to set initializers
reference_frame : Frame
The frame to copy initializers
"""
new_frame._default_initializer = reference_frame._default_initializer
    # set per-column initializers
    # TODO(minjie): hack; cannot rely on keys as the _initializers
    # now supports non-existent columns.
    new_frame._initializers = reference_frame._initializers
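# FrameRef usage sketch (PyTorch backend assumed), tying the pieces together:
#     f = Frame({'c1': torch.zeros(5, 2), 'c2': torch.ones(5, 2)})
#     fr = FrameRef(f, utils.toindex([1, 2]))    # view over rows 1 and 2
#     fr.num_rows                                # 2
#     fr['c1']                                   # rows 1 and 2 of column 'c1'
#     fr.is_span_whole_column()                  # False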
"""Base graph class specialized for neural networks on graphs."""
# pylint: disable=too-many-lines
from __future__ import absolute_import
from collections import defaultdict
from contextlib import contextmanager
from typing import Iterable
from functools import wraps
import networkx as nx
import dgl
from ..base import ALL, NID, EID, is_all, DGLError, dgl_warning
from .. import backend as F
from .. import init
from .frame import FrameRef, Frame, Scheme, sync_frame_initializer
from .. import graph_index
from .runtime import ir, scheduler, Runtime, GraphAdapter
from .. import utils
from .view import NodeView, EdgeView
from .udf import NodeBatch, EdgeBatch
__all__ = ['DGLGraph', 'batch', 'unbatch']
class DGLBaseGraph(object):
"""Base graph class.
    A DGL graph is always directed. An undirected graph can be represented by
    adding each edge in both directions.
    Nodes are identified by consecutive integers starting from zero.
    Edges can be specified by two end points (u, v) or by the integer ID assigned
    when the edges are added. Edge IDs are automatically assigned in order of
    addition, i.e., the first edge added has ID 0, the second has ID 1, and so
    forth.
Parameters
----------
graph : graph index, optional
Data to initialize graph.
"""
is_block = False # for compatibility with DGLGraph
def __init__(self, graph):
self._graph = graph
def number_of_nodes(self):
"""Return the number of nodes in the graph.
Returns
-------
int
The number of nodes
"""
return self._graph.number_of_nodes()
def number_of_src_nodes(self):
"""Return the number of nodes in the graph.
For compatibility with heterographs.
Returns
-------
int
The number of nodes
"""
return self._graph.number_of_nodes()
def number_of_dst_nodes(self):
"""Return the number of nodes in the graph.
For compatibility with heterographs.
Returns
-------
int
The number of nodes
"""
return self._graph.number_of_nodes()
def __len__(self):
"""Return the number of nodes in the graph."""
return self.number_of_nodes()
@property
def is_multigraph(self):
"""True if the graph is a multigraph, False otherwise.
"""
return self._graph.is_multigraph()
@property
def is_readonly(self):
"""True if the graph is readonly, False otherwise.
"""
return self._graph.is_readonly()
def number_of_edges(self):
"""Return the number of edges in the graph.
Returns
-------
int
The number of edges
"""
return self._graph.number_of_edges()
def has_node(self, vid):
"""Return True if the graph contains node `vid`.
Identical to `vid in G`.
Parameters
----------
vid : int
The node ID.
Returns
-------
bool
True if the node exists
Examples
--------
>>> G = dgl.DGLGraph()
>>> G.add_nodes(3)
>>> G.has_node(0)
True
>>> G.has_node(4)
False
Equivalently,
>>> 0 in G
True
See Also
--------
has_nodes
"""
return self._graph.has_node(vid)
def __contains__(self, vid):
"""Return True if the graph contains node `vid`.
Examples
--------
>>> G = dgl.DGLGraph()
>>> G.add_nodes(3)
>>> 0 in G
True
"""
return self._graph.has_node(vid)
def has_nodes(self, vids):
"""Return a 0-1 array ``a`` given the node ID array ``vids``.
``a[i]`` is 1 if the graph contains node ``vids[i]``, 0 otherwise.
Parameters
----------
        vids : list or tensor
The array of node IDs.
Returns
-------
a : tensor
0-1 array indicating existence
Examples
--------
The following example uses PyTorch backend.
>>> G = dgl.DGLGraph()
>>> G.add_nodes(3)
>>> G.has_nodes([0, 1, 2, 3, 4])
tensor([1, 1, 1, 0, 0])
See Also
--------
has_node
"""
vids = utils.toindex(vids)
rst = self._graph.has_nodes(vids)
return rst.tousertensor()
def has_edge_between(self, u, v):
"""Return True if the edge (u, v) is in the graph.
Parameters
----------
u : int
The source node ID.
v : int
The destination node ID.
Returns
-------
bool
True if the edge is in the graph, False otherwise.
Examples
--------
>>> G = dgl.DGLGraph()
>>> G.add_nodes(3)
>>> G.add_edge(0, 1)
>>> G.has_edge_between(0, 1)
True
>>> G.has_edge_between(1, 0)
False
See Also
--------
has_edges_between
"""
return self._graph.has_edge_between(u, v)
def has_edges_between(self, u, v):
"""Return a 0-1 array `a` given the source node ID array `u` and
destination node ID array `v`.
`a[i]` is 1 if the graph contains edge `(u[i], v[i])`, 0 otherwise.
Parameters
----------
u : list, tensor
The source node ID array.
v : list, tensor
The destination node ID array.
Returns
-------
a : tensor
0-1 array indicating existence.
Examples
--------
The following example uses PyTorch backend.
>>> G = dgl.DGLGraph()
>>> G.add_nodes(3)
>>> G.add_edges([0, 0], [1, 2]) # (0, 1), (0, 2)
Check if (0, 1), (0, 2), (1, 0), (2, 0) exist in the graph above:
>>> G.has_edges_between([0, 0, 1, 2], [1, 2, 0, 0])
tensor([1, 1, 0, 0])
See Also
--------
has_edge_between
"""
u = utils.toindex(u)
v = utils.toindex(v)
rst = self._graph.has_edges_between(u, v)
return rst.tousertensor()
def predecessors(self, v):
"""Return the predecessors of node `v` in the graph.
        Node `u` is a predecessor of `v` if an edge `(u, v)` exists in the
        graph.
Parameters
----------
v : int
The node.
Returns
-------
tensor
Array of predecessor node IDs.
Examples
--------
The following example uses PyTorch backend.
>>> G = dgl.DGLGraph()
>>> G.add_nodes(3)
>>> G.add_edges([1, 2], [0, 0]) # (1, 0), (2, 0)
>>> G.predecessors(0)
tensor([1, 2])
See Also
--------
successors
"""
return self._graph.predecessors(v).tousertensor()
def successors(self, v):
"""Return the successors of node `v` in the graph.
        Node `u` is a successor of `v` if an edge `(v, u)` exists in the
        graph.
Parameters
----------
v : int
The node.
Returns
-------
tensor
Array of successor node IDs.
Examples
--------
The following example uses PyTorch backend.
>>> G = dgl.DGLGraph()
>>> G.add_nodes(3)
>>> G.add_edges([0, 0], [1, 2]) # (0, 1), (0, 2)
>>> G.successors(0)
tensor([1, 2])
See Also
--------
predecessors
"""
return self._graph.successors(v).tousertensor()
def edge_id(self, u, v, force_multi=None, return_array=False):
"""Return the edge ID, or an array of edge IDs, between source node
`u` and destination node `v`.
Parameters
----------
u : int
The source node ID.
v : int
The destination node ID.
force_multi : bool
Deprecated (Will be deleted in the future).
If False, will return a single edge ID.
If True, will always return an array.
return_array : bool
If False, will return a single edge ID.
If True, will always return an array.
Returns
-------
int or tensor
The edge ID if return_array is False.
The edge ID array otherwise.
Notes
-----
        If multiple edges exist between `u` and `v` and ``return_array`` is False,
        the result is undefined.
Examples
--------
The following example uses PyTorch backend.
For simple graphs:
>>> G = dgl.DGLGraph()
        >>> G.add_nodes(3)
>>> G.add_edges([0, 0], [1, 2]) # (0, 1), (0, 2)
>>> G.edge_id(0, 2)
1
>>> G.edge_id(0, 1)
0
For multigraphs:
>>> G = dgl.DGLGraph()
>>> G.add_nodes(3)
        Adding edges (0, 1), (0, 2), (0, 1), (0, 2): edge IDs 0 and 2 both
        connect node 0 to node 1, while edge IDs 1 and 3 both connect node 0 to node 2.
>>> G.add_edges([0, 0, 0, 0], [1, 2, 1, 2])
>>> G.edge_id(0, 1, return_array=True)
tensor([0, 2])
See Also
--------
edge_ids
"""
idx = self._graph.edge_id(u, v)
if force_multi is not None:
dgl_warning("force_multi will be deprecated." \
"Please use return_array instead")
return_array = force_multi
if return_array:
return idx.tousertensor()
else:
            assert len(idx) == 1, "For return_array=False, there should be one and " \
                                  "only one edge between u and v, but got {} edges. " \
                                  "Please use return_array=True instead".format(len(idx))
return idx[0]
def edge_ids(self, u, v, force_multi=None, return_uv=False):
"""Return all edge IDs between source node array `u` and destination
node array `v`.
Parameters
----------
u : list, tensor
The source node ID array.
v : list, tensor
The destination node ID array.
force_multi : bool
Deprecated (Will be deleted in the future).
Whether to always treat the graph as a multigraph.
return_uv : bool
Whether return e or (eu, ev, e)
Returns
-------
tensor, or (tensor, tensor, tensor)
            If ``return_uv`` is False, return a single edge ID array `e`.
`e[i]` is the edge ID between `u[i]` and `v[i]`.
Otherwise, return three arrays `(eu, ev, e)`. `e[i]` is the ID
of an edge between `eu[i]` and `ev[i]`. All edges between `u[i]`
and `v[i]` are returned.
Notes
-----
        If the graph is a simple graph, ``return_uv`` is False, and no edge
        exists between some pairs of `u[i]` and `v[i]`, the result is undefined.
        If the graph is a multigraph, ``return_uv`` is False, and multiple edges
        exist between some pairs of `u[i]` and `v[i]`, the result is undefined.
Examples
--------
The following example uses PyTorch backend.
For simple graphs:
>>> G = dgl.DGLGraph()
>>> G.add_nodes(3)
>>> G.add_edges([0, 0], [1, 2]) # (0, 1), (0, 2)
        >>> G.edge_ids([0, 0], [2, 1]) # get edge IDs of (0, 2) and (0, 1)
tensor([1, 0])
        For multigraphs:
>>> G = dgl.DGLGraph()
>>> G.add_nodes(4)
>>> G.add_edges([0, 0, 0], [1, 1, 2]) # (0, 1), (0, 1), (0, 2)
Get all edges between (0, 1), (0, 2), (0, 3). Note that there is no
edge between 0 and 3:
>>> G.edge_ids([0, 0, 0], [1, 2, 3], return_uv=True)
(tensor([0, 0, 0]), tensor([1, 1, 2]), tensor([0, 1, 2]))
See Also
--------
edge_id
"""
u = utils.toindex(u)
v = utils.toindex(v)
src, dst, eid = self._graph.edge_ids(u, v)
if force_multi is not None:
dgl_warning("force_multi will be deprecated, " \
"Please use return_uv instead")
return_uv = force_multi
if return_uv:
return src.tousertensor(), dst.tousertensor(), eid.tousertensor()
else:
            assert len(eid) == max(len(u), len(v)), "If return_uv=False, there should be one and " \
                "only one edge between each u and v; expected {} edges but got {}. " \
                "Please use return_uv=True instead".format(max(len(u), len(v)), len(eid))
return eid.tousertensor()
def find_edges(self, eid):
"""Given an edge ID array, return the source and destination node ID
array `s` and `d`. `s[i]` and `d[i]` are source and destination node
ID for edge `eid[i]`.
Parameters
----------
eid : list, tensor
The edge ID array.
Returns
-------
tensor
The source node ID array.
tensor
The destination node ID array.
Examples
--------
The following example uses PyTorch backend.
>>> G = dgl.DGLGraph()
>>> G.add_nodes(3)
>>> G.add_edges([0, 0, 1], [1, 2, 2]) # (0, 1), (0, 2), (1, 2)
>>> G.find_edges([0, 2])
(tensor([0, 1]), tensor([1, 2]))
"""
eid = utils.toindex(eid)
src, dst, _ = self._graph.find_edges(eid)
return src.tousertensor(), dst.tousertensor()
def in_edges(self, v, form='uv'):
"""Return the inbound edges of the node(s).
Parameters
----------
v : int, list, tensor
The node(s).
form : str, optional
The return form. Currently support:
- 'all' : a tuple (u, v, eid)
- 'uv' : a pair (u, v), default
- 'eid' : one eid tensor
Returns
-------
A tuple of Tensors ``(eu, ev, eid)`` if ``form == 'all'``.
``eid[i]`` is the ID of an inbound edge to ``ev[i]`` from ``eu[i]``.
All inbound edges to ``v`` are returned.
A pair of Tensors (eu, ev) if form == 'uv'
``eu[i]`` is the source node of an inbound edge to ``ev[i]``.
All inbound edges to ``v`` are returned.
One Tensor if form == 'eid'
            ``eid[i]`` is the ID of an inbound edge to any of the nodes in ``v``.
Examples
--------
The following example uses PyTorch backend.
>>> G = dgl.DGLGraph()
>>> G.add_nodes(3)
>>> G.add_edges([0, 0, 1], [1, 2, 2]) # (0, 1), (0, 2), (1, 2)
For a single node:
>>> G.in_edges(2)
(tensor([0, 1]), tensor([2, 2]))
>>> G.in_edges(2, 'all')
(tensor([0, 1]), tensor([2, 2]), tensor([1, 2]))
>>> G.in_edges(2, 'eid')
tensor([1, 2])
For multiple nodes:
>>> G.in_edges([1, 2])
(tensor([0, 0, 1]), tensor([1, 2, 2]))
>>> G.in_edges([1, 2], 'all')
(tensor([0, 0, 1]), tensor([1, 2, 2]), tensor([0, 1, 2]))
"""
v = utils.toindex(v)
src, dst, eid = self._graph.in_edges(v)
if form == 'all':
return (src.tousertensor(), dst.tousertensor(), eid.tousertensor())
elif form == 'uv':
return (src.tousertensor(), dst.tousertensor())
elif form == 'eid':
return eid.tousertensor()
else:
raise DGLError('Invalid form:', form)
def out_edges(self, v, form='uv'):
"""Return the outbound edges of the node(s).
Parameters
----------
v : int, list, tensor
The node(s).
form : str, optional
The return form. Currently support:
- 'all' : a tuple (u, v, eid)
- 'uv' : a pair (u, v), default
- 'eid' : one eid tensor
Returns
-------
A tuple of Tensors ``(eu, ev, eid)`` if ``form == 'all'``.
``eid[i]`` is the ID of an outbound edge from ``eu[i]`` to ``ev[i]``.
All outbound edges from ``v`` are returned.
A pair of Tensors (eu, ev) if form == 'uv'
``ev[i]`` is the destination node of an outbound edge from ``eu[i]``.
All outbound edges from ``v`` are returned.
One Tensor if form == 'eid'
            ``eid[i]`` is the ID of an outbound edge from any of the nodes in ``v``.
Examples
--------
The following example uses PyTorch backend.
>>> G = dgl.DGLGraph()
>>> G.add_nodes(3)
>>> G.add_edges([0, 0, 1], [1, 2, 2]) # (0, 1), (0, 2), (1, 2)
For a single node:
>>> G.out_edges(0)
(tensor([0, 0]), tensor([1, 2]))
>>> G.out_edges(0, 'all')
(tensor([0, 0]), tensor([1, 2]), tensor([0, 1]))
>>> G.out_edges(0, 'eid')
tensor([0, 1])
For multiple nodes:
>>> G.out_edges([0, 1])
(tensor([0, 0, 1]), tensor([1, 2, 2]))
>>> G.out_edges([0, 1], 'all')
(tensor([0, 0, 1]), tensor([1, 2, 2]), tensor([0, 1, 2]))
"""
v = utils.toindex(v)
src, dst, eid = self._graph.out_edges(v)
if form == 'all':
return (src.tousertensor(), dst.tousertensor(), eid.tousertensor())
elif form == 'uv':
return (src.tousertensor(), dst.tousertensor())
elif form == 'eid':
return eid.tousertensor()
else:
raise DGLError('Invalid form:', form)
def all_edges(self, form='uv', order=None):
"""Return all the edges.
Parameters
----------
form : str, optional
The return form. Currently support:
- 'all' : a tuple (u, v, eid)
- 'uv' : a pair (u, v), default
- 'eid' : one eid tensor
order : string
The order of the returned edges. Currently support:
- 'srcdst' : sorted by their src and dst ids.
            - 'eid' : sorted by edge IDs.
            - None : arbitrary order.
Returns
-------
A tuple of Tensors (u, v, eid) if form == 'all'
``eid[i]`` is the ID of an edge between ``u[i]`` and ``v[i]``.
All edges are returned.
A pair of Tensors (u, v) if form == 'uv'
An edge exists between ``u[i]`` and ``v[i]``.
If ``n`` edges exist between ``u`` and ``v``, then ``u`` and ``v`` as a pair
will appear ``n`` times.
One Tensor if form == 'eid'
``eid[i]`` is the ID of an edge in the graph.
Examples
--------
The following example uses PyTorch backend.
>>> G = dgl.DGLGraph()
>>> G.add_nodes(3)
>>> G.add_edges([0, 0, 1], [1, 2, 2]) # (0, 1), (0, 2), (1, 2)
>>> G.all_edges()
(tensor([0, 0, 1]), tensor([1, 2, 2]))
>>> G.all_edges('all')
(tensor([0, 0, 1]), tensor([1, 2, 2]), tensor([0, 1, 2]))
"""
src, dst, eid = self._graph.edges(order)
if form == 'all':
return (src.tousertensor(), dst.tousertensor(), eid.tousertensor())
elif form == 'uv':
return (src.tousertensor(), dst.tousertensor())
elif form == 'eid':
return eid.tousertensor()
else:
raise DGLError('Invalid form:', form)
def in_degree(self, v):
"""Return the in-degree of node ``v``.
Parameters
----------
v : int
The node ID.
Returns
-------
int
The in-degree.
Examples
--------
>>> G = dgl.DGLGraph()
>>> G.add_nodes(3)
>>> G.add_edges([0, 0, 1], [1, 2, 2]) # (0, 1), (0, 2), (1, 2)
>>> G.in_degree(2)
2
See Also
--------
in_degrees
"""
return self._graph.in_degree(v)
def in_degrees(self, v=ALL):
"""Return the array `d` of in-degrees of the node array `v`.
`d[i]` is the in-degree of node `v[i]`.
Parameters
----------
v : list, tensor, optional.
The node ID array. Default is to return the degrees of all the nodes.
Returns
-------
d : tensor
The in-degree array.
Examples
--------
The following example uses PyTorch backend.
>>> G = dgl.DGLGraph()
>>> G.add_nodes(3)
>>> G.add_edges([0, 0, 1], [1, 2, 2]) # (0, 1), (0, 2), (1, 2)
>>> G.in_degrees([1, 2])
tensor([1, 2])
See Also
--------
in_degree
"""
if is_all(v):
v = utils.toindex(slice(0, self.number_of_nodes()))
else:
v = utils.toindex(v)
return self._graph.in_degrees(v).tousertensor()
def out_degree(self, v):
"""Return the out-degree of node `v`.
Parameters
----------
v : int
The node ID.
Returns
-------
int
The out-degree.
Examples
--------
>>> G = dgl.DGLGraph()
>>> G.add_nodes(3)
>>> G.add_edges([0, 0, 1], [1, 2, 2]) # (0, 1), (0, 2), (1, 2)
>>> G.out_degree(0)
2
See Also
--------
out_degrees
"""
return self._graph.out_degree(v)
def out_degrees(self, v=ALL):
"""Return the array `d` of out-degrees of the node array `v`.
`d[i]` is the out-degree of node `v[i]`.
Parameters
----------
        v : list, tensor, optional
The node ID array. Default is to return the degrees of all the nodes.
Returns
-------
d : tensor
The out-degree array.
Examples
--------
The following example uses PyTorch backend.
>>> G = dgl.DGLGraph()
>>> G.add_nodes(3)
>>> G.add_edges([0, 0, 1], [1, 2, 2]) # (0, 1), (0, 2), (1, 2)
>>> G.out_degrees([0, 1])
tensor([2, 1])
See Also
--------
out_degree
"""
if is_all(v):
v = utils.toindex(slice(0, self.number_of_nodes()))
else:
v = utils.toindex(v)
return self._graph.out_degrees(v).tousertensor()
@property
def idtype(self):
"""Return the dtype of the graph index
Returns
        -------
backend dtype object
th.int32/th.int64 or tf.int32/tf.int64 etc.
"""
return getattr(F, self._graph.dtype)
@property
def _idtype_str(self):
"""The dtype of graph index
Returns
-------
backend dtype object
th.int32/th.int64 or tf.int32/tf.int64 etc.
"""
return self._graph.dtype
def mutation(func):
"""A decorator to decorate functions that might change graph structure."""
@wraps(func)
def inner(g, *args, **kwargs):
if g.is_readonly:
raise DGLError("Readonly graph. Mutation is not allowed. "
"To mutate it, call g.readonly(False) first.")
if g.batch_size > 1:
dgl_warning("The graph has batch_size > 1, and mutation would break"
" batching related properties, call `flatten` to remove"
" batching information of the graph.")
if g._parent is not None:
dgl_warning("The graph is a subgraph of a parent graph, and mutation"
" would break subgraph related properties, call `detach"
"_parent` to remove its connection with its parent.")
func(g, *args, **kwargs)
return inner
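# A hypothetical illustration of the guard above (the session below is a
# sketch, not a verbatim transcript): mutating a graph that has been frozen
# with ``readonly()`` raises immediately.
#
#   >>> g = dgl.DGLGraph()
#   >>> g.add_nodes(2)
#   >>> g.readonly()      # freeze the graph structure
#   >>> g.add_edge(0, 1)  # raises DGLError: Readonly graph. Mutation is not allowed.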
class DGLGraph(DGLBaseGraph):
"""Base graph class.
The graph stores nodes, edges and also their features.
A DGL graph is always directed. An undirected graph can be represented using
two bi-directional edges.
Nodes are identified by consecutive integers starting from zero.
Edges can be specified by two end points (u, v) or the integer id assigned
when the edges are added. Edge IDs are automatically assigned by the order
of addition, i.e. the first edge added has an ID of 0, the second
has an ID of 1, and so forth.
Node and edge features are stored as a dictionary from the feature name
to the feature data (in tensor).
DGL graph accepts graph data of multiple formats:
* NetworkX graph,
* scipy matrix,
* DGLGraph.
If the input graph data is DGLGraph, the constructed DGLGraph only contains
its graph index.
Parameters
----------
graph_data : graph data, optional
Data to initialize graph.
node_frame : FrameRef, optional
Node feature storage.
edge_frame : FrameRef, optional
Edge feature storage.
multigraph : bool, optional
Deprecated (Will be deleted in the future).
Whether the graph would be a multigraph. If None, the flag will be
set to True. (default: None)
readonly : bool, optional
Whether the graph structure is read-only (default: False).
Examples
--------
Create an empty graph with no nodes and edges.
>>> G = dgl.DGLGraph()
G can be grown in several ways.
**Nodes:**
Add N nodes:
>>> G.add_nodes(10) # 10 isolated nodes are added
**Edges:**
Add one edge at a time,
>>> G.add_edge(0, 1)
or multiple edges,
>>> G.add_edges([1, 2, 3], [3, 4, 5]) # three edges: 1->3, 2->4, 3->5
or multiple edges starting from the same node,
>>> G.add_edges(4, [7, 8, 9]) # three edges: 4->7, 4->8, 4->9
or multiple edges pointing to the same node,
>>> G.add_edges([2, 6, 8], 5) # three edges: 2->5, 6->5, 8->5
or multiple edges using tensor type
.. note:: Here we use pytorch syntax for demo. The general idea applies
to other frameworks with minor syntax change (e.g. replace
``torch.tensor`` with ``mxnet.ndarray``).
>>> import torch as th
>>> G.add_edges(th.tensor([3, 4, 5]), 1) # three edges: 3->1, 4->1, 5->1
NOTE: Removing nodes and edges is supported only on mutable (non-readonly)
graphs; see :func:`remove_nodes` and :func:`remove_edges`.
**Features:**
Both nodes and edges can have feature data. Features are stored as
key/value pair. The key must be hashable while the value must be tensor
type. Features are batched on the first dimension.
Use G.ndata to get/set features for all nodes.
>>> G = dgl.DGLGraph()
>>> G.add_nodes(3)
>>> G.ndata['x'] = th.zeros((3, 5)) # init 3 nodes with zero vector(len=5)
>>> G.ndata
{'x' : tensor([[0., 0., 0., 0., 0.],
[0., 0., 0., 0., 0.],
[0., 0., 0., 0., 0.]])}
Use G.nodes to get/set features for some nodes.
>>> G.nodes[[0, 2]].data['x'] = th.ones((2, 5))
>>> G.ndata
{'x' : tensor([[1., 1., 1., 1., 1.],
[0., 0., 0., 0., 0.],
[1., 1., 1., 1., 1.]])}
Similarly, use G.edata and G.edges to get/set features for edges.
>>> G.add_edges([0, 1], 2) # 0->2, 1->2
>>> G.edata['y'] = th.zeros((2, 4)) # init 2 edges with zero vector(len=4)
>>> G.edata
{'y' : tensor([[0., 0., 0., 0.],
[0., 0., 0., 0.]])}
>>> G.edges[1, 2].data['y'] = th.ones((1, 4))
>>> G.edata
{'y' : tensor([[0., 0., 0., 0.],
[1., 1., 1., 1.]])}
Note that each edge is assigned a unique ID equal to its order of
addition. So edge 1->2 has ID 1. DGL supports using edge IDs directly
to access edge features.
>>> G.edges[0].data['y'] += 2.
>>> G.edata
{'y' : tensor([[2., 2., 2., 2.],
[1., 1., 1., 1.]])}
**Message Passing:**
One common operation for updating node features is message passing,
where the source nodes send messages through edges to the destinations.
With :class:`DGLGraph`, we can do this with :func:`send` and :func:`recv`.
In the example below, the source nodes add 1 to their node features as
the messages and send the messages to the destinations.
>>> # Define the function for sending messages.
>>> def send_source(edges): return {'m': edges.src['x'] + 1}
>>> # Set the function defined to be the default message function.
>>> G.register_message_func(send_source)
>>> # Send messages through all edges.
>>> G.send(G.edges())
Just like you need to go to your mailbox for retrieving mails, the destination
nodes also need to receive the messages and potentially update their features.
>>> # Define a function for summing messages received and replacing the original feature.
>>> def simple_reduce(nodes): return {'x': nodes.mailbox['m'].sum(1)}
>>> # Set the function defined to be the default message reduce function.
>>> G.register_reduce_func(simple_reduce)
>>> # All existing edges have node 2 as the destination.
>>> # Receive the messages for node 2 and update its feature.
>>> G.recv(v=2)
>>> G.ndata
{'x': tensor([[1., 1., 1., 1., 1.],
[0., 0., 0., 0., 0.],
[3., 3., 3., 3., 3.]])} # 3 = (1 + 1) + (0 + 1)
For more examples about message passing, please read our tutorials.
"""
def __init__(self,
graph_data=None,
node_frame=None,
edge_frame=None,
multigraph=None,
readonly=False,
sort_csr=False,
batch_num_nodes=None,
batch_num_edges=None,
parent=None):
# graph
if isinstance(graph_data, DGLGraph):
gidx = graph_data._graph
if sort_csr:
gidx.sort_csr()
else:
if multigraph is not None:
dgl_warning("multigraph will be deprecated." \
"DGL will treat all graphs as multigraph in the future.")
gidx = graph_index.create_graph_index(graph_data, readonly)
if sort_csr:
gidx.sort_csr()
super(DGLGraph, self).__init__(gidx)
# node and edge frame
if node_frame is None:
self._node_frame = FrameRef(Frame(num_rows=self.number_of_nodes()))
else:
self._node_frame = node_frame
if edge_frame is None:
self._edge_frame = FrameRef(Frame(num_rows=self.number_of_edges()))
else:
self._edge_frame = edge_frame
# message indicator:
# if self._msg_index[eid] == 1, then edge eid has message
self._msg_index = None
# message frame
self._msg_frame = FrameRef(Frame(num_rows=self.number_of_edges()))
# set initializer for message frame
self._msg_frame.set_initializer(init.zero_initializer)
# registered functions
self._message_func = None
self._reduce_func = None
self._apply_node_func = None
self._apply_edge_func = None
# batched graph
self._batch_num_nodes = batch_num_nodes
self._batch_num_edges = batch_num_edges
# set parent if the graph is a subgraph.
self._parent = parent
def __setstate__(self, state):
# Compatibility with pickles from DGL 0.4.2-
if '_batch_num_nodes' not in state:
state = state.copy()
state.setdefault('_batch_num_nodes', None)
state.setdefault('_batch_num_edges', None)
state.setdefault('_parent', None)
self.__dict__.update(state)
def _create_subgraph(self, sgi, induced_nodes, induced_edges):
"""Internal function to create a subgraph from index."""
subg = DGLGraph(graph_data=sgi.graph,
readonly=True,
parent=self)
subg.ndata[NID] = induced_nodes.tousertensor()
subg.edata[EID] = induced_edges.tousertensor()
return subg
def _get_msg_index(self):
if self._msg_index is None:
self._msg_index = utils.zero_index(size=self.number_of_edges())
return self._msg_index
def _set_msg_index(self, index):
self._msg_index = index
@mutation
def add_nodes(self, num, data=None):
"""Add multiple new nodes.
Parameters
----------
num : int
Number of nodes to be added.
data : dict, optional
Feature data of the added nodes.
Notes
-----
If new nodes are added with features, and any of the old nodes
do not have some of the feature fields, those fields are filled
by initializers defined with ``set_n_initializer`` (default filling
with zeros).
Examples
--------
>>> g = dgl.DGLGraph()
>>> g.add_nodes(2)
>>> g.number_of_nodes()
2
>>> g.add_nodes(3)
>>> g.number_of_nodes()
5
Adding new nodes with features:
.. note:: Here we use pytorch syntax for demo. The general idea applies
to other frameworks with minor syntax change (e.g. replace
``torch.tensor`` with ``mxnet.ndarray``).
>>> import torch as th
>>> g.add_nodes(2, {'x': th.ones(2, 4)}) # default zero initializer
>>> g.ndata['x']
tensor([[0., 0., 0., 0.],
[0., 0., 0., 0.],
[0., 0., 0., 0.],
[0., 0., 0., 0.],
[0., 0., 0., 0.],
[1., 1., 1., 1.],
[1., 1., 1., 1.]])
"""
self._graph.add_nodes(num)
if data is None:
# Initialize feature placeholders if there are features existing
self._node_frame.add_rows(num)
else:
self._node_frame.append(data)
@mutation
def add_edge(self, u, v, data=None):
"""Add one new edge between u and v.
Parameters
----------
u : int
The source node ID. Must exist in the graph.
v : int
The destination node ID. Must exist in the graph.
data : dict, optional
Feature data of the added edges.
Notes
-----
If new edges are added with features, and any of the old edges
do not have some of the feature fields, those fields are filled
by initializers defined with ``set_e_initializer`` (default filling
with zeros).
Examples
--------
The following example uses PyTorch backend.
>>> G = dgl.DGLGraph()
>>> G.add_nodes(3)
>>> G.add_edge(0, 1)
Adding a new edge with features:
.. note:: Here we use pytorch syntax for demo. The general idea applies
to other frameworks with minor syntax change (e.g. replace
``torch.tensor`` with ``mxnet.ndarray``).
>>> import torch as th
>>> G.add_edge(0, 2, {'x': th.ones(1, 4)})
>>> G.edges()
(tensor([0, 0]), tensor([1, 2]))
>>> G.edata['x']
tensor([[0., 0., 0., 0.],
[1., 1., 1., 1.]])
>>> G.edges[0, 2].data['x']
tensor([[1., 1., 1., 1.]])
See Also
--------
add_edges
"""
self._graph.add_edge(u, v)
if data is None:
# Initialize feature placeholders if there are features existing
self._edge_frame.add_rows(1)
else:
self._edge_frame.append(data)
# resize msg_index and msg_frame
if self._msg_index is not None:
self._msg_index = self._msg_index.append_zeros(1)
self._msg_frame.add_rows(1)
@mutation
def add_edges(self, u, v, data=None):
"""Add multiple edges for list of source nodes u and destination nodes
v. A single edge is added between every pair of ``u[i]`` and ``v[i]``.
Parameters
----------
u : list, tensor
The source node IDs. All nodes must exist in the graph.
v : list, tensor
The destination node IDs. All nodes must exist in the graph.
data : dict, optional
Feature data of the added edges.
Notes
-----
If new edges are added with features, and any of the old edges
do not have some of the feature fields, those fields are filled
by initializers defined with ``set_e_initializer`` (default filling
with zeros).
Examples
--------
The following example uses PyTorch backend.
>>> G = dgl.DGLGraph()
>>> G.add_nodes(4)
>>> G.add_edges([0, 2], [1, 3]) # add edges (0, 1) and (2, 3)
Adding new edges with features:
.. note:: Here we use pytorch syntax for demo. The general idea applies
to other frameworks with minor syntax change (e.g. replace
``torch.tensor`` with ``mxnet.ndarray``).
>>> import torch as th
>>> G.add_edges([1, 3], [2, 0], {'x': th.ones(2, 4)}) # (1, 2), (3, 0)
>>> G.edata['x']
tensor([[0., 0., 0., 0.],
[0., 0., 0., 0.],
[1., 1., 1., 1.],
[1., 1., 1., 1.]])
See Also
--------
add_edge
"""
u = utils.toindex(u)
v = utils.toindex(v)
self._graph.add_edges(u, v)
num = max(len(u), len(v))
if data is None:
# Initialize feature placeholders if there are features existing
# NOTE: use max due to edge broadcasting syntax
self._edge_frame.add_rows(num)
else:
self._edge_frame.append(data)
# initialize feature placeholder for messages
if self._msg_index is not None:
self._msg_index = self._msg_index.append_zeros(num)
self._msg_frame.add_rows(num)
@mutation
def remove_nodes(self, vids):
"""Remove multiple nodes, edges that have connection with these nodes would also be removed.
Parameters
----------
vids : list or tensor
The IDs of the nodes to remove.
Notes
-----
The nodes and edges in the graph would be re-indexed after the removal.
Examples
--------
The following example uses PyTorch backend.
>>> import torch as th
>>> G = dgl.DGLGraph()
>>> G.add_nodes(5, {'x': th.arange(5) * 2})
>>> G.add_edges([0, 1, 2, 3, 4], [1, 2, 3, 4, 0], {'x': th.arange(15).view(5, 3)})
>>> G.nodes()
tensor([0, 1, 2, 3, 4])
>>> G.edges()
(tensor([0, 1, 2, 3, 4]), tensor([1, 2, 3, 4, 0]))
>>> G.ndata['x']
tensor([0, 2, 4, 6, 8])
>>> G.edata['x']
tensor([[ 0, 1, 2],
[ 3, 4, 5],
[ 6, 7, 8],
[ 9, 10, 11],
[12, 13, 14]])
>>> G.remove_nodes([2, 3])
>>> G.nodes()
tensor([0, 1, 2])
>>> G.edges()
(tensor([0, 2]), tensor([1, 0]))
>>> G.ndata['x']
tensor([0, 2, 8])
>>> G.edata['x']
tensor([[ 0, 1, 2],
[12, 13, 14]])
See Also
--------
add_nodes
add_edges
remove_edges
"""
induced_nodes = utils.set_diff(utils.toindex(self.nodes()), utils.toindex(vids))
sgi = self._graph.node_subgraph(induced_nodes)
num_nodes = len(sgi.induced_nodes)
num_edges = len(sgi.induced_edges)
if isinstance(self._node_frame, FrameRef):
self._node_frame = FrameRef(Frame(self._node_frame[sgi.induced_nodes],
num_rows=num_nodes))
else:
self._node_frame = FrameRef(self._node_frame, sgi.induced_nodes)
if isinstance(self._edge_frame, FrameRef):
self._edge_frame = FrameRef(Frame(self._edge_frame[sgi.induced_edges],
num_rows=num_edges))
else:
self._edge_frame = FrameRef(self._edge_frame, sgi.induced_edges)
self._graph = sgi.graph
@mutation
def remove_edges(self, eids):
"""Remove multiple edges.
Parameters
----------
eids : list or tensor
The IDs of the edges to remove.
Notes
-----
The edges in the graph would be re-indexed after the removal. The nodes are preserved.
Examples
--------
The following example uses PyTorch backend.
>>> import torch as th
>>> G = dgl.DGLGraph()
>>> G.add_nodes(5)
>>> G.add_edges([0, 1, 2, 3, 4], [1, 2, 3, 4, 0], {'x': th.arange(15).view(5, 3)})
>>> G.nodes()
tensor([0, 1, 2, 3, 4])
>>> G.edges()
(tensor([0, 1, 2, 3, 4]), tensor([1, 2, 3, 4, 0]))
>>> G.edata['x']
tensor([[ 0, 1, 2],
[ 3, 4, 5],
[ 6, 7, 8],
[ 9, 10, 11],
[12, 13, 14]])
>>> G.remove_edges([1, 2])
>>> G.nodes()
tensor([0, 1, 2, 3, 4])
>>> G.edges()
(tensor([0, 3, 4]), tensor([1, 4, 0]))
>>> G.edata['x']
tensor([[ 0, 1, 2],
[ 9, 10, 11],
[12, 13, 14]])
See Also
--------
add_nodes
add_edges
remove_nodes
"""
induced_edges = utils.set_diff(
utils.toindex(range(self.number_of_edges())), utils.toindex(eids))
sgi = self._graph.edge_subgraph(induced_edges, preserve_nodes=True)
num_nodes = len(sgi.induced_nodes)
num_edges = len(sgi.induced_edges)
if isinstance(self._node_frame, FrameRef):
self._node_frame = FrameRef(Frame(self._node_frame[sgi.induced_nodes],
num_rows=num_nodes))
else:
self._node_frame = FrameRef(self._node_frame, sgi.induced_nodes)
if isinstance(self._edge_frame, FrameRef):
self._edge_frame = FrameRef(Frame(self._edge_frame[sgi.induced_edges],
num_rows=num_edges))
else:
self._edge_frame = FrameRef(self._edge_frame, sgi.induced_edges)
self._graph = sgi.graph
@property
def parent_nid(self):
"""Get the parent node ids.
The returned tensor can be used as a map from the node id
in this subgraph to the node id in the parent graph.
Returns
-------
Tensor
The parent node id array.
Notes
-----
The parent node id information is stored in ``_ID`` field in the
node frame of the graph, so please do not manually change
this field.
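Examples
--------
A minimal sketch, assuming the PyTorch backend; the subgraph is induced
by nodes 0, 2 and 4 of a 5-node parent graph.
>>> g = dgl.DGLGraph()
>>> g.add_nodes(5)
>>> sg = g.subgraph([0, 2, 4])
>>> sg.parent_nid
tensor([0, 2, 4])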
"""
if self._parent is None:
raise DGLError("We only support parent_nid for subgraphs.")
return self.ndata[NID]
@property
def parent_eid(self):
"""Get the parent edge ids.
The returned tensor can be used as a map from the edge id
in this subgraph to the edge id in the parent graph.
Returns
-------
Tensor
The parent edge id array.
Notes
-----
The parent edge id information is stored in ``_ID`` field in the
edge frame of the graph, so please do not manually change
this field.
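Examples
--------
A minimal sketch, assuming the PyTorch backend; the subgraph is induced
by edges 0 and 2 of the parent graph.
>>> g = dgl.DGLGraph()
>>> g.add_nodes(3)
>>> g.add_edges([0, 1, 2], [1, 2, 0])
>>> sg = g.edge_subgraph([0, 2])
>>> sg.parent_eid
tensor([0, 2])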
"""
if self._parent is None:
raise DGLError("We only support parent_eid for subgraphs.")
return self.edata[EID]
def copy_to_parent(self, inplace=False):
"""Write node/edge features to the parent graph.
Parameters
----------
inplace : bool
If true, use inplace write (no gradient but faster)
Examples
--------
>>> import dgl
>>> import torch as th
>>> g = dgl.DGLGraph()
>>> g.add_nodes(5) # Create a DGLGraph with 5 nodes
>>> g.add_edges([0,1,2,3,4], [1,2,3,4,0])
>>> subg = g.subgraph([0, 1, 3, 4]) # take the subgraph induced by nodes 0,1,3,4
>>> subg.ndata['h'] = th.rand(4, 3)
>>> subg.edata['h'] = th.rand(3, 3)
>>> subg.ndata
{'_ID': tensor([0, 1, 3, 4]), 'h': tensor([[0.3803, 0.9351, 0.0611],
[0.6492, 0.4327, 0.3610],
[0.7471, 0.4257, 0.4130],
[0.9766, 0.6280, 0.6075]])}
>>> subg.edata
{'_ID': tensor([0, 3, 4]), 'h': tensor([[0.8192, 0.2409, 0.6278],
[0.9600, 0.3501, 0.8037],
[0.6521, 0.9029, 0.4901]])}
>>> g
DGLGraph(num_nodes=5, num_edges=5,
ndata_schemes={}
edata_schemes={})
>>> subg.copy_to_parent()
>>> g.ndata
{'h': tensor([[0.3803, 0.9351, 0.0611],
[0.6492, 0.4327, 0.3610],
[0.0000, 0.0000, 0.0000],
[0.7471, 0.4257, 0.4130],
[0.9766, 0.6280, 0.6075]])}
>>> g.edata
{'h': tensor([[0.8192, 0.2409, 0.6278],
[0.0000, 0.0000, 0.0000],
[0.0000, 0.0000, 0.0000],
[0.9600, 0.3501, 0.8037],
[0.6521, 0.9029, 0.4901]])}
Notes
-----
This API excludes the ``_ID`` field in both node frame and edge frame.
This being said, if a user takes a subgraph ``sg`` of a graph ``g`` and
applies :func:`~dgl.DGLGraph.copy_to_parent` on ``sg``, it will not pollute the
``_ID`` field of the node/edge frames of ``g``.
See Also
--------
copy_from_parent
"""
if self._parent is None:
raise DGLError("We only support copy_to_parent for subgraphs.")
nids = self.ndata.pop(NID)
eids = self.edata.pop(EID)
self._parent._node_frame.update_rows(
utils.toindex(nids), self._node_frame, inplace=inplace)
if self._parent._edge_frame.num_rows != 0:
self._parent._edge_frame.update_rows(
utils.toindex(eids), self._edge_frame, inplace=inplace)
self.ndata[NID] = nids
self.edata[EID] = eids
def copy_from_parent(self):
"""Copy node/edge features from the parent graph.
All old features will be removed.
Examples
--------
>>> import dgl
>>> import torch as th
>>> g = dgl.DGLGraph()
>>> g.add_nodes(5) # Create a DGLGraph with 5 nodes
>>> g.add_edges([0,1,2,3,4], [1,2,3,4,0])
>>> g.ndata['h'] = th.rand(5, 3)
>>> g.ndata['h']
tensor([[0.3749, 0.5681, 0.4749],
[0.6312, 0.7955, 0.3682],
[0.0215, 0.0303, 0.0282],
[0.8840, 0.6842, 0.3645],
[0.9253, 0.8427, 0.6626]])
>>> g.edata['h'] = th.rand(5, 3)
>>> g.edata['h']
tensor([[0.0659, 0.8552, 0.9208],
[0.8238, 0.0332, 0.7864],
[0.1629, 0.4149, 0.1363],
[0.0648, 0.6582, 0.4400],
[0.4321, 0.1612, 0.7893]])
>>> g
DGLGraph(num_nodes=5, num_edges=5,
ndata_schemes={'h': Scheme(shape=(3,), dtype=torch.float32)}
edata_schemes={'h': Scheme(shape=(3,), dtype=torch.float32)})
>>> subg = g.subgraph([0,1,3,4]) # Take subgraph induced by node 0,1,3,4
>>> subg # '_ID' field records node/edge mapping
DGLGraph(num_nodes=4, num_edges=3,
ndata_schemes={'_ID': Scheme(shape=(), dtype=torch.int64)}
edata_schemes={'_ID': Scheme(shape=(), dtype=torch.int64)})
>>> subg.copy_from_parent()
>>> subg.ndata
{'h': tensor([[0.3749, 0.5681, 0.4749],
[0.6312, 0.7955, 0.3682],
[0.8840, 0.6842, 0.3645],
[0.9253, 0.8427, 0.6626]]), '_ID': tensor([0, 1, 3, 4])}
>>> subg.edata
{'h': tensor([[0.0659, 0.8552, 0.9208],
[0.0648, 0.6582, 0.4400],
[0.4321, 0.1612, 0.7893]]), '_ID': tensor([0, 3, 4])}
Notes
-----
This API excludes the ``_ID`` field in both node frame and edge frame.
This being said, if a user takes a subgraph ``sg1`` of a subgraph ``sg``
whose ``_ID`` field in the node/edge frames is not None and
applies :func:`~dgl.copy_from_parent` on ``sg1``, it will not pollute
the ``_ID`` field of the node/edge frames of ``sg1``.
See Also
--------
subgraph
edge_subgraph
parent_nid
parent_eid
copy_to_parent
map_to_subgraph_nid
"""
if self._parent is None:
raise DGLError("We only support copy_from_parent for subgraphs.")
nids = self.ndata[NID]
eids = self.edata[EID]
if self._parent._node_frame.num_rows != 0 and self._parent._node_frame.num_columns != 0:
self._node_frame = FrameRef(Frame(
self._parent._node_frame[utils.toindex(nids)]))
if self._parent._edge_frame.num_rows != 0 and self._parent._edge_frame.num_columns != 0:
self._edge_frame = FrameRef(Frame(
self._parent._edge_frame[utils.toindex(eids)]))
self.ndata[NID] = nids
self.edata[EID] = eids
def map_to_subgraph_nid(self, parent_vids):
"""Map the node Ids in the parent graph to the node Ids in the subgraph.
Parameters
----------
parent_vids : list, tensor
The node ID array in the parent graph.
Returns
-------
tensor
The node ID array in the subgraph.
Examples
--------
>>> import dgl
>>> g = dgl.DGLGraph()
>>> g.add_nodes(5)
>>> sg = g.subgraph([0,2,4])
>>> sg.map_to_subgraph_nid([2,4])
tensor([1, 2])
See Also
--------
subgraph
edge_subgraph
parent_nid
parent_eid
copy_to_parent
copy_from_parent
"""
if self._parent is None:
raise DGLError("We only support map_to_subgraph_nid for subgraphs.")
v = graph_index.map_to_subgraph_nid(
utils.toindex(self.ndata[NID]), utils.toindex(parent_vids))
return v.tousertensor()
def flatten(self):
"""Remove all batching information of the graph, and regard the current
graph as an independent graph rather then a batched graph.
Graph topology and attributes would not be affected.
User can change the structure of the flattened graph.
Examples
--------
>>> import dgl
>>> import torch as th
>>> g_list = []
>>> for _ in range(3):  # Create three graphs, each with 4 nodes
...     g = dgl.DGLGraph()
...     g.add_nodes(4)
...     g.add_edges([0,1,2,3], [1,2,3,0])
...     g.ndata['h'] = th.rand(4, 3)
...     g_list.append(g)
>>> bg = dgl.batch(g_list)
>>> bg.ndata
{'h': tensor([[0.0463, 0.1251, 0.5967],
[0.8633, 0.9812, 0.8601],
[0.7828, 0.3624, 0.7845],
[0.2169, 0.8761, 0.3237],
[0.1752, 0.1478, 0.5611],
[0.5279, 0.2556, 0.2304],
[0.8950, 0.8203, 0.5604],
[0.2999, 0.2946, 0.2676],
[0.3419, 0.2935, 0.6618],
[0.8137, 0.8927, 0.8953],
[0.6229, 0.7153, 0.5041],
[0.5659, 0.0612, 0.2351]])}
>>> bg.batch_size
3
>>> bg.batch_num_nodes
[4, 4, 4]
>>> bg.batch_num_edges
[4, 4, 4]
>>> bg.flatten()
>>> bg.batch_size
1
>>> bg.batch_num_nodes
[12]
>>> bg.batch_num_edges
[12]
>>> bg.remove_nodes([1,3,5,7,9,11])
>>> bg.ndata
{'h': tensor([[0.0463, 0.1251, 0.5967],
[0.7828, 0.3624, 0.7845],
[0.1752, 0.1478, 0.5611],
[0.8950, 0.8203, 0.5604],
[0.3419, 0.2935, 0.6618],
[0.6229, 0.7153, 0.5041]])}
"""
self._batch_num_nodes = None
self._batch_num_edges = None
def detach_parent(self):
"""Detach the current graph from its parent, and regard the current graph
as an independent graph rather then a subgraph.
Graph topology and attributes would not be affected.
User can change the structure of the detached graph.
Examples
--------
>>> import dgl
>>> import torch as th
>>> g = dgl.DGLGraph() # Graph 1
>>> g.add_nodes(5)
>>> g.ndata['h'] = th.rand(5, 3)
>>> g.ndata
{'h': tensor([[0.9595, 0.7450, 0.5495],
[0.8253, 0.2902, 0.4393],
[0.3783, 0.4548, 0.6075],
[0.2323, 0.0936, 0.6580],
[0.1624, 0.3484, 0.3750]])}
>>> subg = g.subgraph([0,1,3]) # Create a subgraph
>>> subg.parent # Get the parent reference of subg
DGLGraph(num_nodes=5, num_edges=0,
ndata_schemes={'h': Scheme(shape=(3,), dtype=torch.float32)}
edata_schemes={})
>>> subg.copy_from_parent()
>>> subg.detach_parent() # Detach the subgraph from its parent
>>> subg.parent == None
True
>>> subg.add_nodes(1) # Change the structure of the subgraph
>>> subg
DGLGraph(num_nodes=4, num_edges=0,
ndata_schemes={'h': Scheme(shape=(3,), dtype=torch.float32)}
edata_schemes={})
>>> subg.ndata
{'h': tensor([[0.9595, 0.7450, 0.5495],
[0.8253, 0.2902, 0.4393],
[0.2323, 0.0936, 0.6580],
[0.0000, 0.0000, 0.0000]])}
"""
self._parent = None
self.ndata.pop(NID)
self.edata.pop(EID)
def clear(self):
"""Remove all nodes and edges, as well as their features, from the
graph.
Examples
--------
>>> G = dgl.DGLGraph()
>>> G.add_nodes(4)
>>> G.add_edges([0, 1, 2, 3], [1, 2, 3, 0])
>>> G.number_of_nodes()
4
>>> G.number_of_edges()
4
>>> G.clear()
>>> G.number_of_nodes()
0
>>> G.number_of_edges()
0
"""
self._graph.clear()
self._node_frame.clear()
self._edge_frame.clear()
self._msg_index = None
self._msg_frame.clear()
def clear_cache(self):
"""Clear all cached graph structures such as adjmat.
By default, all graph structure related sparse matrices (e.g. adjmat, incmat)
are cached so they can be reused, at the cost of extra memory consumption.
This function can be used to clear the cached matrices if memory is an issue.
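Examples
--------
A minimal sketch; the adjacency matrix is assumed here as one of the
structures that gets cached once materialized.
>>> G = dgl.DGLGraph()
>>> G.add_nodes(3)
>>> G.add_edges([0, 1], [1, 2])
>>> adj = G.adjacency_matrix()  # builds and caches the adjacency matrix
>>> G.clear_cache()             # drops the cached structures to free memory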
"""
self._graph.clear_cache()
def to_networkx(self, node_attrs=None, edge_attrs=None):
"""Convert to networkx graph.
The edge id will be saved as the 'id' edge attribute.
Parameters
----------
node_attrs : iterable of str, optional
The node attributes to be copied.
edge_attrs : iterable of str, optional
The edge attributes to be copied.
Returns
-------
networkx.DiGraph
The converted networkx graph.
Examples
--------
.. note:: Here we use pytorch syntax for demo. The general idea applies
to other frameworks with minor syntax change (e.g. replace
``torch.tensor`` with ``mxnet.ndarray``).
>>> import torch as th
>>> g = DGLGraph()
>>> g.add_nodes(5, {'n1': th.randn(5, 10)})
>>> g.add_edges([0,1,3,4], [2,4,0,3], {'e1': th.randn(4, 6)})
>>> nxg = g.to_networkx(node_attrs=['n1'], edge_attrs=['e1'])
"""
nx_graph = self._graph.to_networkx()
if node_attrs is not None:
for nid, attr in nx_graph.nodes(data=True):
feat_dict = self.get_n_repr(nid)
attr.update({key: F.squeeze(feat_dict[key], 0) for key in node_attrs})
if edge_attrs is not None:
for _, _, attr in nx_graph.edges(data=True):
eid = attr['id']
feat_dict = self.get_e_repr(eid)
attr.update({key: F.squeeze(feat_dict[key], 0) for key in edge_attrs})
return nx_graph
def from_networkx(self, nx_graph, node_attrs=None, edge_attrs=None):
"""Convert from networkx graph.
If the 'id' edge attribute exists, the edges will be added following
the edge ID order. Otherwise, the order is undefined.
Parameters
----------
nx_graph : networkx.DiGraph
If the node labels of ``nx_graph`` are not consecutive
integers, its nodes will be relabeled using consecutive integers.
The new node ordering will inherit that of ``sorted(nx_graph.nodes())``
node_attrs : iterable of str, optional
The node attributes to be copied.
edge_attrs : iterable of str, optional
The edge attributes to be copied.
Examples
--------
.. note:: Here we use pytorch syntax for demo. The general idea applies
to other frameworks with minor syntax change (e.g. replace
``torch.tensor`` with ``mxnet.ndarray``).
>>> import torch as th
>>> import networkx as nx
>>> nxg = nx.DiGraph()
>>> nxg.add_edge(0, 1, id=0, e1=5, e2=th.zeros(4))
>>> nxg.add_edge(2, 3, id=2, e1=6, e2=th.ones(4))
>>> nxg.add_edge(1, 2, id=1, e1=2, e2=th.full((4,), 2))
>>> g = dgl.DGLGraph()
>>> g.from_networkx(nxg, edge_attrs=['e1', 'e2'])
>>> g.edata['e1']
tensor([5, 2, 6])
>>> g.edata['e2']
tensor([[0., 0., 0., 0.],
[2., 2., 2., 2.],
[1., 1., 1., 1.]])
"""
# Relabel nodes using consecutive integers
nx_graph = nx.convert_node_labels_to_integers(nx_graph, ordering='sorted')
# With to_directed we will get a directed version of the original networkx
# graph, with the original nodes, edges and their attributes preserved.
# This is particularly helpful when we are also converting the edge attributes
# as the reversed edges (u, v) will be created with the same attributes as the
# original edges (v, u).
if not nx_graph.is_directed():
nx_graph = nx_graph.to_directed()
self.clear()
self._graph = graph_index.from_networkx(nx_graph, self.is_readonly)
self._node_frame.add_rows(self.number_of_nodes())
self._edge_frame.add_rows(self.number_of_edges())
self._msg_frame.add_rows(self.number_of_edges())
# copy attributes
def _batcher(lst):
if F.is_tensor(lst[0]):
return F.cat([F.unsqueeze(x, 0) for x in lst], dim=0)
else:
return F.tensor(lst)
if node_attrs is not None:
# mapping from feature name to a list of tensors to be concatenated
attr_dict = defaultdict(list)
for nid in range(self.number_of_nodes()):
for attr in node_attrs:
attr_dict[attr].append(nx_graph.nodes[nid][attr])
for attr in node_attrs:
self._node_frame[attr] = _batcher(attr_dict[attr])
if edge_attrs is not None:
has_edge_id = 'id' in next(iter(nx_graph.edges(data=True)))[-1]
# mapping from feature name to a list of tensors to be concatenated
attr_dict = defaultdict(lambda: [None] * self.number_of_edges())
# each defaultdict value is initialized to be a list of None
# None here serves as placeholder to be replaced by feature with
# corresponding edge id
if has_edge_id:
num_edges = self.number_of_edges()
for _, _, attrs in nx_graph.edges(data=True):
if attrs['id'] >= num_edges:
raise DGLError('Expect the pre-specified edge ids to be'
' smaller than the number of edges --'
' {}, got {}.'.format(num_edges, attrs['id']))
for key in edge_attrs:
attr_dict[key][attrs['id']] = attrs[key]
else:
# XXX: assuming networkx iteration order is deterministic
# so the order is the same as graph_index.from_networkx
for eid, (_, _, attrs) in enumerate(nx_graph.edges(data=True)):
for key in edge_attrs:
attr_dict[key][eid] = attrs[key]
for attr in edge_attrs:
for val in attr_dict[attr]:
if val is None:
raise DGLError('Not all edges have attribute {}.'.format(attr))
self._edge_frame[attr] = _batcher(attr_dict[attr])
def from_scipy_sparse_matrix(self, spmat, multigraph=None):
""" Convert from scipy sparse matrix.
Parameters
----------
spmat : scipy sparse matrix
The graph's adjacency matrix
multigraph : bool, optional
Deprecated (Will be deleted in the future).
Whether the graph would be a multigraph. If the input scipy sparse matrix is CSR,
this argument is ignored.
Examples
--------
>>> import numpy as np
>>> from scipy.sparse import coo_matrix
>>> row = np.array([0, 3, 1, 0])
>>> col = np.array([0, 3, 1, 2])
>>> data = np.array([4, 5, 7, 9])
>>> a = coo_matrix((data, (row, col)), shape=(4, 4))
>>> g = dgl.DGLGraph()
>>> g.from_scipy_sparse_matrix(a)
"""
self.clear()
if multigraph is not None:
dgl_warning("multigraph will be deprecated." \
"DGL will treat all graphs as multigraph in the future.")
self._graph = graph_index.from_scipy_sparse_matrix(spmat, self.is_readonly)
self._node_frame.add_rows(self.number_of_nodes())
self._edge_frame.add_rows(self.number_of_edges())
self._msg_frame.add_rows(self.number_of_edges())
def node_attr_schemes(self):
"""Return the node feature schemes.
Each feature scheme is a named tuple that stores the shape and data type
of the node features.
Returns
-------
dict of str to schemes
The schemes of node feature columns.
Examples
--------
>>> import torch as th
>>> G = dgl.DGLGraph()
>>> G.add_nodes(3)
>>> G.ndata['x'] = th.zeros((3,5))
>>> G.node_attr_schemes()
{'x': Scheme(shape=(5,), dtype=torch.float32)}
"""
return self._node_frame.schemes
def edge_attr_schemes(self):
"""Return the edge feature schemes.
Each feature scheme is a named tuple that stores the shape and data type
of the edge features.
Returns
-------
dict of str to schemes
The schemes of edge feature columns.
Examples
--------
.. note:: Here we use pytorch syntax for demo. The general idea applies
to other frameworks with minor syntax change (e.g. replace
``torch.tensor`` with ``mxnet.ndarray``).
>>> import torch as th
>>> G = dgl.DGLGraph()
>>> G.add_nodes(3)
>>> G.add_edges([0, 1], 2) # 0->2, 1->2
>>> G.edata['y'] = th.zeros((2, 4))
>>> G.edge_attr_schemes()
{'y': Scheme(shape=(4,), dtype=torch.float32)}
"""
return self._edge_frame.schemes
def set_n_initializer(self, initializer, field=None):
"""Set the initializer for empty node features.
Initializer is a callable that returns a tensor given the shape, data type
and device context.
When a subset of the nodes are assigned a new feature, the initializer is
used to create features for the rest of the nodes.
Parameters
----------
initializer : callable
The initializer.
field : str, optional
The feature field name. The default is to set the initializer for all
feature fields.
Examples
--------
.. note:: Here we use pytorch syntax for demo. The general idea applies
to other frameworks with minor syntax change (e.g. replace
``torch.tensor`` with ``mxnet.ndarray``).
>>> import torch as th
>>> G = dgl.DGLGraph()
>>> G.add_nodes(3)
Set initializer for all node features
>>> G.set_n_initializer(dgl.init.zero_initializer)
Set feature for partial nodes
>>> G.nodes[[0, 2]].data['x'] = th.ones((2, 5))
>>> G.ndata
{'x' : tensor([[1., 1., 1., 1., 1.],
[0., 0., 0., 0., 0.],
[1., 1., 1., 1., 1.]])}
Notes
-----
User defined initializer must follow the signature of
:func:`dgl.init.base_initializer() <dgl.init.base_initializer>`
"""
self._node_frame.set_initializer(initializer, field)
def set_e_initializer(self, initializer, field=None):
"""Set the initializer for empty edge features.
Initializer is a callable that returns a tensor given the shape, data
type and device context.
When a subset of the edges are assigned a new feature, the initializer is
used to create features for the rest of the edges.
Parameters
----------
initializer : callable
The initializer.
field : str, optional
The feature field name. The default is to set the initializer for all
feature fields.
Examples
--------
.. note:: Here we use pytorch syntax for demo. The general idea applies
to other frameworks with minor syntax change (e.g. replace
``torch.tensor`` with ``mxnet.ndarray``).
>>> import torch as th
>>> G = dgl.DGLGraph()
>>> G.add_nodes(3)
>>> G.add_edges([0, 1], 2) # 0->2, 1->2
Set initializer for edge features
>>> G.set_e_initializer(dgl.init.zero_initializer)
Set feature for partial edges
>>> G.edges[1, 2].data['y'] = th.ones((1, 4))
>>> G.edata
{'y' : tensor([[0., 0., 0., 0.],
[1., 1., 1., 1.]])}
Notes
-----
User defined initializer must follow the signature of
:func:`dgl.init.base_initializer() <dgl.init.base_initializer>`
"""
self._edge_frame.set_initializer(initializer, field)
@property
def nodes(self):
"""Return a node view that can used to set/get feature data.
Examples
--------
>>> G = dgl.DGLGraph()
>>> G.add_nodes(3)
Get nodes in graph G:
>>> G.nodes()
tensor([0, 1, 2])
Get feature dictionary of all nodes:
>>> G.nodes[:].data
{}
The above can be abbreviated as
>>> G.ndata
{}
Init all 3 nodes with zero vector(len=5)
.. note:: Here we use pytorch syntax for demo. The general idea applies
to other frameworks with minor syntax change (e.g. replace
``torch.tensor`` with ``mxnet.ndarray``).
>>> import torch as th
>>> G.ndata['x'] = th.zeros((3, 5))
>>> G.ndata['x']
{'x' : tensor([[0., 0., 0., 0., 0.],
[0., 0., 0., 0., 0.],
[0., 0., 0., 0., 0.]])}
Use G.nodes to get/set features for some nodes.
>>> G.nodes[[0, 2]].data['x'] = th.ones((2, 5))
>>> G.ndata
{'x' : tensor([[1., 1., 1., 1., 1.],
[0., 0., 0., 0., 0.],
[1., 1., 1., 1., 1.]])}
See Also
--------
dgl.DGLGraph.ndata
"""
return NodeView(self)
@property
def ndata(self):
"""Return the data view of all the nodes.
DGLGraph.ndata is an abbreviation of DGLGraph.nodes[:].data
See Also
--------
dgl.DGLGraph.nodes
"""
return self.nodes[:].data
@property
def srcdata(self):
"""Compatibility interface with heterogeneous graphs; identical to ``ndata``"""
return self.ndata
@property
def dstdata(self):
"""Compatibility interface with heterogeneous graphs; identical to ``ndata``"""
return self.ndata
@property
def edges(self):
"""Return a edges view that can used to set/get feature data.
>>> G = dgl.DGLGraph()
>>> G.add_nodes(3)
>>> G.add_edges([0, 1], 2) # 0->2, 1->2
Get edges in graph G:
>>> G.edges()
(tensor([0, 1]), tensor([2, 2]))
Get feature dictionary of all edges:
>>> G.edges[:].data
{}
The above can be abbreviated as
>>> G.edata
{}
Init 2 edges with zero vector(len=4)
.. note:: Here we use pytorch syntax for demo. The general idea applies
to other frameworks with minor syntax change (e.g. replace
``torch.tensor`` with ``mxnet.ndarray``).
>>> import torch as th
>>> G.edata['y'] = th.zeros((2, 4))
>>> G.edata
{'y' : tensor([[0., 0., 0., 0.],
[0., 0., 0., 0.]])}
Use G.edges to get/set features for some edges.
>>> G.edges[1, 2].data['y'] = th.ones((1, 4))
>>> G.edata
{'y' : tensor([[0., 0., 0., 0.],
[1., 1., 1., 1.]])}
See Also
--------
dgl.DGLGraph.edata
"""
return EdgeView(self)
@property
def edata(self):
"""Return the data view of all the edges.
DGLGraph.edata is an abbreviation of DGLGraph.edges[:].data
See Also
--------
dgl.DGLGraph.edges
"""
return self.edges[:].data
@property
def batch_size(self):
"""Number of graphs in this batch.
Returns
-------
int
Number of graphs in this batch."""
return 1 if self.batch_num_nodes is None else len(self.batch_num_nodes)
@property
def batch_num_nodes(self):
"""Number of nodes of each graph in this batch.
Returns
-------
list
Number of nodes of each graph in this batch."""
if self._batch_num_nodes is None:
return [self.number_of_nodes()]
else:
return self._batch_num_nodes
@property
def batch_num_edges(self):
"""Number of edges of each graph in this batch.
Returns
-------
list
Number of edges of each graph in this batch."""
if self._batch_num_edges is None:
return [self.number_of_edges()]
else:
return self._batch_num_edges
@property
def parent(self):
"""If current graph is a subgraph of a parent graph, return
its parent graph, else return None.
Returns
-------
DGLGraph or None
The parent graph of current graph.
"""
return self._parent
def init_ndata(self, ndata_name, shape, dtype, ctx=F.cpu()):
"""Create node embedding.
It first creates the node embedding in the server and maps it to the current process
with shared memory.
Parameters
----------
ndata_name : string
The name of node embedding
shape : tuple
The shape of the node embedding
dtype : string
The data type of the node embedding. The currently supported data types
are "float32" and "int32".
ctx : DGLContext
The column context.
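Examples
--------
A minimal sketch; the embedding name ``'h'`` and its shape are illustrative only.
>>> g = dgl.DGLGraph()
>>> g.add_nodes(4)
>>> g.init_ndata('h', (g.number_of_nodes(), 3), 'float32')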
"""
scheme = Scheme(tuple(shape[1:]), F.data_type_dict[dtype])
self._node_frame._frame.add_column(ndata_name, scheme, ctx)
def init_edata(self, edata_name, shape, dtype, ctx=F.cpu()):
"""Create edge embedding.
It first creates the edge embedding in the server and maps it to the current process
with shared memory.
Parameters
----------
edata_name : string
The name of edge embedding
shape : tuple
The shape of the edge embedding
dtype : string
The data type of the edge embedding. The currently supported data types
are "float32" and "int32".
ctx : DGLContext
The column context.
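Examples
--------
A minimal sketch; the embedding name ``'w'`` and its shape are illustrative only.
>>> g = dgl.DGLGraph()
>>> g.add_nodes(3)
>>> g.add_edges([0, 1], [1, 2])
>>> g.init_edata('w', (g.number_of_edges(), 4), 'float32')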
"""
scheme = Scheme(tuple(shape[1:]), F.data_type_dict[dtype])
self._edge_frame._frame.add_column(edata_name, scheme, ctx)
def set_n_repr(self, data, u=ALL, inplace=False):
"""Set node(s) representation.
`data` is a dictionary from the feature name to feature tensor. Each tensor
is of shape (B, D1, D2, ...), where B is the number of nodes to be updated,
and (D1, D2, ...) is the shape of the node representation tensor. The
length of the given node ids must match B (i.e., len(u) == B).
All updates are done out of place to work with autograd unless the
inplace flag is true.
Parameters
----------
data : dict of tensor
Node representation.
u : node, container or tensor
The node(s).
inplace : bool
If True, update will be done in place, but autograd will break.
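Examples
--------
A minimal sketch using PyTorch syntax; the feature name ``'x'`` is
illustrative only.
>>> import torch as th
>>> g = dgl.DGLGraph()
>>> g.add_nodes(3)
>>> g.set_n_repr({'x': th.zeros((3, 2))})           # update all nodes
>>> g.set_n_repr({'x': th.ones((2, 2))}, u=[0, 2])  # update nodes 0 and 2
>>> g.ndata['x']
tensor([[1., 1.],
        [0., 0.],
        [1., 1.]])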
"""
# sanity check
if not utils.is_dict_like(data):
raise DGLError('Expect dictionary type for feature data.'
' Got "%s" instead.' % type(data))
if is_all(u):
num_nodes = self.number_of_nodes()
else:
u = utils.toindex(u)
num_nodes = len(u)
for key, val in data.items():
nfeats = F.shape(val)[0]
if nfeats != num_nodes:
raise DGLError('Expect number of features to match number of nodes (len(u)).'
' Got %d and %d instead.' % (nfeats, num_nodes))
# set
if is_all(u):
for key, val in data.items():
self._node_frame[key] = val
else:
self._node_frame.update_rows(u, data, inplace=inplace)
def get_n_repr(self, u=ALL):
"""Get node(s) representation.
The returned feature tensor batches multiple node features on the first dimension.
Parameters
----------
u : node, container or tensor
The node(s).
Returns
-------
dict
Representation dict from feature name to feature tensor.
"""
if len(self.node_attr_schemes()) == 0:
return dict()
if is_all(u):
return dict(self._node_frame)
else:
u = utils.toindex(u)
return self._node_frame.select_rows(u)
def pop_n_repr(self, key):
"""Get and remove the specified node repr.
Parameters
----------
key : str
The attribute name.
Returns
-------
Tensor
The popped representation
"""
return self._node_frame.pop(key)
def set_e_repr(self, data, edges=ALL, inplace=False):
"""Set edge(s) representation.
`data` is a dictionary from the feature name to feature tensor. Each tensor
is of shape (B, D1, D2, ...), where B is the number of edges to be updated,
and (D1, D2, ...) is the shape of the edge representation tensor.
All updates are done out of place to work with autograd unless the
inplace flag is true.
Parameters
----------
data : dict of tensor
Edge representation.
edges : edges
Edges can be a pair of endpoint nodes (u, v), or a
tensor of edge ids. The default value is all the edges.
inplace : bool
If True, update will be done in place, but autograd will break.
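Examples
--------
A minimal sketch using PyTorch syntax; the feature name ``'y'`` is
illustrative only.
>>> import torch as th
>>> g = dgl.DGLGraph()
>>> g.add_nodes(3)
>>> g.add_edges([0, 1], 2)  # 0->2, 1->2
>>> g.set_e_repr({'y': th.zeros((2, 4))})            # update all edges
>>> g.set_e_repr({'y': th.ones((1, 4))}, edges=[1])  # update edge with ID 1
>>> g.edata['y']
tensor([[0., 0., 0., 0.],
        [1., 1., 1., 1.]])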
"""
# parse argument
if is_all(edges):
eid = ALL
elif isinstance(edges, tuple):
u, v = edges
u = utils.toindex(u)
v = utils.toindex(v)
# Rewrite u, v to handle edge broadcasting and multigraph.
_, _, eid = self._graph.edge_ids(u, v)
else:
eid = utils.toindex(edges)
# sanity check
if not utils.is_dict_like(data):
raise DGLError('Expect dictionary type for feature data.'
' Got "%s" instead.' % type(data))
if is_all(eid):
num_edges = self.number_of_edges()
else:
eid = utils.toindex(eid)
num_edges = len(eid)
for key, val in data.items():
nfeats = F.shape(val)[0]
if nfeats != num_edges:
raise DGLError('Expect number of features to match number of edges.'
' Got %d and %d instead.' % (nfeats, num_edges))
# set
if is_all(eid):
# update column
for key, val in data.items():
self._edge_frame[key] = val
else:
# update row
self._edge_frame.update_rows(eid, data, inplace=inplace)
def get_e_repr(self, edges=ALL):
"""Get edge(s) representation.
Parameters
----------
edges : edges
Edges can be a pair of endpoint nodes (u, v), or a
tensor of edge ids. The default value is all the edges.
Returns
-------
dict
Representation dict
"""
if len(self.edge_attr_schemes()) == 0:
return dict()
# parse argument
if is_all(edges):
eid = ALL
elif isinstance(edges, tuple):
u, v = edges
u = utils.toindex(u)
v = utils.toindex(v)
# Rewrite u, v to handle edge broadcasting and multigraph.
_, _, eid = self._graph.edge_ids(u, v)
else:
eid = utils.toindex(edges)
if is_all(eid):
return dict(self._edge_frame)
else:
eid = utils.toindex(eid)
return self._edge_frame.select_rows(eid)
def pop_e_repr(self, key):
"""Get and remove the specified edge repr.
Parameters
----------
key : str
The attribute name.
Returns
-------
Tensor
The popped representation
"""
return self._edge_frame.pop(key)
def register_message_func(self, func):
"""Register global message function.
Once registered, ``func`` will be used as the default
message function in message passing operations, including
:func:`send`, :func:`send_and_recv`, :func:`pull`,
:func:`push`, :func:`update_all`.
Parameters
----------
func : callable
Message function on the edge. The function should be
an :mod:`Edge UDF <dgl.udf>`.
See Also
--------
send
send_and_recv
pull
push
update_all
"""
self._message_func = func
def register_reduce_func(self, func):
"""Register global message reduce function.
Once registered, ``func`` will be used as the default
message reduce function in message passing operations, including
:func:`recv`, :func:`send_and_recv`, :func:`push`, :func:`pull`,
:func:`update_all`.
Parameters
----------
func : callable
Reduce function on the node. The function should be
a :mod:`Node UDF <dgl.udf>`.
See Also
--------
recv
send_and_recv
push
pull
update_all
"""
self._reduce_func = func
def register_apply_node_func(self, func):
"""Register global node apply function.
Once registered, ``func`` will be used as the default apply
node function. Related operations include :func:`apply_nodes`,
:func:`recv`, :func:`send_and_recv`, :func:`push`, :func:`pull`,
:func:`update_all`.
Parameters
----------
func : callable
Apply function on the nodes. The function should be
a :mod:`Node UDF <dgl.udf>`.
See Also
--------
apply_nodes
register_apply_edge_func
"""
self._apply_node_func = func
def register_apply_edge_func(self, func):
"""Register global edge apply function.
Once registered, ``func`` will be used as the default apply
edge function in :func:`apply_edges`.
Parameters
----------
func : callable
Apply function on the edge. The function should be
an :mod:`Edge UDF <dgl.udf>`.
See Also
--------
apply_edges
register_apply_node_func
"""
self._apply_edge_func = func
def apply_nodes(self, func="default", v=ALL, inplace=False):
"""Apply the function on the nodes to update their features.
If None is provided for ``func``, nothing will happen.
Parameters
----------
func : callable or None, optional
Apply function on the nodes. The function should be
a :mod:`Node UDF <dgl.udf>`.
v : int, iterable of int, tensor, optional
The node (ids) on which to apply ``func``. The default
value is all the nodes.
inplace : bool, optional
If True, update will be done in place, but autograd will break.
Examples
--------
.. note:: Here we use pytorch syntax for demo. The general idea applies
to other frameworks with minor syntax change (e.g. replace
``torch.tensor`` with ``mxnet.ndarray``).
>>> import torch as th
>>> g = dgl.DGLGraph()
>>> g.add_nodes(3)
>>> g.ndata['x'] = th.ones(3, 1)
>>> # Increment the node feature by 1.
>>> def increment_feature(nodes): return {'x': nodes.data['x'] + 1}
>>> g.apply_nodes(func=increment_feature, v=[0, 2]) # Apply func to nodes 0, 2
>>> g.ndata
{'x': tensor([[2.],
[1.],
[2.]])}
See Also
--------
register_apply_node_func
apply_edges
"""
if func == "default":
func = self._apply_node_func
if is_all(v):
v = utils.toindex(slice(0, self.number_of_nodes()))
else:
v = utils.toindex(v)
with ir.prog() as prog:
scheduler.schedule_apply_nodes(v=v,
apply_func=func,
node_frame=self._node_frame,
inplace=inplace)
Runtime.run(prog)
def apply_edges(self, func="default", edges=ALL, inplace=False):
"""Apply the function on the edges to update their features.
If None is provided for ``func``, nothing will happen.
Parameters
----------
func : callable, optional
Apply function on the edge. The function should be
an :mod:`Edge UDF <dgl.udf>`.
edges : valid edges type, optional
Edges on which to apply ``func``. See :func:`send` for valid
edges type. Default is all the edges.
inplace: bool, optional
If True, update will be done in place, but autograd will break.
Notes
-----
On multigraphs, if :math:`u` and :math:`v` are specified, then all the edges
between :math:`u` and :math:`v` will be updated.
Examples
--------
.. note:: Here we use pytorch syntax for demo. The general idea applies
to other frameworks with minor syntax change (e.g. replace
``torch.tensor`` with ``mxnet.ndarray``).
>>> import torch as th
>>> g = dgl.DGLGraph()
>>> g.add_nodes(3)
>>> g.add_edges([0, 1], [1, 2]) # 0 -> 1, 1 -> 2
>>> g.edata['y'] = th.ones(2, 1)
>>> # Doubles the edge feature.
>>> def double_feature(edges): return {'y': edges.data['y'] * 2}
>>> g.apply_edges(func=double_feature, edges=0) # Apply func to the first edge.
>>> g.edata
{'y': tensor([[2.], # 2 * 1
[1.]])}
See Also
--------
apply_nodes
"""
if func == "default":
func = self._apply_edge_func
assert func is not None
if is_all(edges):
u, v, _ = self._graph.edges('eid')
eid = utils.toindex(slice(0, self.number_of_edges()))
elif isinstance(edges, tuple):
u, v = edges
u = utils.toindex(u)
v = utils.toindex(v)
# Rewrite u, v to handle edge broadcasting and multigraph.
u, v, eid = self._graph.edge_ids(u, v)
else:
eid = utils.toindex(edges)
u, v, _ = self._graph.find_edges(eid)
with ir.prog() as prog:
scheduler.schedule_apply_edges(AdaptedDGLGraph(self), u, v, eid, func, inplace)
Runtime.run(prog)
def group_apply_edges(self, group_by, func, edges=ALL, inplace=False):
"""Group the edges by nodes and apply the function on the grouped edges to
update their features.
Parameters
----------
group_by : str
Specify how to group edges. Expected to be either 'src' or 'dst'
func : callable
Apply function on the edge. The function should be
an :mod:`Edge UDF <dgl.udf>`. The feature tensors seen by the ``Edge UDF``
have shape (bucket_size, degrees, *feature_shape), and it should
return a dict with values of the same shapes.
edges : valid edges type, optional
Edges on which to group and apply ``func``. See :func:`send` for valid
edges type. Default is all the edges.
inplace: bool, optional
If True, update will be done in place, but autograd will break.
Notes
-----
On multigraphs, if :math:`u` and :math:`v` are specified, then all the edges
between :math:`u` and :math:`v` will be updated.
Examples
--------
.. note:: Here we use pytorch syntax for demo. The general idea applies
to other frameworks with minor syntax change (e.g. replace
``torch.tensor`` with ``mxnet.ndarray``).
>>> import torch as th
>>> g = dgl.DGLGraph()
>>> g.add_nodes(4)
>>> g.add_edges(0, [1, 2, 3])
>>> g.add_edges(1, [2, 3])
>>> g.add_edges(2, [2, 3])
>>> g.edata['feat'] = th.randn((g.number_of_edges(), 1))
>>> # Softmax over the out edges of each node
>>> # Second dimension of edges.data is the degree dimension
>>> def softmax_feat(edges): return {'norm_feat': th.softmax(edges.data['feat'], dim=1)}
>>> g.group_apply_edges(func=softmax_feat, group_by='src') # Softmax over each node's out edges
>>> u, v, eid = g.out_edges(1, form='all')
>>> in_feat = g.edata['feat'][eid]
>>> out_feat = g.edata['norm_feat'][eid]
>>> print(out_feat - th.softmax(in_feat, 0))
tensor([[0.],
[0.]])
See Also
--------
apply_edges
"""
assert func is not None
if group_by not in ('src', 'dst'):
raise DGLError("Group_by should be either src or dst")
if is_all(edges):
u, v, _ = self._graph.edges('eid')
eid = utils.toindex(slice(0, self.number_of_edges()))
elif isinstance(edges, tuple):
u, v = edges
u = utils.toindex(u)
v = utils.toindex(v)
# Rewrite u, v to handle edge broadcasting and multigraph.
u, v, eid = self._graph.edge_ids(u, v)
else:
eid = utils.toindex(edges)
u, v, _ = self._graph.find_edges(eid)
with ir.prog() as prog:
scheduler.schedule_group_apply_edge(graph=AdaptedDGLGraph(self),
u=u, v=v, eid=eid,
apply_func=func,
group_by=group_by,
inplace=inplace)
Runtime.run(prog)
def send(self, edges=ALL, message_func="default"):
"""Send messages along the given edges.
``edges`` can be any of the following types:
* ``int`` : Specify one edge using its edge id.
* ``pair of int`` : Specify one edge using its endpoints.
* ``int iterable`` / ``tensor`` : Specify multiple edges using their edge ids.
* ``pair of int iterable`` / ``pair of tensors`` :
Specify multiple edges using their endpoints.
The UDF returns messages on the edges and can be later fetched in
the destination node's ``mailbox``. Receiving will consume the messages.
See :func:`recv` for example.
If multiple ``send`` calls are triggered on the same edge without a ``recv``,
messages generated by the later ``send`` will overwrite the previous ones.
Parameters
----------
edges : valid edges type, optional
Edges on which to apply ``message_func``. Default is sending along all
the edges.
message_func : callable
Message function on the edges. The function should be
an :mod:`Edge UDF <dgl.udf>`.
Notes
-----
On multigraphs, if :math:`u` and :math:`v` are specified, then the messages will be sent
along all edges between :math:`u` and :math:`v`.
Examples
--------
See the *message passing* example in :class:`DGLGraph` or :func:`recv`.
"""
if message_func == "default":
message_func = self._message_func
if is_all(edges):
eid = utils.toindex(slice(0, self.number_of_edges()))
u, v, _ = self._graph.edges('eid')
elif isinstance(edges, tuple):
u, v = edges
u = utils.toindex(u)
v = utils.toindex(v)
# Rewrite u, v to handle edge broadcasting and multigraph.
u, v, eid = self._graph.edge_ids(u, v)
else:
eid = utils.toindex(edges)
u, v, _ = self._graph.find_edges(eid)
if len(eid) == 0:
# no edge to be triggered
return
with ir.prog() as prog:
scheduler.schedule_send(graph=AdaptedDGLGraph(self), u=u, v=v, eid=eid,
message_func=message_func)
Runtime.run(prog)
def recv(self,
v=ALL,
reduce_func="default",
apply_node_func="default",
inplace=False):
"""Receive and reduce incoming messages and update the features of node(s) :math:`v`.
Optionally, apply a function to update the node features after receive.
* `reduce_func` will be skipped for nodes with no incoming message.
* If all ``v`` have no incoming message, this will downgrade to an :func:`apply_nodes`.
* If some ``v`` have no incoming message, their new feature value will be calculated
by the column initializer (see :func:`set_n_initializer`). The feature shapes and
dtypes will be inferred.
The node features will be updated by the result of the ``reduce_func``.
Messages are consumed once received.
The provided UDF may be called multiple times, so it is recommended to provide
a function with no side effects.
Parameters
----------
v : node, container or tensor, optional
The node to be updated. Default is receiving all the nodes.
reduce_func : callable, optional
Reduce function on the node. The function should be
a :mod:`Node UDF <dgl.udf>`.
apply_node_func : callable
Apply function on the nodes. The function should be
a :mod:`Node UDF <dgl.udf>`.
inplace: bool, optional
If True, update will be done in place, but autograd will break.
Examples
--------
Create a graph object for demo.
.. note:: Here we use pytorch syntax for demo. The general idea applies
to other frameworks with minor syntax change (e.g. replace
``torch.tensor`` with ``mxnet.ndarray``).
>>> import torch as th
>>> g = dgl.DGLGraph()
>>> g.add_nodes(3)
>>> g.ndata['x'] = th.tensor([[1.], [2.], [3.]])
>>> g.add_edges([0, 1], [1, 2])
>>> # Define the function for sending node features as messages.
>>> def send_source(edges): return {'m': edges.src['x']}
>>> # Set the function defined to be the default message function.
>>> g.register_message_func(send_source)
>>> # Sum the messages received and use this to replace the original node feature.
>>> def simple_reduce(nodes): return {'x': nodes.mailbox['m'].sum(1)}
>>> # Set the function defined to be the default message reduce function.
>>> g.register_reduce_func(simple_reduce)
Send and receive messages. Note that although node :math:`0` has no incoming edges,
its feature gets changed from :math:`1` to :math:`0` as it is also included in
``g.nodes()``.
>>> g.send(g.edges())
>>> g.recv(g.nodes())
>>> g.ndata['x']
tensor([[0.],
[1.],
[2.]])
Once messages are received, one needs to call :func:`send` again before
calling :func:`recv`. Otherwise, nothing will happen.
>>> g.recv(g.nodes())
>>> g.ndata['x']
tensor([[0.],
[1.],
[2.]])
"""
if reduce_func == "default":
reduce_func = self._reduce_func
if apply_node_func == "default":
apply_node_func = self._apply_node_func
assert reduce_func is not None
if is_all(v):
v = F.arange(0, self.number_of_nodes())
elif isinstance(v, int):
v = [v]
v = utils.toindex(v)
if len(v) == 0:
# no vertex to be triggered.
return
with ir.prog() as prog:
scheduler.schedule_recv(graph=AdaptedDGLGraph(self),
recv_nodes=v,
reduce_func=reduce_func,
apply_func=apply_node_func,
inplace=inplace)
Runtime.run(prog)
def send_and_recv(self,
edges,
message_func="default",
reduce_func="default",
apply_node_func="default",
inplace=False):
"""Send messages along edges and let destinations receive them.
Optionally, apply a function to update the node features after receive.
This is a convenient combination for performing
``send(self, self.edges, message_func)`` and
``recv(self, dst, reduce_func, apply_node_func)``, where ``dst``
are the destinations of the ``edges``.
Parameters
----------
edges : valid edges type
Edges on which to apply ``func``. See :func:`send` for valid
edges type.
message_func : callable, optional
Message function on the edges. The function should be
an :mod:`Edge UDF <dgl.udf>`.
reduce_func : callable, optional
Reduce function on the node. The function should be
a :mod:`Node UDF <dgl.udf>`.
apply_node_func : callable, optional
Apply function on the nodes. The function should be
a :mod:`Node UDF <dgl.udf>`.
inplace: bool, optional
If True, update will be done in place, but autograd will break.
Examples
--------
.. note:: Here we use pytorch syntax for demo. The general idea applies
to other frameworks with minor syntax change (e.g. replace
``torch.tensor`` with ``mxnet.ndarray``).
>>> import torch as th
>>> g = dgl.DGLGraph()
>>> g.add_nodes(3)
>>> g.ndata['x'] = th.tensor([[1.], [2.], [3.]])
>>> g.add_edges([0, 1], [1, 2])
>>> # Define the function for sending node features as messages.
>>> def send_source(edges): return {'m': edges.src['x']}
>>> # Set the function defined to be the default message function.
>>> g.register_message_func(send_source)
>>> # Sum the messages received and use this to replace the original node feature.
>>> def simple_reduce(nodes): return {'x': nodes.mailbox['m'].sum(1)}
>>> # Set the function defined to be the default message reduce function.
>>> g.register_reduce_func(simple_reduce)
Send and receive messages.
>>> g.send_and_recv(g.edges())
>>> g.ndata['x']
tensor([[1.],
[1.],
[2.]])
Note that the feature of node :math:`0` remains the same as it has no
incoming edges.
Notes
-----
On multigraphs, if u and v are specified, then the messages will be sent
and received along all edges between u and v.
See Also
--------
send
recv
"""
if message_func == "default":
message_func = self._message_func
if reduce_func == "default":
reduce_func = self._reduce_func
if apply_node_func == "default":
apply_node_func = self._apply_node_func
assert message_func is not None
assert reduce_func is not None
if isinstance(edges, tuple):
u, v = edges
u = utils.toindex(u)
v = utils.toindex(v)
# Rewrite u, v to handle edge broadcasting and multigraph.
u, v, eid = self._graph.edge_ids(u, v)
else:
eid = utils.toindex(edges)
u, v, _ = self._graph.find_edges(eid)
if len(u) == 0:
# no edges to be triggered
return
with ir.prog() as prog:
scheduler.schedule_snr(graph=AdaptedDGLGraph(self),
edge_tuples=(u, v, eid),
message_func=message_func,
reduce_func=reduce_func,
apply_func=apply_node_func,
inplace=inplace)
Runtime.run(prog)
def pull(self,
v,
message_func="default",
reduce_func="default",
apply_node_func="default",
inplace=False):
"""Pull messages from the node(s)' predecessors and then update their features.
Optionally, apply a function to update the node features after receive.
* `reduce_func` will be skipped for nodes with no incoming message.
* If all ``v`` have no incoming message, this will downgrade to an :func:`apply_nodes`.
* If some ``v`` have no incoming message, their new feature value will be calculated
by the column initializer (see :func:`set_n_initializer`). The feature shapes and
dtypes will be inferred.
Parameters
----------
v : int, iterable of int, or tensor
The node(s) to be updated.
message_func : callable, optional
Message function on the edges. The function should be
an :mod:`Edge UDF <dgl.udf>`.
reduce_func : callable, optional
Reduce function on the node. The function should be
a :mod:`Node UDF <dgl.udf>`.
apply_node_func : callable, optional
Apply function on the nodes. The function should be
a :mod:`Node UDF <dgl.udf>`.
inplace: bool, optional
If True, update will be done in place, but autograd will break.
Examples
--------
Create a graph for demo.
.. note:: Here we use pytorch syntax for demo. The general idea applies
to other frameworks with minor syntax change (e.g. replace
``torch.tensor`` with ``mxnet.ndarray``).
>>> import torch as th
>>> g = dgl.DGLGraph()
>>> g.add_nodes(3)
>>> g.ndata['x'] = th.tensor([[0.], [1.], [2.]])
Use the built-in message function :func:`~dgl.function.copy_u` for copying
node features as the message.
>>> m_func = dgl.function.copy_u('x', 'm')
>>> g.register_message_func(m_func)
Use the built-in message reducing function :func:`~dgl.function.sum`, which
sums the messages received and replaces the old node features.
>>> m_reduce_func = dgl.function.sum('m', 'x')
>>> g.register_reduce_func(m_reduce_func)
As no edges exist, nothing happens.
>>> g.pull(g.nodes())
>>> g.ndata['x']
tensor([[0.],
[1.],
[2.]])
Add edges ``0 -> 1, 1 -> 2``. Pull messages for the node :math:`2`.
>>> g.add_edges([0, 1], [1, 2])
>>> g.pull(2)
>>> g.ndata['x']
tensor([[0.],
[1.],
[1.]])
The feature of node :math:`2` changes but the feature of node :math:`1`
remains the same as we did not :func:`pull` (and reduce) messages for it.
See Also
--------
push
"""
if message_func == "default":
message_func = self._message_func
if reduce_func == "default":
reduce_func = self._reduce_func
if apply_node_func == "default":
apply_node_func = self._apply_node_func
assert message_func is not None
assert reduce_func is not None
v = utils.toindex(v)
if len(v) == 0:
return
with ir.prog() as prog:
scheduler.schedule_pull(graph=AdaptedDGLGraph(self),
pull_nodes=v,
message_func=message_func,
reduce_func=reduce_func,
apply_func=apply_node_func,
inplace=inplace)
Runtime.run(prog)
def push(self,
u,
message_func="default",
reduce_func="default",
apply_node_func="default",
inplace=False):
"""Send message from the node(s) to their successors and update them.
Optionally, apply a function to update the node features after receive.
Parameters
----------
u : int, iterable of int, or tensor
The node(s) to push messages out.
message_func : callable, optional
Message function on the edges. The function should be
an :mod:`Edge UDF <dgl.udf>`.
reduce_func : callable, optional
Reduce function on the node. The function should be
a :mod:`Node UDF <dgl.udf>`.
apply_node_func : callable, optional
Apply function on the nodes. The function should be
a :mod:`Node UDF <dgl.udf>`.
inplace: bool, optional
If True, update will be done in place, but autograd will break.
Examples
--------
Create a graph for demo.
.. note:: Here we use pytorch syntax for demo. The general idea applies
to other frameworks with minor syntax change (e.g. replace
``torch.tensor`` with ``mxnet.ndarray``).
>>> import torch as th
>>> g = dgl.DGLGraph()
>>> g.add_nodes(3)
>>> g.ndata['x'] = th.tensor([[1.], [2.], [3.]])
Use the built-in message function :func:`~dgl.function.copy_u` for copying
node features as the message.
>>> m_func = dgl.function.copy_u('x', 'm')
>>> g.register_message_func(m_func)
Use the built-in message reducing function :func:`~dgl.function.sum`, which
sums the messages received and replaces the old node features.
>>> m_reduce_func = dgl.function.sum('m', 'x')
>>> g.register_reduce_func(m_reduce_func)
As no edges exist, nothing happens.
>>> g.push(g.nodes())
>>> g.ndata['x']
tensor([[1.],
[2.],
[3.]])
Add edges ``0 -> 1, 1 -> 2``. Send messages from node :math:`1` and update.
>>> g.add_edges([0, 1], [1, 2])
>>> g.push(1)
>>> g.ndata['x']
tensor([[1.],
[2.],
[2.]])
The feature of node :math:`2` changes but the feature of node :math:`1`
remains the same as we did not :func:`push` for node :math:`0`.
See Also
--------
pull
"""
if message_func == "default":
message_func = self._message_func
if reduce_func == "default":
reduce_func = self._reduce_func
if apply_node_func == "default":
apply_node_func = self._apply_node_func
assert message_func is not None
assert reduce_func is not None
u = utils.toindex(u)
if len(u) == 0:
return
with ir.prog() as prog:
scheduler.schedule_push(graph=AdaptedDGLGraph(self),
u=u,
message_func=message_func,
reduce_func=reduce_func,
apply_func=apply_node_func,
inplace=inplace)
Runtime.run(prog)
def update_all(self,
message_func="default",
reduce_func="default",
apply_node_func="default"):
"""Send messages through all edges and update all nodes.
Optionally, apply a function to update the node features after receive.
This is a convenient combination for performing
``send(self, self.edges(), message_func)`` and
``recv(self, self.nodes(), reduce_func, apply_node_func)``.
Parameters
----------
message_func : callable, optional
Message function on the edges. The function should be
an :mod:`Edge UDF <dgl.udf>`.
reduce_func : callable, optional
Reduce function on the node. The function should be
a :mod:`Node UDF <dgl.udf>`.
apply_node_func : callable, optional
Apply function on the nodes. The function should be
a :mod:`Node UDF <dgl.udf>`.
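Examples
--------
A minimal sketch mirroring the :func:`recv` example above. Since
``update_all`` is exactly ``send`` on all edges followed by ``recv``
on all nodes, node :math:`0` (which has no incoming edges) is reset
by the default initializer.
.. note:: Here we use pytorch syntax for demo. The general idea applies
to other frameworks with minor syntax change (e.g. replace
``torch.tensor`` with ``mxnet.ndarray``).
>>> import torch as th
>>> g = dgl.DGLGraph()
>>> g.add_nodes(3)
>>> g.ndata['x'] = th.tensor([[1.], [2.], [3.]])
>>> g.add_edges([0, 1], [1, 2])
>>> def send_source(edges): return {'m': edges.src['x']}
>>> def simple_reduce(nodes): return {'x': nodes.mailbox['m'].sum(1)}
>>> g.update_all(send_source, simple_reduce)
>>> g.ndata['x']
tensor([[0.],
[1.],
[2.]])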
See Also
--------
send
recv
"""
if message_func == "default":
message_func = self._message_func
if reduce_func == "default":
reduce_func = self._reduce_func
if apply_node_func == "default":
apply_node_func = self._apply_node_func
assert message_func is not None
assert reduce_func is not None
with ir.prog() as prog:
scheduler.schedule_update_all(graph=AdaptedDGLGraph(self),
message_func=message_func,
reduce_func=reduce_func,
apply_func=apply_node_func)
Runtime.run(prog)
def prop_nodes(self,
nodes_generator,
message_func="default",
reduce_func="default",
apply_node_func="default"):
"""Propagate messages using graph traversal by triggering
:func:`pull()` on nodes.
The traversal order is specified by the ``nodes_generator``. It generates
node frontiers, which is a list or a tensor of nodes. The nodes in the
same frontier will be triggered together, while nodes in different frontiers
will be triggered according to the generating order.
Parameters
----------
nodes_generator : iterable, each element is a list or a tensor of node ids
The generator of node frontiers. It specifies which nodes perform
:func:`pull` at each timestep.
message_func : callable, optional
Message function on the edges. The function should be
an :mod:`Edge UDF <dgl.udf>`.
reduce_func : callable, optional
Reduce function on the node. The function should be
a :mod:`Node UDF <dgl.udf>`.
apply_node_func : callable, optional
Apply function on the nodes. The function should be
a :mod:`Node UDF <dgl.udf>`.
Examples
--------
Create a graph for demo.
.. note:: Here we use pytorch syntax for demo. The general idea applies
to other frameworks with minor syntax change (e.g. replace
``torch.tensor`` with ``mxnet.ndarray``).
>>> import torch as th
>>> g = dgl.DGLGraph()
>>> g.add_nodes(4)
>>> g.ndata['x'] = th.tensor([[1.], [2.], [3.], [4.]])
>>> g.add_edges([0, 1, 1, 2], [1, 2, 3, 3])
Prepare message function and message reduce function for demo.
>>> def send_source(edges): return {'m': edges.src['x']}
>>> g.register_message_func(send_source)
>>> def simple_reduce(nodes): return {'x': nodes.mailbox['m'].sum(1)}
>>> g.register_reduce_func(simple_reduce)
First pull messages for nodes :math:`1, 2` with edges ``0 -> 1`` and
``1 -> 2``; and then pull messages for node :math:`3` with edges
``1 -> 3`` and ``2 -> 3``.
>>> g.prop_nodes([[1, 2], [3]])
>>> g.ndata['x']
tensor([[1.],
[1.],
[2.],
[3.]])
In the first stage, we pull messages for nodes :math:`1, 2`.
The feature of node :math:`1` is replaced by that of node :math:`0`, i.e. 1.
The feature of node :math:`2` is replaced by that of node :math:`1`, i.e. 2.
Both replacements happen simultaneously.
In the second stage, we pull messages for node :math:`3`.
The feature of node :math:`3` becomes the sum of node :math:`1`'s feature and
:math:`2`'s feature, i.e. 1 + 2 = 3.
See Also
--------
prop_edges
"""
for node_frontier in nodes_generator:
self.pull(node_frontier, message_func, reduce_func, apply_node_func)
def prop_edges(self,
edges_generator,
message_func="default",
reduce_func="default",
apply_node_func="default"):
"""Propagate messages using graph traversal by triggering
:func:`send_and_recv()` on edges.
The traversal order is specified by the ``edges_generator``. It generates
edge frontiers. The edge frontiers should be of *valid edges type*.
See :func:`send` for more details.
Edges in the same frontier will be triggered together, while edges in
different frontiers will be triggered according to the generating order.
Parameters
----------
edges_generator : generator
The generator of edge frontiers.
message_func : callable, optional
Message function on the edges. The function should be
an :mod:`Edge UDF <dgl.udf>`.
reduce_func : callable, optional
Reduce function on the node. The function should be
a :mod:`Node UDF <dgl.udf>`.
apply_node_func : callable, optional
Apply function on the nodes. The function should be
a :mod:`Node UDF <dgl.udf>`.
Examples
--------
Create a graph for demo.
.. note:: Here we use pytorch syntax for demo. The general idea applies
to other frameworks with minor syntax change (e.g. replace
``torch.tensor`` with ``mxnet.ndarray``).
>>> import torch as th
>>> g = dgl.DGLGraph()
>>> g.add_nodes(4)
>>> g.ndata['x'] = th.tensor([[1.], [2.], [3.], [4.]])
>>> g.add_edges([0, 1, 1, 2], [1, 2, 3, 3])
Prepare message function and message reduce function for demo.
>>> def send_source(edges): return {'m': edges.src['x']}
>>> g.register_message_func(send_source)
>>> def simple_reduce(nodes): return {'x': nodes.mailbox['m'].sum(1)}
>>> g.register_reduce_func(simple_reduce)
First propagate messages for edges ``0 -> 1``, ``1 -> 3`` and then
propagate messages for edges ``1 -> 2``, ``2 -> 3``.
>>> g.prop_edges([([0, 1], [1, 3]), ([1, 2], [2, 3])])
>>> g.ndata['x']
tensor([[1.],
[1.],
[1.],
[3.]])
In the first stage, the following happens simultaneously.
- The feature of node :math:`1` is replaced by that of
node :math:`0`, i.e. 1.
- The feature of node :math:`3` is replaced by that of
node :math:`1`, i.e. 2.
In the second stage, the following happens simultaneously.
- The feature of node :math:`2` is replaced by that of
node :math:`1`, i.e. 1.
- The feature of node :math:`3` is replaced by that of
node :math:`2`, i.e. 3.
See Also
--------
prop_nodes
"""
for edge_frontier in edges_generator:
self.send_and_recv(edge_frontier, message_func, reduce_func, apply_node_func)
def subgraph(self, nodes):
"""Return the subgraph induced on given nodes.
Parameters
----------
nodes : list, or iterable
A node ID array to construct subgraph.
All nodes must exist in the graph.
Returns
-------
G : DGLGraph
The subgraph.
The nodes are relabeled so that node `i` in the subgraph is mapped
to node `nodes[i]` in the original graph.
The edges are also relabeled.
One can retrieve the mapping from subgraph node/edge ID to parent
node/edge ID via `parent_nid` and `parent_eid` properties of the
subgraph.
Examples
--------
The following example uses PyTorch backend.
>>> G = dgl.DGLGraph()
>>> G.add_nodes(5)
>>> G.add_edges([0, 1, 2, 3, 4], [1, 2, 3, 4, 0]) # 5-node cycle
>>> SG = G.subgraph([0, 1, 4])
>>> SG.nodes()
tensor([0, 1, 2])
>>> SG.edges()
(tensor([0, 2]), tensor([1, 0]))
>>> SG.parent_nid
tensor([0, 1, 4])
>>> SG.parent_eid
tensor([0, 4])
See Also
--------
subgraphs
edge_subgraph
parent_nid
parent_eid
copy_from_parent
copy_to_parent
map_to_subgraph_nid
"""
induced_nodes = utils.toindex(nodes)
sgi = self._graph.node_subgraph(induced_nodes)
return self._create_subgraph(sgi, sgi.induced_nodes, sgi.induced_edges)
def subgraphs(self, nodes):
"""Return a list of subgraphs, each induced in the corresponding given
nodes in the list.
Equivalent to
``[self.subgraph(nodes_list) for nodes_list in nodes]``
Parameters
----------
nodes : a list of lists or iterable
A list of node ID arrays to construct corresponding subgraphs.
All nodes in all the list items must exist in the graph.
Returns
-------
G : A list of DGLGraph
The subgraphs.
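Examples
--------
A short sketch reusing the 5-node cycle from the :func:`subgraph`
example; the call is equivalent to mapping :func:`subgraph` over the
node lists.
>>> G = dgl.DGLGraph()
>>> G.add_nodes(5)
>>> G.add_edges([0, 1, 2, 3, 4], [1, 2, 3, 4, 0])  # 5-node cycle
>>> SGs = G.subgraphs([[0, 1], [2, 3, 4]])
>>> len(SGs)
2
>>> SGs[1].parent_nid
tensor([2, 3, 4])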
See Also
--------
subgraph
parent_nid
parent_eid
copy_from_parent
copy_to_parent
map_to_subgraph_nid
"""
induced_nodes = [utils.toindex(n) for n in nodes]
sgis = self._graph.node_subgraphs(induced_nodes)
return [self._create_subgraph(
sgi, sgi.induced_nodes, sgi.induced_edges) for sgi in sgis]
def edge_subgraph(self, edges, preserve_nodes=False):
"""Return the subgraph induced on given edges.
Parameters
----------
edges : list, or iterable
An edge ID array to construct subgraph.
All edges must exist in the graph.
preserve_nodes : bool
Indicates whether to preserve all nodes or not.
If True, keep the nodes that have no incident edges in the subgraph;
if False, all nodes without incident edges are removed.
Returns
-------
G : DGLGraph
The subgraph.
The edges are relabeled so that edge `i` in the subgraph is mapped
to edge `edges[i]` in the original graph.
The nodes are also relabeled.
One can retrieve the mapping from subgraph node/edge ID to parent
node/edge ID via `parent_nid` and `parent_eid` properties of the
subgraph.
Examples
--------
The following example uses PyTorch backend.
>>> G = dgl.DGLGraph()
>>> G.add_nodes(5)
>>> G.add_edges([0, 1, 2, 3, 4], [1, 2, 3, 4, 0]) # 5-node cycle
>>> SG = G.edge_subgraph([0, 4])
>>> SG.nodes()
tensor([0, 1, 2])
>>> SG.edges()
(tensor([0, 2]), tensor([1, 0]))
>>> SG.parent_nid
tensor([0, 1, 4])
>>> SG.parent_eid
tensor([0, 4])
>>> SG = G.edge_subgraph([0, 4], preserve_nodes=True)
>>> SG.nodes()
tensor([0, 1, 2, 3, 4])
>>> SG.edges()
(tensor([0, 4]), tensor([1, 0]))
>>> SG.parent_nid
tensor([0, 1, 2, 3, 4])
>>> SG.parent_eid
tensor([0, 4])
See Also
--------
subgraph
copy_from_parent
copy_to_parent
map_to_subgraph_nid
"""
induced_edges = utils.toindex(edges)
sgi = self._graph.edge_subgraph(induced_edges, preserve_nodes=preserve_nodes)
return self._create_subgraph(sgi, sgi.induced_nodes, sgi.induced_edges)
def adjacency_matrix_scipy(self, transpose=None, fmt='csr', return_edge_ids=None):
"""Return the scipy adjacency matrix representation of this graph.
By default, a row of the returned adjacency matrix represents the destination
of an edge and the column represents the source.
When transpose is True, a row represents the source and a column represents
the destination.
Parameters
----------
transpose : bool, optional (default=False)
A flag to transpose the returned adjacency matrix.
fmt : str, optional (default='csr')
Indicates the format of returned adjacency matrix.
return_edge_ids : bool, optional (default=True)
If True, the elements in the adjacency matrix are edge ids.
Note that one of the edge ids is 0, which is indistinguishable from
an absent entry; proceed with caution.
If False, the elements will always be 1.
Returns
-------
scipy.sparse.spmatrix
The scipy representation of adjacency matrix.
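Examples
--------
A hedged sketch of the edge-id caveat above; the printed dtype may
vary across platforms. Edge :math:`0` (``0 -> 1``) is stored as the
value 0 at row 1, column 0, so it is indistinguishable from an absent
entry once densified.
>>> g = dgl.DGLGraph()
>>> g.add_nodes(3)
>>> g.add_edges([0, 1], [1, 2])  # edge ids 0 and 1
>>> g.adjacency_matrix_scipy(transpose=False, fmt='csr', return_edge_ids=True).toarray()
array([[0, 0, 0],
[0, 0, 0],
[0, 1, 0]])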
"""
if transpose is None:
dgl_warning(
"Currently adjacency_matrix() returns a matrix with destination as rows"
" by default. In 0.5 the result will have source as rows"
" (i.e. transpose=True)")
transpose = False
return self._graph.adjacency_matrix_scipy(transpose, fmt, return_edge_ids)
def adjacency_matrix(self, transpose=None, ctx=F.cpu()):
"""Return the adjacency matrix representation of this graph.
By default, a row of the returned adjacency matrix represents the
destination of an edge and the column represents the source.
When transpose is True, a row represents the source and a column
represents the destination.
Parameters
----------
transpose : bool, optional (default=False)
A flag to transpose the returned adjacency matrix.
ctx : context, optional (default=cpu)
The context of returned adjacency matrix.
Returns
-------
SparseTensor
The adjacency matrix.
"""
if transpose is None:
dgl_warning(
"Currently adjacency_matrix() returns a matrix with destination as rows"
" by default. In 0.5 the result will have source as rows"
" (i.e. transpose=True)")
transpose = False
return self._graph.adjacency_matrix(transpose, ctx)[0]
def incidence_matrix(self, typestr, ctx=F.cpu()):
"""Return the incidence matrix representation of this graph.
An incidence matrix is an n x m sparse matrix, where n is
the number of nodes and m is the number of edges. Each nonzero
value indicates whether the corresponding edge is incident to
the corresponding node.
There are three types of an incidence matrix :math:`I`:
* ``in``:
- :math:`I[v, e] = 1` if :math:`e` is the in-edge of :math:`v`
(or :math:`v` is the dst node of :math:`e`);
- :math:`I[v, e] = 0` otherwise.
* ``out``:
- :math:`I[v, e] = 1` if :math:`e` is the out-edge of :math:`v`
(or :math:`v` is the src node of :math:`e`);
- :math:`I[v, e] = 0` otherwise.
* ``both``:
- :math:`I[v, e] = 1` if :math:`e` is the in-edge of :math:`v`;
- :math:`I[v, e] = -1` if :math:`e` is the out-edge of :math:`v`;
- :math:`I[v, e] = 0` otherwise (including self-loop).
Parameters
----------
typestr : str
Can be either ``in``, ``out`` or ``both``
ctx : context, optional (default=cpu)
The context of returned incidence matrix.
Returns
-------
SparseTensor
The incidence matrix.
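Examples
--------
A hedged sketch for a 3-node path graph using the PyTorch backend; we
densify the returned sparse tensor for readability, as the sparse
representation itself is backend-specific.
>>> g = dgl.DGLGraph()
>>> g.add_nodes(3)
>>> g.add_edges([0, 1], [1, 2])  # edge ids 0 and 1
>>> g.incidence_matrix('in').to_dense()
tensor([[0., 0.],
[1., 0.],
[0., 1.]])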
"""
return self._graph.incidence_matrix(typestr, ctx)[0]
def line_graph(self, backtracking=True, shared=False):
"""Return the line graph of this graph.
See :func:`~dgl.transforms.line_graph`.
"""
return dgl.line_graph(self, backtracking, shared)
def reverse(self, share_ndata=False, share_edata=False):
"""Return the reverse of this graph.
See :func:`~dgl.transforms.reverse`.
"""
return dgl.reverse(self, share_ndata, share_edata)
def filter_nodes(self, predicate, nodes=ALL):
"""Return a tensor of node IDs that satisfy the given predicate.
Parameters
----------
predicate : callable
A function of signature ``func(nodes) -> tensor``.
``nodes`` are :class:`NodeBatch` objects as in :mod:`~dgl.udf`.
The ``tensor`` returned should be a 1-D boolean tensor with
each element indicating whether the corresponding node in
the batch satisfies the predicate.
nodes : int, iterable or tensor of ints
The nodes to filter on. Default value is all the nodes.
Returns
-------
tensor
The filtered nodes.
Examples
--------
Construct a graph object for demo.
.. note:: Here we use pytorch syntax for demo. The general idea applies
to other frameworks with minor syntax change (e.g. replace
``torch.tensor`` with ``mxnet.ndarray``).
>>> import torch as th
>>> g = dgl.DGLGraph()
>>> g.add_nodes(3)
>>> g.ndata['x'] = th.tensor([[1.], [-1.], [1.]])
Define a function for filtering nodes with feature :math:`1`.
>>> def has_feature_one(nodes): return (nodes.data['x'] == 1).squeeze(1)
Filter the nodes with feature :math:`1`.
>>> g.filter_nodes(has_feature_one)
tensor([0, 2])
See Also
--------
filter_edges
"""
if is_all(nodes):
v = utils.toindex(slice(0, self.number_of_nodes()))
else:
v = utils.toindex(nodes)
n_repr = self.get_n_repr(v)
nbatch = NodeBatch(v, n_repr)
n_mask = F.copy_to(predicate(nbatch), F.cpu())
if is_all(nodes):
return F.nonzero_1d(n_mask)
else:
nodes = F.tensor(nodes)
return F.boolean_mask(nodes, n_mask)
def filter_edges(self, predicate, edges=ALL):
"""Return a tensor of edge IDs that satisfy the given predicate.
Parameters
----------
predicate : callable
A function of signature ``func(edges) -> tensor``.
``edges`` are :class:`EdgeBatch` objects as in :mod:`~dgl.udf`.
The ``tensor`` returned should be a 1-D boolean tensor with
each element indicating whether the corresponding edge in
the batch satisfies the predicate.
edges : valid edges type
Edges on which to apply ``func``. See :func:`send` for valid
edges type. Default value is all the edges.
Returns
-------
tensor
The filtered edges represented by their ids.
Examples
--------
Construct a graph object for demo.
.. note:: Here we use pytorch syntax for demo. The general idea applies
to other frameworks with minor syntax change (e.g. replace
``torch.tensor`` with ``mxnet.ndarray``).
>>> import torch as th
>>> g = dgl.DGLGraph()
>>> g.add_nodes(3)
>>> g.ndata['x'] = th.tensor([[1.], [-1.], [1.]])
>>> g.add_edges([0, 1, 2], [2, 2, 1])
Define a function for filtering edges whose destinations have
node feature :math:`1`.
>>> def has_dst_one(edges): return (edges.dst['x'] == 1).squeeze(1)
Filter the edges whose destination nodes have feature :math:`1`.
>>> g.filter_edges(has_dst_one)
tensor([0, 1])
See Also
--------
filter_nodes
"""
if is_all(edges):
u, v, _ = self._graph.edges('eid')
eid = utils.toindex(slice(0, self.number_of_edges()))
elif isinstance(edges, tuple):
u, v = edges
u = utils.toindex(u)
v = utils.toindex(v)
# Rewrite u, v to handle edge broadcasting and multigraph.
u, v, eid = self._graph.edge_ids(u, v)
else:
eid = utils.toindex(edges)
u, v, _ = self._graph.find_edges(eid)
src_data = self.get_n_repr(u)
edge_data = self.get_e_repr(eid)
dst_data = self.get_n_repr(v)
ebatch = EdgeBatch((u, v, eid), src_data, edge_data, dst_data)
e_mask = F.copy_to(predicate(ebatch), F.cpu())
if is_all(edges):
return F.nonzero_1d(e_mask)
else:
edges = F.tensor(edges)
return F.boolean_mask(edges, e_mask)
def readonly(self, readonly_state=True):
"""Set this graph's readonly state in-place.
Parameters
----------
readonly_state : bool, optional
New readonly state of the graph, defaults to True.
Examples
--------
>>> G = dgl.DGLGraph()
>>> G.add_nodes(3)
>>> G.add_edge(0, 1)
>>> G.readonly()
>>> try:
>>> G.add_nodes(5)
>>> fail = False
>>> except:
>>> fail = True
>>>
>>> fail
True
>>> G.readonly(False)
>>> G.add_nodes(5)
>>> G.number_of_nodes()
8
"""
if readonly_state != self.is_readonly:
self._graph.readonly(readonly_state)
def __repr__(self):
ret = ('DGLGraph(num_nodes={node}, num_edges={edge},\n'
' ndata_schemes={ndata}\n'
' edata_schemes={edata})')
return ret.format(node=self.number_of_nodes(), edge=self.number_of_edges(),
ndata=str(self.node_attr_schemes()),
edata=str(self.edge_attr_schemes()))
# pylint: disable=invalid-name
def to(self, ctx, **kwargs):
"""Move both ndata and edata to the targeted mode (cpu/gpu)
Framework agnostic
Parameters
----------
ctx : framework-specific context object
The context to move data to.
Returns
-------
g : DGLGraph
Moved DGLGraph of the targeted mode.
Examples
--------
The following example uses PyTorch backend.
>>> import torch
>>> G = dgl.DGLGraph()
>>> G.add_nodes(5, {'h': torch.ones((5, 2))})
>>> G.add_edges([0, 1], [1, 2], {'m' : torch.ones((2, 2))})
>>> G.add_edges([0, 1], [1, 2], {'m' : torch.ones((2, 2))})
>>> G = G.to(torch.device('cuda:0'))
"""
for k in self.ndata.keys():
self.ndata[k] = F.copy_to(self.ndata[k], ctx, **kwargs)
for k in self.edata.keys():
self.edata[k] = F.copy_to(self.edata[k], ctx, **kwargs)
return self
# pylint: enable=invalid-name
def local_var(self):
"""Return a graph object that can be used in a local function scope.
The returned graph object shares the feature data and graph structure of this graph.
However, any out-of-place mutation to the feature data will not be reflected in this
graph, thus making it easier to use in a function scope.
If set, the local graph object will use the same initializers for node features and
edge features.
Examples
--------
The following example uses PyTorch backend.
Avoid accidentally overriding existing feature data. This is quite common when
implementing a NN module:
>>> def foo(g):
>>> g = g.local_var()
>>> g.ndata['h'] = torch.ones((g.number_of_nodes(), 3))
>>> return g.ndata['h']
>>>
>>> g = ... # some graph
>>> g.ndata['h'] = torch.zeros((g.number_of_nodes(), 3))
>>> newh = foo(g) # get tensor of all ones
>>> print(g.ndata['h']) # still get tensor of all zeros
Automatically garbage collect locally-defined tensors without the need to manually
``pop`` the tensors.
>>> def foo(g):
>>> g = g.local_var()
>>> # This 'xxx' feature will stay local and be GCed when the function exits
>>> g.ndata['xxx'] = torch.ones((g.number_of_nodes(), 3))
>>> return g.ndata['xxx']
>>>
>>> g = ... # some graph
>>> xxx = foo(g)
>>> print('xxx' in g.ndata)
False
Notes
-----
Internally, the returned graph shares the same feature tensors, but constructs a new
dictionary structure (a.k.a. Frame), so adding/removing feature tensors from the returned
graph will not be reflected in the original graph. However, in-place operations do change
the shared tensor values, so they will be reflected in the original graph. This function
also has little overhead when the number of feature tensors in this graph is small.
See Also
--------
local_scope
Returns
-------
DGLGraph
The graph object that can be used as a local variable.
"""
local_node_frame = FrameRef(Frame(self._node_frame._frame))
local_edge_frame = FrameRef(Frame(self._edge_frame._frame))
# Use same per-column initializers and default initializer.
# If registered, a column (based on key) initializer will be used first,
# otherwise the default initializer will be used.
sync_frame_initializer(local_node_frame._frame, self._node_frame._frame)
sync_frame_initializer(local_edge_frame._frame, self._edge_frame._frame)
return DGLGraph(graph_data=self._graph,
node_frame=local_node_frame,
edge_frame=local_edge_frame,
readonly=self.is_readonly,
batch_num_nodes=self.batch_num_nodes,
batch_num_edges=self.batch_num_edges,
parent=self._parent)
@contextmanager
def local_scope(self):
"""Enter a local scope context for this graph.
By entering a local scope, any out-of-place mutation to the feature data will
not be reflected in the original graph, thus making it easier to use in a function scope.
If set, the local scope will use the same initializers for node features and
edge features.
Examples
--------
The following example uses PyTorch backend.
Avoid accidentally overriding existing feature data. This is quite common when
implementing a NN module:
>>> def foo(g):
>>> with g.local_scope():
>>> g.ndata['h'] = torch.ones((g.number_of_nodes(), 3))
>>> return g.ndata['h']
>>>
>>> g = ... # some graph
>>> g.ndata['h'] = torch.zeros((g.number_of_nodes(), 3))
>>> newh = foo(g) # get tensor of all ones
>>> print(g.ndata['h']) # still get tensor of all zeros
Automatically garbage collect locally-defined tensors without the need to manually
``pop`` the tensors.
>>> def foo(g):
>>> with g.local_scope():
>>> # This 'xxx' feature will stay local and be GCed when the function exits
>>> g.ndata['xxx'] = torch.ones((g.number_of_nodes(), 3))
>>> return g.ndata['xxx']
>>>
>>> g = ... # some graph
>>> xxx = foo(g)
>>> print('xxx' in g.ndata)
False
See Also
--------
local_var
"""
old_nframe = self._node_frame
old_eframe = self._edge_frame
self._node_frame = FrameRef(Frame(self._node_frame._frame))
self._edge_frame = FrameRef(Frame(self._edge_frame._frame))
# Use same per-column initializers and default initializer.
# If registered, a column (based on key) initializer will be used first,
# otherwise the default initializer will be used.
sync_frame_initializer(self._node_frame._frame, old_nframe._frame)
sync_frame_initializer(self._edge_frame._frame, old_eframe._frame)
yield
self._node_frame = old_nframe
self._edge_frame = old_eframe
@property
def is_homogeneous(self):
"""Return if the graph is homogeneous."""
return True
############################################################
# Batch/Unbatch APIs
############################################################
def batch(graph_list, node_attrs=ALL, edge_attrs=ALL):
"""Batch a collection of :class:`~dgl.DGLGraph` and return a batched
:class:`DGLGraph` object that is independent of the :attr:`graph_list` so that
one can perform message passing and readout over a batch of graphs
simultaneously. The batch size of the returned graph is the length of
:attr:`graph_list`.
The nodes and edges are re-indexed with a new id in the batched graph with the
rule below:
====== ========== ======================== === ==========================
item Graph 1 Graph 2 ... Graph k
====== ========== ======================== === ==========================
raw id 0, ..., N1 0, ..., N2 ... ..., Nk
new id 0, ..., N1 N1 + 1, ..., N1 + N2 + 1 ... ..., N1 + ... + Nk + k - 1
====== ========== ======================== === ==========================
Modifying the features in the batched graph has no effect on the original
graphs. See the examples below for how to work around this.
Parameters
----------
graph_list : iterable
A collection of :class:`~dgl.DGLGraph` to be batched.
node_attrs : None, str or iterable
The node attributes to be batched. If ``None``, the returned :class:`DGLGraph`
object will not have any node attributes. By default, all node attributes will
be batched. If ``str`` or iterable, this should specify exactly what node
attributes to be batched.
edge_attrs : None, str or iterable, optional
Same as for the case of :attr:`node_attrs`
Returns
-------
DGLGraph
One single batched graph.
Examples
--------
Create two :class:`~dgl.DGLGraph` objects.
**Instantiation:**
>>> import dgl
>>> import torch as th
>>> g1 = dgl.DGLGraph()
>>> g1.add_nodes(2) # Add 2 nodes
>>> g1.add_edge(0, 1) # Add edge 0 -> 1
>>> g1.ndata['hv'] = th.tensor([[0.], [1.]]) # Initialize node features
>>> g1.edata['he'] = th.tensor([[0.]]) # Initialize edge features
>>> g2 = dgl.DGLGraph()
>>> g2.add_nodes(3) # Add 3 nodes
>>> g2.add_edges([0, 2], [1, 1]) # Add edges 0 -> 1, 2 -> 1
>>> g2.ndata['hv'] = th.tensor([[2.], [3.], [4.]]) # Initialize node features
>>> g2.edata['he'] = th.tensor([[1.], [2.]]) # Initialize edge features
Merge two :class:`~dgl.DGLGraph` objects into one :class:`DGLGraph` object.
When merging a list of graphs, we can choose to include only a subset of the attributes.
>>> bg = dgl.batch([g1, g2], edge_attrs=None)
>>> bg.edata
{}
Below one can see that the nodes are re-indexed. The edges are re-indexed in
the same way.
>>> bg.nodes()
tensor([0, 1, 2, 3, 4])
>>> bg.ndata['hv']
tensor([[0.],
[1.],
[2.],
[3.],
[4.]])
**Property:**
We can still get a brief summary of the graphs that constitute the batched graph.
>>> bg.batch_size
2
>>> bg.batch_num_nodes
[2, 3]
>>> bg.batch_num_edges
[1, 2]
**Readout:**
Another common demand for graph neural networks is graph readout, which is a
function that takes in the node attributes and/or edge attributes for a graph
and outputs a vector summarizing the information in the graph.
DGL also supports performing readout for a batch of graphs at once.
Below we take the built-in readout function :func:`sum_nodes` as an example, which
sums over a particular kind of node attribute for each graph.
>>> dgl.sum_nodes(bg, 'hv') # Sum the node attribute 'hv' for each graph.
tensor([[1.], # 0 + 1
[9.]]) # 2 + 3 + 4
**Message passing:**
For message passing and related operations, batched :class:`DGLGraph` acts exactly
the same as a single :class:`~dgl.DGLGraph` with batch size 1.
**Update Attributes:**
Updating the attributes of the batched graph has no effect on the original graphs.
>>> bg.edata['he'] = th.zeros(3, 2)
>>> g2.edata['he']
tensor([[1.],
[2.]])
Instead, we can decompose the batched graph back into a list of graphs and use them
to replace the original graphs.
>>> g1, g2 = dgl.unbatch(bg) # returns a list of DGLGraph objects
>>> g2.edata['he']
tensor([[0., 0.],
[0., 0.]])
See Also
--------
unbatch
"""
if len(graph_list) == 1:
# Need to deepcopy the node/edge frame of original graph.
graph = graph_list[0]
return DGLGraph(graph_data=graph._graph,
node_frame=graph._node_frame.deepclone(),
edge_frame=graph._edge_frame.deepclone(),
batch_num_nodes=graph.batch_num_nodes,
batch_num_edges=graph.batch_num_edges)
def _init_attrs(attrs, mode):
"""Collect attributes of given mode (node/edge) from graph_list.
Parameters
----------
attrs: None or ALL or str or iterable
The attributes to collect. If ALL, check that all graphs have the same
attribute set and return that attribute set. If None, return an empty
list. If it is a string or an iterable of strings, return these
attributes.
mode: str
Whether to collect node attributes ('node') or edge attributes ('edge').
Returns
-------
Iterable
The obtained attribute set.
"""
if mode == 'node':
nitems_list = [g.number_of_nodes() for g in graph_list]
attrs_list = [set(g.node_attr_schemes().keys()) for g in graph_list]
else:
nitems_list = [g.number_of_edges() for g in graph_list]
attrs_list = [set(g.edge_attr_schemes().keys()) for g in graph_list]
if attrs is None:
return []
elif is_all(attrs):
attrs = set()
# Check if at least one graph has items of this mode and associated features.
for i, (g_num_items, g_attrs) in enumerate(zip(nitems_list, attrs_list)):
if g_num_items > 0 and len(g_attrs) > 0:
attrs = g_attrs
ref_g_index = i
break
# Check if all the graphs with mode items have the same associated features.
if len(attrs) > 0:
for i, (g_num_items, g_attrs) in enumerate(zip(nitems_list, attrs_list)):
if g_attrs != attrs and g_num_items > 0:
raise ValueError('Expect graph {0} and {1} to have the same {2} '
'attributes when {2}_attrs=ALL, got {3} and {4}.'
.format(ref_g_index, i, mode, attrs, g_attrs))
return attrs
elif isinstance(attrs, str):
return [attrs]
elif isinstance(attrs, Iterable):
return attrs
else:
raise ValueError('Expected {} attrs to be of type None, str or Iterable, '
'got type {}'.format(mode, type(attrs)))
node_attrs = _init_attrs(node_attrs, 'node')
edge_attrs = _init_attrs(edge_attrs, 'edge')
# create batched graph index
batched_index = graph_index.disjoint_union([g._graph for g in graph_list])
# create batched node and edge frames
if len(node_attrs) == 0:
batched_node_frame = FrameRef(Frame(num_rows=batched_index.number_of_nodes()))
else:
# NOTE: following code will materialize the columns of the input graphs.
cols = {key: F.cat([gr._node_frame[key] for gr in graph_list
if gr.number_of_nodes() > 0], dim=0)
for key in node_attrs}
batched_node_frame = FrameRef(Frame(cols))
if len(edge_attrs) == 0:
batched_edge_frame = FrameRef(Frame(num_rows=batched_index.number_of_edges()))
else:
cols = {key: F.cat([gr._edge_frame[key] for gr in graph_list
if gr.number_of_edges() > 0], dim=0)
for key in edge_attrs}
batched_edge_frame = FrameRef(Frame(cols))
batch_size = 0
batch_num_nodes = []
batch_num_edges = []
for grh in graph_list:
# handle the input is again a batched graph.
batch_size += grh.batch_size
batch_num_nodes += grh.batch_num_nodes
batch_num_edges += grh.batch_num_edges
return DGLGraph(graph_data=batched_index,
node_frame=batched_node_frame,
edge_frame=batched_edge_frame,
batch_num_nodes=batch_num_nodes,
batch_num_edges=batch_num_edges)
def unbatch(graph):
"""Return the list of graphs in this batch.
Parameters
----------
graph : DGLGraph
The batched graph.
Returns
-------
list
A list of :class:`~dgl.DGLGraph` objects whose attributes are obtained
by partitioning the attributes of the :attr:`graph`. The length of the
list is the same as the batch size of :attr:`graph`.
Notes
-----
Unbatching will break each field tensor of the batched graph into smaller
partitions.
For simpler tasks such as node/edge state aggregation, try to use
readout functions.
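Examples
--------
A short sketch reusing the two graphs ``g1`` (2 nodes) and ``g2``
(3 nodes) from the :func:`batch` example.
>>> bg = dgl.batch([g1, g2])
>>> g3, g4 = dgl.unbatch(bg)
>>> g3.number_of_nodes(), g4.number_of_nodes()
(2, 3)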
See Also
--------
batch
"""
if graph.batch_size == 1:
# Like dgl.batch, unbatch also deep copies data frame.
return [DGLGraph(graph_data=graph._graph,
node_frame=graph._node_frame.deepclone(),
edge_frame=graph._edge_frame.deepclone())]
bsize = graph.batch_size
bnn = graph.batch_num_nodes
bne = graph.batch_num_edges
pttns = graph_index.disjoint_partition(graph._graph, utils.toindex(bnn))
# split the frames
node_frames = [FrameRef(Frame(num_rows=n)) for n in bnn]
edge_frames = [FrameRef(Frame(num_rows=n)) for n in bne]
for attr, col in graph._node_frame.items():
col_splits = F.split(col, bnn, dim=0)
for i in range(bsize):
node_frames[i][attr] = col_splits[i]
for attr, col in graph._edge_frame.items():
col_splits = F.split(col, bne, dim=0)
for i in range(bsize):
edge_frames[i][attr] = col_splits[i]
return [DGLGraph(graph_data=pttns[i],
node_frame=node_frames[i],
edge_frame=edge_frames[i]) for i in range(bsize)]
############################################################
# Internal APIs
############################################################
class AdaptedDGLGraph(GraphAdapter):
"""Adapt DGLGraph to interface required by scheduler.
Parameters
----------
graph : DGLGraph
Graph
"""
def __init__(self, graph):
self.graph = graph
@property
def gidx(self):
return self.graph._graph
def num_src(self):
"""Number of source nodes."""
return self.graph.number_of_nodes()
def num_dst(self):
"""Number of destination nodes."""
return self.graph.number_of_nodes()
def num_edges(self):
"""Number of edges."""
return self.graph.number_of_edges()
@property
def srcframe(self):
"""Frame to store source node features."""
return self.graph._node_frame
@property
def dstframe(self):
"""Frame to store source node features."""
return self.graph._node_frame
@property
def edgeframe(self):
"""Frame to store edge features."""
return self.graph._edge_frame
@property
def msgframe(self):
"""Frame to store messages."""
return self.graph._msg_frame
@property
def msgindicator(self):
"""Message indicator tensor."""
return self.graph._get_msg_index()
@msgindicator.setter
def msgindicator(self, val):
"""Set new message indicator tensor."""
self.graph._set_msg_index(val)
def in_edges(self, nodes):
return self.graph._graph.in_edges(nodes)
def out_edges(self, nodes):
return self.graph._graph.out_edges(nodes)
def edges(self, form):
return self.graph._graph.edges(form)
def get_immutable_gidx(self, ctx):
return self.graph._graph.get_immutable_gidx(ctx)
def bits_needed(self):
return self.graph._graph.bits_needed()
@property
def canonical_etype(self):
"""Canonical edge type (None for homogeneous graph)"""
return (None, None, None)
"""Module for dgl kernels for graph computation."""
from __future__ import absolute_import
from .._ffi.function import _init_api
from .. import ndarray as nd
# pylint: disable=invalid-name
def infer_binary_feature_shape(op, lhs, rhs):
"""Infer the output feature shape after a binary operation between lhs and rhs.
Parameters
----------
op : string
The binary_op name.
lhs : dgl.ndarray.NDArray
The lhs tensor.
rhs : dgl.ndarray.NDArray
The rhs tensor.
Returns
-------
tuple of int
The output feature shape.
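Notes
-----
The output feature shape follows NumPy broadcasting over the feature
dimensions (every axis after the first). A hypothetical illustration
of the expected behaviour in plain NumPy:
>>> import numpy as np
>>> np.broadcast_shapes((3, 1), (1, 4))  # analogous feature shapes
(3, 4)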
"""
ret = _CAPI_DGLKernelInferBinaryFeatureShape(op, lhs, rhs)
return tuple(ret.asnumpy())
# pylint: disable=invalid-name
def binary_op_reduce(reducer, op, G, A_target, B_target, A, B, out,
A_rows=None, B_rows=None, out_rows=None):
"""Perform binary operation on the edges of graph ``G``, and optionally
reduce the per-edge result by edge destinations into per-node result.
Details
-------
Concretely, this function could be decomposed into two steps:
1. Perform binary operations on each edge (u, v, e) on graph ``G`` as
follows,::
C[e] = A[select_A_target(u, v, e)] op B[select_B_target(u, v, e)]
where
* ``select_A_target`` and ``select_B_target`` would return the source
node ID, destination node ID, or edge ID, according to ``A_target``
and ``B_target`` which could take either
- "source" (dgl.function.TargetCode.SRC),
- "destination" (dgl.function.TargetCode.DST), or
- "edge" (dgl.function.TargetCode.EDGE).
* ``A`` and ``B`` are data tensors. If ``A_target`` is "edge", then
``A.shape[0]`` should equal the number of edges of ``G``. Otherwise
that should equal the number of nodes of ``G``. Similar constraints
apply for ``B``.
* ``op`` could be either of the following strings: "add", "mul", "sub",
"div".
2. Perform the optional reduction step on ``C`` computed previously.
* If ``reducer`` is None, then no reduction is performed and we return
the per-edge result ``C`` directly,::
out[e] = C[e]
* Otherwise, the per-edge result ``C`` is reduced into per-node result
according to edge destinations, in a similar fashion as
``unsorted_segment_XXX`` in Tensorflow or ``scatter_XXX`` in PyTorch
or PyTorch-Scatter. For all ``v`` that have incoming edges,::
out[v] = reducer_{e: (u, v, e) in G} C[e]
Broadcasting
------------
Broadcasting is supported on the feature dimensions, following numpy
semantics.
Examples::
A.shape = (N, D1, D2) # N is the number of nodes
B.shape = (M, D1, 1) # M is the number of edges
C = BinaryOpReduce("sum", "add", graph, A, B, ...)
C.shape = (N, D1, D2)
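For intuition only, the two steps above (ignoring partial reads/writes
and fixing the "sum" reducer) correspond to the pure-Python sketch
below; ``edge_list``, a list of ``(u, v, e)`` triples, and the helper
name are hypothetical, not the actual kernel::

    import operator
    import numpy as np

    def binary_op_reduce_ref(op, edge_list, A_target, B_target,
                             A, B, num_nodes):
        ops = {'add': operator.add, 'mul': operator.mul,
               'sub': operator.sub, 'div': operator.truediv}
        pick = lambda t, u, v, e: {'source': u, 'destination': v,
                                   'edge': e}[t]
        # Step 1: per-edge binary op (NumPy broadcasting applies).
        C = np.stack([ops[op](A[pick(A_target, u, v, e)],
                              B[pick(B_target, u, v, e)])
                      for (u, v, e) in edge_list])
        # Step 2: reduce per-edge results by destination node.
        out = np.zeros((num_nodes,) + C.shape[1:], dtype=C.dtype)
        for (u, v, e), c in zip(edge_list, C):
            out[v] += c  # "sum" reducer; others are analogous
        return out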
Partial reads/writes
--------------------
Optionally, one can provide which rows to read from ``A`` and ``B`` with
``A_rows`` and ``B_rows``, both of which are 1D integer arrays. Similarly,
one can provide which rows to write to ``out`` with ``out_rows``, which is
again a 1D integer array. Concretely,
* Instead of from ``A`` and ``B``, ``C`` would be computed from
``A[A_rows]`` and ``B[B_rows]``. This implies that
* ``A`` and ``B`` no longer need to have the same number of rows as
the number of nodes or edges in ``G``. However, ``A_rows`` and
``B_rows`` must have the same number of elements as the number of
nodes or edges in ``G``.
* Instead of directly writing to ``out``, it will selectively write some
rows of ``C`` or reduced ``C``,::
out[out_rows[i]] = C[i] if out_rows[i] != -1
Or
out[out_rows[i]] = reducer_{e: (u, v, e) in G} C[e]
Parameters
----------
reducer : str
The type of the reducer ("sum", "max", "min", "mean", "prod", "none").
If the reducer is "none", the output is an edge feature tensor.
Otherwise, a node feature tensor is returned.
op : str
The type of the binary functor ("add", "mul", "sub", "div").
G : GraphIndex
The graph
A_target : int
Choice of source, destination, or edge ID for edges on left operand
B_target : int
Choice of source, destination, or edge ID for edges on right operand
A : NDArray
Data tensor of left operand
B : NDArray
Data tensor of right operand
out : NDArray (output)
Output tensor. The result will be written there in place.
A_rows : NDArray, optional
The rows to read from A.
B_rows : NDArray, optional
The rows to read from B.
out_rows : NDArray, optional
The rows to write to output tensor.
"""
if A_rows is None:
A_rows = nd.NULL[G.dtype]
if B_rows is None:
B_rows = nd.NULL[G.dtype]
if out_rows is None:
out_rows = nd.NULL[G.dtype]
_CAPI_DGLKernelBinaryOpReduce(
reducer, op, G,
int(A_target), int(B_target),
A, B, out,
A_rows, B_rows, out_rows)
# pylint: disable=invalid-name
def backward_lhs_binary_op_reduce(
reducer, op, G,
A_target, B_target,
A, B, out,
grad_out, grad_A,
A_rows=None, B_rows=None, out_rows=None):
"""Compute the gradient of ``binary_op_reduce`` w.r.t. ``A`` and store it
in ``grad_A``.
See ``binary_op_reduce`` for forward propagation and partial reads/writes.
Gradient of broadcasted tensors
-------------------------------
``grad_A`` is assumed to be unbroadcasted, i.e. the shape of ``grad_A``
is the same as ``grad_out`` except the first axis.
If broadcasting happened in forward propagation, one needs to manually
sum the gradients along the broadcasted dimension to yield the correct
gradient.
Parameters
----------
reducer : str
The type of the reducer ("sum", "max", "min", "mean", "prod", "none").
If the reducer is "none", the output is an edge feature tensor.
Otherwise, a node feature tensor is returned.
op : str
The type of the binary functor ("add", "mul", "sub", "div").
G : GraphIndex
The graph
A_target : int
Choice of source, destination, or edge ID for edges on left operand
B_target : int
Choice of source, destination, or edge ID for edges on right operand
A : NDArray
Data tensor of left operand
B : NDArray
Data tensor of right operand
out : NDArray
Output tensor computed in the forward pass.
grad_out : NDArray
Gradient w.r.t. ``out``.
grad_A : NDArray (output)
Gradient w.r.t. ``A``. The result will be written there in place.
A_rows : NDArray, optional
The rows read from A.
B_rows : NDArray, optional
The rows read from B.
out_rows : NDArray, optional
The rows written to output tensor.
"""
if A_rows is None:
A_rows = nd.NULL[G.dtype]
if B_rows is None:
B_rows = nd.NULL[G.dtype]
if out_rows is None:
out_rows = nd.NULL[G.dtype]
_CAPI_DGLKernelBackwardLhsBinaryOpReduce(
reducer, op, G,
int(A_target), int(B_target),
A_rows, B_rows, out_rows,
A, B, out,
grad_out, grad_A)
# pylint: disable=invalid-name
def backward_rhs_binary_op_reduce(
reducer, op, G,
A_target, B_target,
A, B, out,
grad_out, grad_B,
A_rows=None, B_rows=None, out_rows=None):
"""Compute the gradient of ``binary_op_reduce`` w.r.t. ``B`` and store it
in ``grad_B``.
See ``binary_op_reduce`` for forward propagation and partial reads/writes.
Gradient of broadcasted tensors
-------------------------------
``grad_B`` is assumed to be unbroadcasted, i.e. the shape of ``grad_B``
is the same as ``grad_out`` except the first axis.
If broadcasting happened in forward propagation, one needs to manually
sum the gradients along the broadcasted dimension to yield the correct
gradient.
Parameters
----------
reducer : str
The type of the reducer ("sum", "max", "min", "mean", "prod", "none").
If the reducer is "none", the output is an edge feature tensor.
Otherwise, a node feature tensor is returned.
op : str
The type of the binary functor ("add", "mul", "sub", "div").
G : GraphIndex
The graph
A_target : int
Choice of source, destination, or edge ID for edges on left operand
B_target : int
Choice of source, destination, or edge ID for edges on right operand
A : NDArray
Data tensor of left operand
B : NDArray
Data tensor of right operand
out : NDArray
Output tensor computed in the forward pass.
grad_out : NDArray
Gradient w.r.t. ``out``.
grad_B : NDArray (output)
Gradient w.r.t. ``B``. The result will be written there in place.
A_rows : NDArray, optional
The rows read from A.
B_rows : NDArray, optional
The rows read from B.
out_rows : NDArray, optional
The rows written to output tensor.
"""
if A_rows is None:
A_rows = nd.NULL[G.dtype]
if B_rows is None:
B_rows = nd.NULL[G.dtype]
if out_rows is None:
out_rows = nd.NULL[G.dtype]
_CAPI_DGLKernelBackwardRhsBinaryOpReduce(
reducer, op, G,
int(A_target), int(B_target),
A_rows, B_rows, out_rows,
A, B, out,
grad_out, grad_B)
# pylint: disable=invalid-name
def copy_reduce(reducer, G, target,
X, out,
X_rows=None, out_rows=None):
"""Copy data in ``X`` according to source/destination/edge ID onto the
edges of graph ``G``, and optionally reduce the per-edge result by edge
destinations into per-node result.
Details
-------
Concretely, this function could be decomposed into two steps:
1. For each edge (u, v, e) on graph ``G``, set::
C[e] = X[select_target(u, v, e)]
where
* ``select_target`` would return the source node ID, destination node
ID, or edge ID, according to ``target``, which could take either
- "source" (dgl.function.TargetCode.SRC),
- "destination" (dgl.function.TargetCode.DST), or
- "edge" (dgl.function.TargetCode.EDGE).
* ``X`` is a data tensor. If ``target`` is "edge", then ``X.shape[0]``
should equal the number of edges of ``G``. Otherwise that should
equal the number of nodes of ``G``.
2. Perform the optional reduction step on ``C`` computed previously.
* If ``reducer`` is None, then no reduction is performed and we return
the per-edge result ``C`` directly,::
out[e] = C[e]
* Otherwise, the per-edge result ``C`` is reduced into per-node result
according to edge destinations, in a similar fashion as
``unsorted_segment_XXX`` in Tensorflow or ``scatter_XXX`` in PyTorch
or PyTorch-Scatter. For all ``v`` that have incoming edges,::
out[v] = reducer_{e: (u, v, e) in G} C[e]
Partial reads/writes
--------------------
Optionally, one can provide which rows to read from ``X`` with ``X_rows``,
which is a 1D integer array. Similarly, one can provide which rows to
write to ``out`` with ``out_rows``, which is again a 1D integer array.
Concretely,
* Instead of from ``X``, ``C`` would be copied from ``X[X_rows]``. This
implies that
* ``X`` no longer needs to have the same number of rows as the number of
nodes or edges in ``G``. However, ``X_rows`` must have the same
number of elements as the number of nodes or edges in ``G``.
* Instead of directly writing to ``out``, it will selectively write some
rows of ``C`` or reduced ``C``,::
out[out_rows[i]] = C[i] if out_rows[i] != -1
Or
out[out_rows[i]] = reducer_{e: (u, v, e) in G} C[e]
Parameters
----------
reducer : str
The type of the reducer ("sum", "max", "min", "mean", "prod", "none").
If the reducer is "none", the output is an edge feature tensor.
Otherwise, a node feature tensor is returned.
G : GraphIndex
The graph
target : int
Choice of source, destination, or edge ID for edges to index in data
tensor.
X : NDArray
Data tensor.
out : NDArray (output)
Output tensor. The result will be written there in place.
X_rows : NDArray, optional
The rows to read from X.
out_rows : NDArray, optional
The rows to write to output tensor.
"""
if X_rows is None:
X_rows = nd.NULL[G.dtype]
if out_rows is None:
out_rows = nd.NULL[G.dtype]
_CAPI_DGLKernelCopyReduce(
reducer, G, int(target),
X, out, X_rows, out_rows)
# pylint: disable=invalid-name
def backward_copy_reduce(reducer, G, target,
X, out,
grad_out, grad_X,
X_rows=None, out_rows=None):
"""Compute the gradient of ``copy_reduce`` w.r.t. ``X`` and store it in
``grad_X``.
See ``copy_reduce`` for forward propagation and partial reads/writes.
Parameters
----------
reducer : str
The type of the reducer ("sum", "max", "min", "mean", "prod", "none").
If the reducer is "none", the output is an edge feature tensor.
Otherwise, a node feature tensor is returned.
G : GraphIndex
The graph
target : int
Choice of source, destination, or edge ID for edges to index in data
tensor.
X : NDArray
Data tensor.
out : NDArray
Output tensor computed in the forward pass.
grad_out : NDArray
Gradient w.r.t. ``out``.
grad_X : NDArray (output)
Gradient w.r.t. ``X``. The result will be written there in place.
X_rows : NDArray, optional
The rows read from X.
out_rows : NDArray, optional
The rows written to output tensor.
"""
if X_rows is None:
X_rows = nd.NULL[G.dtype]
if out_rows is None:
out_rows = nd.NULL[G.dtype]
_CAPI_DGLKernelBackwardCopyReduce(
reducer, G, int(target),
X, out, grad_out, grad_X,
X_rows, out_rows)
_init_api("dgl._deprecate.kernel")
"""Class for NodeFlow data structure."""
from __future__ import absolute_import
from .._ffi.object import register_object, ObjectBase
from .._ffi.function import _init_api
from ..base import ALL, is_all, DGLError, dgl_warning
from .. import backend as F
from .frame import Frame, FrameRef
from .graph import DGLBaseGraph
from ..graph_index import transform_ids
from .runtime import ir, scheduler, Runtime
from .. import utils
from .view import LayerView, BlockView
__all__ = ['NodeFlow']
@register_object('graph.NodeFlow')
class NodeFlowObject(ObjectBase):
"""NodeFlow object"""
@property
def graph(self):
"""The graph structure of this nodeflow.
Returns
-------
GraphIndex
"""
return _CAPI_NodeFlowGetGraph(self)
@property
def layer_offsets(self):
"""The offsets of each layer.
Returns
-------
NDArray
"""
return _CAPI_NodeFlowGetLayerOffsets(self)
@property
def block_offsets(self):
"""The offsets of each block.
Returns
-------
NDArray
"""
return _CAPI_NodeFlowGetBlockOffsets(self)
@property
def node_mapping(self):
"""Mapping array from nodeflow node id to parent graph
Returns
-------
NDArray
"""
return _CAPI_NodeFlowGetNodeMapping(self)
@property
def edge_mapping(self):
"""Mapping array from nodeflow edge id to parent graph
Returns
-------
NDArray
"""
return _CAPI_NodeFlowGetEdgeMapping(self)
class NodeFlow(DGLBaseGraph):
"""The NodeFlow class stores the sampling results of Neighbor
sampling and Layer-wise sampling.
These sampling algorithms generate graphs with multiple layers. The
edges connect the nodes between two layers, which forms *blocks*, while
there don't exist edges between the nodes in the same layer. As illustrated
in the figure, the last layer stores the target (seed) nodes where neighbors
are sampled from. Neighbors reached in different hops are placed in different
layers. Edges that connect to the neighbors in the next hop are placed
in a block.
We store extra information, such as the node and edge mapping from
the NodeFlow graph to the parent graph.
.. image:: https://data.dgl.ai/api/sampling.nodeflow.png
DO NOT create a NodeFlow object directly. Use a sampling method to
generate a NodeFlow instead.
Parameters
----------
parent : DGLGraphStale
The parent graph.
nfobj : NodeFlowObject
The nodeflow object
"""
def __init__(self, parent, nfobj):
super(NodeFlow, self).__init__(nfobj.graph)
dgl_warning('NodeFlow APIs are deprecated starting from v0.5. Please read our'
' guide<link> for how to use the new sampling APIs.')
self._parent = parent
self._node_mapping = utils.toindex(nfobj.node_mapping)
self._edge_mapping = utils.toindex(nfobj.edge_mapping)
self._layer_offsets = utils.toindex(nfobj.layer_offsets).tonumpy()
self._block_offsets = utils.toindex(nfobj.block_offsets).tonumpy()
# node/edge frames
self._node_frames = [FrameRef(Frame(num_rows=self.layer_size(i))) \
for i in range(self.num_layers)]
self._edge_frames = [FrameRef(Frame(num_rows=self.block_size(i))) \
for i in range(self.num_blocks)]
# registered functions
self._message_funcs = [None] * self.num_blocks
self._reduce_funcs = [None] * self.num_blocks
self._apply_node_funcs = [None] * self.num_blocks
self._apply_edge_funcs = [None] * self.num_blocks
def _get_layer_id(self, layer_id):
"""The layer Id might be negative. We need to convert it to the actual layer Id.
"""
if layer_id >= 0:
return layer_id
else:
return self.num_layers + layer_id
def _get_block_id(self, block_id):
"""The block Id might be negative. We need to convert it to the actual block Id.
"""
if block_id >= 0:
return block_id
else:
return self.num_blocks + block_id
def _get_node_frame(self, layer_id):
return self._node_frames[layer_id]
def _get_edge_frame(self, block_id):
return self._edge_frames[block_id]
@property
def num_layers(self):
"""Get the number of layers.
Returns
-------
int
the number of layers
"""
return len(self._layer_offsets) - 1
@property
def num_blocks(self):
"""Get the number of blocks.
Returns
-------
int
the number of blocks
"""
return self.num_layers - 1
@property
def layers(self):
"""Return a LayerView of this NodeFlow.
This is mainly for usage like:
* `g.layers[2].data['h']` to get the node features of layer#2.
"""
return LayerView(self)
@property
def blocks(self):
"""Return a BlockView of this NodeFlow.
This is mainly for usage like:
* `g.blocks[1].data['h']` to get the edge features of blocks from layer#1 to layer#2.
"""
return BlockView(self)
def node_attr_schemes(self, layer_id):
"""Return the node feature schemes.
Each feature scheme is a named tuple that stores the shape and data type
of the node feature.
Parameters
----------
layer_id : int
the specified layer to get node data scheme.
Returns
-------
dict of str to schemes
The schemes of node feature columns.
"""
layer_id = self._get_layer_id(layer_id)
return self._node_frames[layer_id].schemes
def edge_attr_schemes(self, block_id):
"""Return the edge feature schemes.
Each feature scheme is a named tuple that stores the shape and data type
of the edge feature.
Parameters
----------
block_id : int
the specified block to get edge data scheme.
Returns
-------
dict of str to schemes
The schemes of edge feature columns.
"""
block_id = self._get_block_id(block_id)
return self._edge_frames[block_id].schemes
def layer_size(self, layer_id):
"""Return the number of nodes in a specified layer.
Parameters
----------
layer_id : int
the specified layer to return the number of nodes.
"""
layer_id = self._get_layer_id(layer_id)
return int(self._layer_offsets[layer_id + 1]) - int(self._layer_offsets[layer_id])
def block_size(self, block_id):
"""Return the number of edges in a specified block.
Parameters
----------
block_id : int
the specified block to return the number of edges.
"""
block_id = self._get_block_id(block_id)
return int(self._block_offsets[block_id + 1]) - int(self._block_offsets[block_id])
def copy_from_parent(self, node_embed_names=ALL, edge_embed_names=ALL, ctx=None):
"""Copy node/edge features from the parent graph.
Parameters
----------
node_embed_names : a list of lists of strings, optional
The names of embeddings in each layer.
edge_embed_names : a list of lists of strings, optional
The names of embeddings in each block.
ctx : Context
The device to copy the tensors to. If None, features stay on their original device.
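Examples
--------
An illustrative sketch; it assumes the parent graph carries a node
feature 'h' and an edge feature 'w':
>>> nf.copy_from_parent(node_embed_names=[['h']] * nf.num_layers,
...                     edge_embed_names=[['w']] * nf.num_blocks)
>>> nf.layers[0].data['h']  # per-layer copies are now available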
"""
if self._parent._node_frame.num_rows != 0 and self._parent._node_frame.num_columns != 0:
if is_all(node_embed_names):
for i in range(self.num_layers):
nid = utils.toindex(self.layer_parent_nid(i))
self._node_frames[i] = FrameRef(Frame(_copy_frame(
self._parent._node_frame[nid], ctx)))
elif node_embed_names is not None:
assert isinstance(node_embed_names, list) \
and len(node_embed_names) == self.num_layers, \
"The number of embedding name lists must match the number of layers."
for i in range(self.num_layers):
nid = self.layer_parent_nid(i)
self._node_frames[i] = _get_frame(self._parent._node_frame,
node_embed_names[i], nid, ctx)
if self._parent._edge_frame.num_rows != 0 and self._parent._edge_frame.num_columns != 0:
if is_all(edge_embed_names):
for i in range(self.num_blocks):
eid = utils.toindex(self.block_parent_eid(i))
self._edge_frames[i] = FrameRef(Frame(_copy_frame(
self._parent._edge_frame[eid], ctx)))
elif edge_embed_names is not None:
assert isinstance(edge_embed_names, list) \
and len(edge_embed_names) == self.num_blocks, \
"The number of embedding name lists must match the number of blocks."
for i in range(self.num_blocks):
eid = self.block_parent_eid(i)
self._edge_frames[i] = _get_frame(self._parent._edge_frame,
edge_embed_names[i], eid, ctx)
def copy_to_parent(self, node_embed_names=ALL, edge_embed_names=ALL):
"""Copy node/edge embeddings to the parent graph.
Note: if a node of the parent graph appears in multiple layers of the
NodeFlow and those layers carry node data with the same name, the data
from a later layer will overwrite the data from an earlier layer when
copying back.
For example, suppose node 5 of the parent graph appears in layers 0 and 1,
and both layers have node data 'h'. The node data of layer 1 will
overwrite the data of layer 0 when copying back.
To avoid this, give the node data in each layer a different name.
Parameters
----------
node_embed_names : a list of lists of strings, optional
The names of embeddings in each layer.
edge_embed_names : a list of lists of strings, optional
The names of embeddings in each block.
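Examples
--------
An illustrative sketch; it assumes each layer stores its result under a
distinct name ('h0', 'h1', ...) to avoid the overwriting issue above:
>>> names = [['h%d' % i] for i in range(nf.num_layers)]
>>> nf.copy_to_parent(node_embed_names=names, edge_embed_names=None)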
"""
if self._parent._node_frame.num_rows != 0 and self._parent._node_frame.num_columns != 0:
if is_all(node_embed_names):
for i in range(self.num_layers):
nid = utils.toindex(self.layer_parent_nid(i))
# We should write data back directly.
self._parent._node_frame.update_rows(nid, self._node_frames[i], inplace=True)
elif node_embed_names is not None:
assert isinstance(node_embed_names, list) \
and len(node_embed_names) == self.num_layers, \
"The number of embedding name lists must match the number of layers."
for i in range(self.num_layers):
nid = utils.toindex(self.layer_parent_nid(i))
_update_frame(self._parent._node_frame, node_embed_names[i], nid,
self._node_frames[i])
if self._parent._edge_frame.num_rows != 0 and self._parent._edge_frame.num_columns != 0:
if is_all(edge_embed_names):
for i in range(self.num_blocks):
eid = utils.toindex(self.block_parent_eid(i))
self._parent._edge_frame.update_rows(eid, self._edge_frames[i], inplace=True)
elif edge_embed_names is not None:
assert isinstance(edge_embed_names, list) \
and len(edge_embed_names) == self.num_blocks, \
"The number of embedding name lists must match the number of blocks."
for i in range(self.num_blocks):
eid = utils.toindex(self.block_parent_eid(i))
_update_frame(self._parent._edge_frame, edge_embed_names[i], eid,
self._edge_frames[i])
def map_to_parent_nid(self, nid):
"""Map the NodeFlow node Ids to the parent node Ids.
Parameters
----------
nid : tensor
The node ID array in the NodeFlow graph.
Returns
-------
Tensor
The parent node id array.
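Examples
--------
An illustrative sketch; it maps all the node Ids of layer 0 back to
the parent graph:
>>> parent_ids = nf.map_to_parent_nid(nf.layer_nid(0))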
"""
nid = utils.toindex(nid)
return F.gather_row(self._node_mapping.tousertensor(), nid.tousertensor())
def map_to_parent_eid(self, eid):
"""Map the NodeFlow edge Ids to the parent edge Ids.
Parameters
----------
eid : tensor
The edge ID array in the NodeFlow graph.
Returns
-------
Tensor
The parent edge id array.
"""
eid = utils.toindex(eid)
return F.gather_row(self._edge_mapping.tousertensor(), eid.tousertensor())
def map_from_parent_nid(self, layer_id, parent_nids, remap_local=False):
"""Map parent node Ids to NodeFlow node Ids in a certain layer.
If `remap_local` is True, it returns the node Ids local to the layer.
Otherwise, the node Ids are unique in the NodeFlow.
Parameters
----------
layer_id : int
The layer Id.
parent_nids: list or Tensor
Node Ids in the parent graph.
remap_local: boolean
Remap layer/block-level local Id if True; otherwise, NodeFlow-level Id.
Returns
-------
Tensor
Node Ids in the NodeFlow.
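Examples
--------
An illustrative round trip; the parent Ids are taken from layer 0, so
they are guaranteed to be present in that layer:
>>> pids = nf.layer_parent_nid(0)
>>> local_ids = nf.map_from_parent_nid(0, pids, remap_local=True)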
"""
layer_id = self._get_layer_id(layer_id)
parent_nids = utils.toindex(parent_nids)
layers = self._layer_offsets
start = int(layers[layer_id])
end = int(layers[layer_id + 1])
# TODO(minjie): should not directly use []
mapping = self._node_mapping.tousertensor()
mapping = mapping[start:end]
mapping = utils.toindex(mapping)
nflow_ids = transform_ids(mapping, parent_nids)
if remap_local:
return nflow_ids.tousertensor()
else:
return nflow_ids.tousertensor() + int(self._layer_offsets[layer_id])
def layer_in_degree(self, layer_id):
"""Return the in-degree of the nodes in the specified layer.
Parameters
----------
layer_id : int
The layer Id.
Returns
-------
Tensor
The degree of the nodes in the specified layer.
"""
layer_id = self._get_layer_id(layer_id)
return self._graph.in_degrees(utils.toindex(self.layer_nid(layer_id))).tousertensor()
def layer_out_degree(self, layer_id):
"""Return the out-degree of the nodes in the specified layer.
Parameters
----------
layer_id : int
The layer Id.
Returns
-------
Tensor
The degree of the nodes in the specified layer.
"""
layer_id = self._get_layer_id(layer_id)
return self._graph.out_degrees(utils.toindex(self.layer_nid(layer_id))).tousertensor()
def layer_nid(self, layer_id):
"""Get the node Ids in the specified layer.
The returned node Ids are unique in the NodeFlow.
Parameters
----------
layer_id : int
The layer to get the node Ids.
Returns
-------
Tensor
The node ids.
"""
layer_id = self._get_layer_id(layer_id)
assert layer_id + 1 < len(self._layer_offsets)
start = self._layer_offsets[layer_id]
end = self._layer_offsets[layer_id + 1]
return F.arange(int(start), int(end))
def layer_parent_nid(self, layer_id):
"""Get the node Ids of the parent graph in the specified layer
layer_parent_nid(-1) returns seed vertices for this NodeFlow.
Parameters
----------
layer_id : int
The layer to get the node Ids.
Returns
-------
Tensor
The parent node id array.
"""
layer_id = self._get_layer_id(layer_id)
assert layer_id + 1 < len(self._layer_offsets)
start = self._layer_offsets[layer_id]
end = self._layer_offsets[layer_id + 1]
# TODO(minjie): should not directly use []
return self._node_mapping.tousertensor()[start:end]
def block_eid(self, block_id):
"""Get the edge Ids in the specified block.
The returned edge Ids are unique in the NodeFlow.
Parameters
----------
block_id : int
the specified block to get edge Ids.
Returns
-------
Tensor
The edge ids of the block in the NodeFlow.
"""
block_id = self._get_block_id(block_id)
start = self._block_offsets[block_id]
end = self._block_offsets[block_id + 1]
return F.arange(int(start), int(end))
def block_parent_eid(self, block_id):
"""Get the edge Ids of the parent graph in the specified block.
Parameters
----------
block_id : int
the specified block to get edge Ids.
Returns
-------
Tensor
The edge ids of the block in the parent graph.
"""
block_id = self._get_block_id(block_id)
start = self._block_offsets[block_id]
end = self._block_offsets[block_id + 1]
# TODO(minjie): should not directly use []
ret = self._edge_mapping.tousertensor()[start:end]
# If `add_self_loop` is enabled, the returned parent eid can be -1.
# We have to make sure this case doesn't happen.
assert F.asnumpy(ret == -1).sum(0) == 0, "The eid in the parent graph is invalid."
return ret
def block_edges(self, block_id, remap_local=False):
"""Return the edges in a block.
If remap_local is True, returned indices u, v, eid will be remapped to local
Ids (i.e. starting from 0) in the block or in the layer. Otherwise,
u, v, eid are unique in the NodeFlow.
Parameters
----------
block_id : int
The specified block to return the edges.
remap_local : boolean
Remap layer/block-level local Id if True; otherwise, NodeFlow-level Id.
Returns
-------
Tensor
The src nodes.
Tensor
The dst nodes.
Tensor
The edge ids.
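Examples
--------
An illustrative sketch; with ``remap_local=True``, ``u`` indexes layer 0
locally, ``v`` indexes layer 1 locally, and ``eid`` indexes block 0 locally:
>>> u, v, eid = nf.block_edges(0, remap_local=True)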
"""
block_id = self._get_block_id(block_id)
layer0_size = self._layer_offsets[block_id + 1] - self._layer_offsets[block_id]
rst = _CAPI_NodeFlowGetBlockAdj(self._graph, "coo",
int(layer0_size),
int(self._layer_offsets[block_id + 1]),
int(self._layer_offsets[block_id + 2]),
remap_local)
idx = utils.toindex(rst(0)).tousertensor()
eid = utils.toindex(rst(1))
num_edges = int(len(idx) / 2)
assert len(eid) == num_edges
return idx[num_edges:len(idx)], idx[0:num_edges], eid.tousertensor()
def block_adjacency_matrix(self, block_id, ctx):
"""Return the adjacency matrix representation for a specific block in a NodeFlow.
A row of the returned adjacency matrix represents the destination
of an edge and the column represents the source.
Parameters
----------
block_id : int
The specified block to return the adjacency matrix.
ctx : context
The context of the returned matrix.
Returns
-------
SparseTensor
The adjacency matrix.
Tensor
An index for data shuffling due to the sparse format change, or None
if no shuffling is required.
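Examples
--------
An illustrative sketch of a manual SPMV with the block adjacency; it
assumes the PyTorch backend, whose sparse matrices support ``torch.spmm``:
>>> import torch
>>> adj, shuffle = nf.block_adjacency_matrix(0, ctx=torch.device('cpu'))
>>> # rows are layer-1 nodes, columns are layer-0 nodes
>>> h1 = torch.spmm(adj, nf.layers[0].data['h'])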
"""
block_id = self._get_block_id(block_id)
fmt = F.get_preferred_sparse_format()
# We need to extract two layers.
layer0_size = self._layer_offsets[block_id + 1] - self._layer_offsets[block_id]
rst = _CAPI_NodeFlowGetBlockAdj(self._graph, fmt,
int(layer0_size),
int(self._layer_offsets[block_id + 1]),
int(self._layer_offsets[block_id + 2]),
True)
num_rows = self.layer_size(block_id + 1)
num_cols = self.layer_size(block_id)
if fmt == "csr":
indptr = F.copy_to(utils.toindex(rst(0)).tousertensor(), ctx)
indices = F.copy_to(utils.toindex(rst(1)).tousertensor(), ctx)
shuffle = utils.toindex(rst(2))
dat = F.ones(indices.shape, dtype=F.float32, ctx=ctx)
return F.sparse_matrix(dat, ('csr', indices, indptr),
(num_rows, num_cols))[0], shuffle.tousertensor()
elif fmt == "coo":
## FIXME(minjie): data type
idx = F.copy_to(utils.toindex(rst(0)).tousertensor(), ctx)
m = self.block_size(block_id)
idx = F.reshape(idx, (2, m))
dat = F.ones((m,), dtype=F.float32, ctx=ctx)
adj, shuffle_idx = F.sparse_matrix(dat, ('coo', idx), (num_rows, num_cols))
return adj, shuffle_idx
else:
raise DGLError("unknown sparse format: %s" % fmt)
def block_incidence_matrix(self, block_id, typestr, ctx):
"""Return the incidence matrix representation of the block.
An incidence matrix is an n x m sparse matrix, where n is
the number of nodes and m is the number of edges. Each nonzero
entry indicates whether the corresponding edge is incident to the node.
There are two types of an incidence matrix `I`:
* ``in``:
- I[v, e] = 1 if e is the in-edge of v (or v is the dst node of e);
- I[v, e] = 0 otherwise.
* ``out``:
- I[v, e] = 1 if e is the out-edge of v (or v is the src node of e);
- I[v, e] = 0 otherwise.
"both" isn't defined in the block of a NodeFlow.
Parameters
----------
block_id : int
The specified block to return the incidence matrix.
typestr : str
Can be either "in" or "out"; "both" is not defined for a block.
ctx : context
The context of returned incidence matrix.
Returns
-------
SparseTensor
The incidence matrix.
Tensor
An index for data shuffling due to the sparse format change, or None
if no shuffling is required.
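Examples
--------
An illustrative sketch (assuming the PyTorch backend); the "in" incidence
matrix of block 0 has shape (layer_size(1), block_size(0)):
>>> import torch
>>> inc, shuffle = nf.block_incidence_matrix(0, 'in', ctx=torch.device('cpu'))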
"""
block_id = self._get_block_id(block_id)
src, dst, eid = self.block_edges(block_id, remap_local=True)
src = F.copy_to(src, ctx) # the index of the ctx will be cached
dst = F.copy_to(dst, ctx) # the index of the ctx will be cached
eid = F.copy_to(eid, ctx) # the index of the ctx will be cached
if typestr == 'in':
n = self.layer_size(block_id + 1)
m = self.block_size(block_id)
row = F.unsqueeze(dst, 0)
col = F.unsqueeze(eid, 0)
idx = F.cat([row, col], dim=0)
# FIXME(minjie): data type
dat = F.ones((m,), dtype=F.float32, ctx=ctx)
inc, shuffle_idx = F.sparse_matrix(dat, ('coo', idx), (n, m))
elif typestr == 'out':
n = self.layer_size(block_id)
m = self.block_size(block_id)
row = F.unsqueeze(src, 0)
col = F.unsqueeze(eid, 0)
idx = F.cat([row, col], dim=0)
# FIXME(minjie): data type
dat = F.ones((m,), dtype=F.float32, ctx=ctx)
inc, shuffle_idx = F.sparse_matrix(dat, ('coo', idx), (n, m))
else:
raise DGLError('Invalid incidence matrix type: %s' % str(typestr))
return inc, shuffle_idx
def set_n_initializer(self, initializer, layer_id=ALL, field=None):
"""Set the initializer for empty node features.
Initializer is a callable that returns a tensor given the shape, data type
and device context.
When a subset of the nodes is assigned a new feature, the initializer is
used to create features for the rest of the nodes.
Parameters
----------
initializer : callable
The initializer.
layer_id : int
the layer to set the initializer.
field : str, optional
The feature field name. By default, the initializer is set for all
feature fields.
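Examples
--------
An illustrative sketch using ``dgl.init.zero_initializer``, which fills
uninitialized features with zeros:
>>> import dgl
>>> nf.set_n_initializer(dgl.init.zero_initializer)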
"""
if is_all(layer_id):
for i in range(self.num_layers):
self._node_frames[i].set_initializer(initializer, field)
else:
layer_id = self._get_layer_id(layer_id)
self._node_frames[layer_id].set_initializer(initializer, field)
def set_e_initializer(self, initializer, block_id=ALL, field=None):
"""Set the initializer for empty edge features.
Initializer is a callable that returns a tensor given the shape, data
type and device context.
When a subset of the edges is assigned a new feature, the initializer is
used to create features for the rest of the edges.
Parameters
----------
initializer : callable
The initializer.
block_id : int
the block to set the initializer.
field : str, optional
The feature field name. By default, the initializer is set for all
feature fields.
"""
if is_all(block_id):
for i in range(self.num_blocks):
self._edge_frames[i].set_initializer(initializer, field)
else:
block_id = self._get_block_id(block_id)
self._edge_frames[block_id].set_initializer(initializer, field)
def register_message_func(self, func, block_id=ALL):
"""Register global message function for a block.
Once registered, ``func`` will be used as the default
message function in message passing operations, including
:func:`block_compute`, :func:`prop_flow`.
Parameters
----------
func : callable
Message function on the edge. The function should be
an :mod:`Edge UDF <dgl.udf>`.
block_id : int or ALL
the block to register the message function.
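Examples
--------
An illustrative sketch using the builtin functions in ``dgl.function``;
registering with the default ``block_id=ALL`` applies to every block:
>>> import dgl.function as fn
>>> nf.register_message_func(fn.copy_src('h', 'm'))
>>> nf.register_reduce_func(fn.sum('m', 'h'))
>>> nf.prop_flow()  # uses the registered defaults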
"""
if is_all(block_id):
self._message_funcs = [func] * self.num_blocks
else:
block_id = self._get_block_id(block_id)
self._message_funcs[block_id] = func
def register_reduce_func(self, func, block_id=ALL):
"""Register global message reduce function for a block.
Once registered, ``func`` will be used as the default
message reduce function in message passing operations, including
:func:`block_compute`, :func:`prop_flow`.
Parameters
----------
func : callable
Reduce function on the node. The function should be
a :mod:`Node UDF <dgl.udf>`.
block_id : int or ALL
the block to register the reduce function.
"""
if is_all(block_id):
self._reduce_funcs = [func] * self.num_blocks
else:
block_id = self._get_block_id(block_id)
self._reduce_funcs[block_id] = func
def register_apply_node_func(self, func, block_id=ALL):
"""Register global node apply function for a block.
Once registered, ``func`` will be used as the default apply
node function. Related operations include :func:`apply_layer`,
:func:`block_compute`, :func:`prop_flow`.
Parameters
----------
func : callable
Apply function on the nodes. The function should be
a :mod:`Node UDF <dgl.udf>`.
block_id : int or ALL
the block to register the apply node function.
"""
if is_all(block_id):
self._apply_node_funcs = [func] * self.num_blocks
else:
block_id = self._get_block_id(block_id)
self._apply_node_funcs[block_id] = func
def register_apply_edge_func(self, func, block_id=ALL):
"""Register global edge apply function for a block.
Once registered, ``func`` will be used as the default apply
edge function in :func:`apply_block`.
Parameters
----------
func : callable
Apply function on the edge. The function should be
an :mod:`Edge UDF <dgl.udf>`.
block_id : int or ALL
the block to register the apply edge function.
"""
if is_all(block_id):
self._apply_edge_funcs = [func] * self.num_blocks
else:
block_id = self._get_block_id(block_id)
self._apply_edge_funcs[block_id] = func
def apply_layer(self, layer_id, func="default", v=ALL, inplace=False):
"""Apply node update function on the node embeddings in the specified layer.
Parameters
----------
layer_id : int
The specified layer to update node embeddings.
func : callable or None, optional
Apply function on the nodes. The function should be
a :mod:`Node UDF <dgl.udf>`.
v : a list of vertex Ids or ALL.
The vertex Ids (unique in the NodeFlow) to run the node update function.
inplace : bool, optional
If True, update will be done in place, but autograd will break.
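Examples
--------
An illustrative sketch; the Node UDF below scales the feature 'h' of the
last layer (assuming 'h' has been copied from the parent graph):
>>> nf.apply_layer(-1, lambda nodes: {'h': nodes.data['h'] * 2})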
"""
layer_id = self._get_layer_id(layer_id)
if func == "default":
func = self._apply_node_funcs[layer_id]
if is_all(v):
v = utils.toindex(slice(0, self.layer_size(layer_id)))
else:
v = v - int(self._layer_offsets[layer_id])
v = utils.toindex(v)
with ir.prog() as prog:
scheduler.schedule_nodeflow_apply_nodes(graph=self,
layer_id=layer_id,
v=v,
apply_func=func,
inplace=inplace)
Runtime.run(prog)
def apply_block(self, block_id, func="default", edges=ALL, inplace=False):
"""Apply an edge update function on the edge embeddings in the specified block.
Parameters
----------
block_id : int
The specified block to update edge embeddings.
func : callable or None, optional
Apply function on the edges. The function should be
an :mod:`Edge UDF <dgl.udf>`.
edges : a list of edge Ids or ALL.
The edge Ids to run the edge update function on.
inplace : bool, optional
If True, update will be done in place, but autograd will break.
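Examples
--------
An illustrative sketch; the Edge UDF below stores a weighted message on
every edge of block 0 (assuming node data 'h' and edge data 'w' exist):
>>> nf.apply_block(0, lambda edges: {'m': edges.src['h'] * edges.data['w']})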
"""
block_id = self._get_block_id(block_id)
if func == "default":
func = self._apply_edge_funcs[block_id]
assert func is not None
if is_all(edges):
u, v, _ = self.block_edges(block_id, remap_local=True)
u = utils.toindex(u)
v = utils.toindex(v)
eid = utils.toindex(slice(0, self.block_size(block_id)))
elif isinstance(edges, tuple):
u, v = edges
# Rewrite u, v to handle edge broadcasting and multigraph.
u, v, eid = self._graph.edge_ids(utils.toindex(u), utils.toindex(v))
u = utils.toindex(u.tousertensor() - int(self._layer_offsets[block_id]))
v = utils.toindex(v.tousertensor() - int(self._layer_offsets[block_id + 1]))
eid = utils.toindex(eid.tousertensor() - int(self._block_offsets[block_id]))
else:
eid = utils.toindex(edges)
u, v, _ = self._graph.find_edges(eid)
u = utils.toindex(u.tousertensor() - int(self._layer_offsets[block_id]))
v = utils.toindex(v.tousertensor() - int(self._layer_offsets[block_id + 1]))
eid = utils.toindex(edges - int(self._block_offsets[block_id]))
with ir.prog() as prog:
scheduler.schedule_nodeflow_apply_edges(graph=self,
block_id=block_id,
u=u,
v=v,
eid=eid,
apply_func=func,
inplace=inplace)
Runtime.run(prog)
def _glb2lcl_nid(self, nid, layer_id):
layer_id = self._get_layer_id(layer_id)
return nid - int(self._layer_offsets[layer_id])
def _glb2lcl_eid(self, eid, block_id):
block_id = self._get_block_id(block_id)
return eid - int(self._block_offsets[block_id])
def block_compute(self, block_id, message_func="default", reduce_func="default",
apply_node_func="default", v=ALL, inplace=False):
"""Perform the computation on the specified block. It's similar to `pull`
in DGLGraphStale.
On the given block i, it runs `pull` on nodes in layer i+1, which generates
messages on edges in block i, runs the reduce function and node update
function on nodes in layer i+1.
Parameters
----------
block_id : int
The block to run the computation.
message_func : callable, optional
Message function on the edges. The function should be
an :mod:`Edge UDF <dgl.udf>`.
reduce_func : callable, optional
Reduce function on the node. The function should be
a :mod:`Node UDF <dgl.udf>`.
apply_node_func : callable, optional
Apply function on the nodes. The function should be
a :mod:`Node UDF <dgl.udf>`.
v : a list of vertex Ids or ALL.
The Node Ids (unique in the NodeFlow) in layer block_id+1 to run the computation.
inplace: bool, optional
If True, update will be done in place, but autograd will break.
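Examples
--------
An illustrative GCN-style aggregation on block 0 with builtin functions
(assuming node feature 'h' has been copied from the parent graph):
>>> import dgl.function as fn
>>> nf.block_compute(0, fn.copy_src('h', 'm'), fn.mean('m', 'h'))
>>> h1 = nf.layers[1].data['h']  # updated embeddings of layer 1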
"""
block_id = self._get_block_id(block_id)
if message_func == "default":
message_func = self._message_funcs[block_id]
if reduce_func == "default":
reduce_func = self._reduce_funcs[block_id]
if apply_node_func == "default":
apply_node_func = self._apply_node_funcs[block_id]
assert message_func is not None
assert reduce_func is not None
if is_all(v):
with ir.prog() as prog:
scheduler.schedule_nodeflow_update_all(graph=self,
block_id=block_id,
message_func=message_func,
reduce_func=reduce_func,
apply_func=apply_node_func)
Runtime.run(prog)
else:
dest_nodes = utils.toindex(v)
u, v, eid = self._graph.in_edges(dest_nodes)
assert len(u) > 0, "block_compute must run on edges"
u = utils.toindex(self._glb2lcl_nid(u.tousertensor(), block_id))
v = utils.toindex(self._glb2lcl_nid(v.tousertensor(), block_id + 1))
dest_nodes = utils.toindex(
self._glb2lcl_nid(dest_nodes.tousertensor(), block_id + 1))
eid = utils.toindex(self._glb2lcl_eid(eid.tousertensor(), block_id))
with ir.prog() as prog:
scheduler.schedule_nodeflow_compute(graph=self,
block_id=block_id,
u=u,
v=v,
eid=eid,
dest_nodes=dest_nodes,
message_func=message_func,
reduce_func=reduce_func,
apply_func=apply_node_func,
inplace=inplace)
Runtime.run(prog)
def prop_flow(self, message_funcs="default", reduce_funcs="default",
apply_node_funcs="default", flow_range=ALL, inplace=False):
"""Perform the computation on flows. By default, it runs on all blocks, one-by-one.
On block i, it runs `pull` on nodes in layer i+1, which generates
messages on edges in block i, runs the reduce function and node update
function on nodes in layer i+1.
Users can specify a list of message functions, reduce functions and
node apply functions, one for each block. Thus, when a list is given,
the length of the list should be the same as the number of blocks.
Parameters
----------
message_funcs : a callable, a list of callable, optional
Message functions on the edges. The function should be
an :mod:`Edge UDF <dgl.udf>`.
reduce_funcs : a callable, a list of callable, optional
Reduce functions on the node. The function should be
a :mod:`Node UDF <dgl.udf>`.
apply_node_funcs : a callable, a list of callable, optional
Apply functions on the nodes. The function should be
a :mod:`Node UDF <dgl.udf>`.
flow_range : a slice or ALL.
The specified blocks to run the computation.
inplace: bool, optional
If True, update will be done in place, but autograd will break.
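Examples
--------
An illustrative sketch that propagates through all blocks with builtin
functions and reads out the seed-node embeddings:
>>> import dgl.function as fn
>>> nf.prop_flow(fn.copy_src('h', 'm'), fn.sum('m', 'h'))
>>> seed_repr = nf.layers[-1].data['h']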
"""
if is_all(flow_range):
flow_range = range(0, self.num_blocks)
elif isinstance(flow_range, slice):
# A slice built without an explicit step has step None; treat it as 1.
if flow_range.step not in (None, 1):
raise DGLError("We can't propagate flows and skip some of them")
flow_range = range(flow_range.start, flow_range.stop)
else:
raise DGLError("unknown flow range")
for i in flow_range:
if message_funcs == "default":
message_func = self._message_funcs[i]
elif isinstance(message_funcs, list):
message_func = message_funcs[i]
else:
message_func = message_funcs
if reduce_funcs == "default":
reduce_func = self._reduce_funcs[i]
elif isinstance(reduce_funcs, list):
reduce_func = reduce_funcs[i]
else:
reduce_func = reduce_funcs
if apply_node_funcs == "default":
apply_node_func = self._apply_node_funcs[i]
elif isinstance(apply_node_funcs, list):
apply_node_func = apply_node_funcs[i]
else:
apply_node_func = apply_node_funcs
self.block_compute(i, message_func, reduce_func, apply_node_func,
inplace=inplace)
@property
def canonical_etype(self):
"""Return canonical edge type to be compatible with GraphAdapter
"""
return (None, None, None)
def _copy_to_like(arr1, arr2):
return F.copy_to(arr1, F.context(arr2))
def _get_frame(frame, names, ids, ctx):
col_dict = {}
for name in names:
col = F.gather_row(frame[name], _copy_to_like(ids, frame[name]))
if ctx:
col = F.copy_to(col, ctx)
col_dict[name] = col
if len(col_dict) == 0:
return FrameRef(Frame(num_rows=len(ids)))
else:
return FrameRef(Frame(col_dict))
def _copy_frame(frame, ctx):
new_frame = {}
for name in frame:
new_frame[name] = F.copy_to(frame[name], ctx) if ctx else frame[name]
return new_frame
def _update_frame(frame, names, ids, new_frame):
col_dict = {name: new_frame[name] for name in names}
if len(col_dict) > 0:
# This will raise an error for TensorFlow because in-place update is not supported.
frame.update_rows(ids, FrameRef(Frame(col_dict)), inplace=True)
_init_api("dgl._deprecate.nodeflow", __name__)