Unverified commit 56ffb650, authored by peizhou001 and committed by GitHub

[API Deprecation] Deprecate contrib module (#5114)

parent 436de3d1
# -*- coding: utf-8 -*-
#
# setup.py
#
# Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
"""
KG Sparse embedding
"""
import os
import numpy as np
import mxnet as mx
from mxnet import gluon
from mxnet import ndarray as nd
from .score_fun import *
from .. import *
def logsigmoid(val):
# Numerically stable log(sigmoid(val)): factoring out max(0, -val) keeps exp() from
# overflowing; equivalent to -log(1 + exp(-val)).
max_elem = nd.maximum(0., -val)
z = nd.exp(-max_elem) + nd.exp(-val - max_elem)
return -(max_elem + nd.log(z))
get_device = lambda args : mx.gpu(args.gpu[0]) if args.gpu[0] >= 0 else mx.cpu()
norm = lambda x, p: nd.sum(nd.abs(x) ** p)
get_scalar = lambda x: x.detach().asscalar()
reshape = lambda arr, x, y: arr.reshape(x, y)
cuda = lambda arr, gpu: arr.as_in_context(mx.gpu(gpu))
class ExternalEmbedding:
"""Sparse Embedding for Knowledge Graph
It is used to store both entity embeddings and relation embeddings.
Parameters
----------
args :
Global configs.
num : int
Number of embeddings.
dim : int
Embedding dimension size.
ctx : mx.ctx
Device context to store the embedding.
"""
def __init__(self, args, num, dim, ctx):
self.gpu = args.gpu
self.args = args
self.trace = []
self.emb = nd.empty((num, dim), dtype=np.float32, ctx=ctx)
self.state_sum = nd.zeros((self.emb.shape[0]), dtype=np.float32, ctx=ctx)
self.state_step = 0
def init(self, emb_init):
"""Initializing the embeddings.
Parameters
----------
emb_init : float
The initial embedding range is [-emb_init, emb_init].
"""
nd.random.uniform(-emb_init, emb_init,
shape=self.emb.shape, dtype=self.emb.dtype,
ctx=self.emb.context, out=self.emb)
def share_memory(self):
# TODO(zhengda) fix this later
pass
def __call__(self, idx, gpu_id=-1, trace=True):
""" Return sliced tensor.
Parameters
----------
idx : nd.NDArray
Slicing index
gpu_id : int
Which gpu to put sliced data in.
trace : bool
If True, trace the computation. This is required in training.
If False, do not trace the computation.
Default: True
"""
if self.emb.context != idx.context:
idx = idx.as_in_context(self.emb.context)
data = nd.take(self.emb, idx)
if gpu_id >= 0:
data = data.as_in_context(mx.gpu(gpu_id))
data.attach_grad()
if trace:
self.trace.append((idx, data))
return data
def update(self, gpu_id=-1):
""" Update embeddings in a sparse manner
Sparse embeddings are updated in mini batches. We maintain gradient states for
each embedding so they can be updated separately.
Parameters
----------
gpu_id : int
Which GPU to use to accelerate the computation. If -1 is provided, the CPU is used.
"""
self.state_step += 1
for idx, data in self.trace:
grad = data.grad
clr = self.args.lr
#clr = self.args.lr / (1 + (self.state_step - 1) * group['lr_decay'])
# the update is non-linear so indices must be unique
grad_indices = idx
grad_values = grad
grad_sum = (grad_values * grad_values).mean(1)
ctx = self.state_sum.context
if ctx != grad_indices.context:
grad_indices = grad_indices.as_in_context(ctx)
if ctx != grad_sum.context:
grad_sum = grad_sum.as_in_context(ctx)
self.state_sum[grad_indices] += grad_sum
std = self.state_sum[grad_indices] # _sparse_mask
if gpu_id >= 0:
std = std.as_in_context(mx.gpu(gpu_id))
std_values = nd.expand_dims(nd.sqrt(std) + 1e-10, 1)
tmp = (-clr * grad_values / std_values)
if tmp.context != ctx:
tmp = tmp.as_in_context(ctx)
# TODO(zhengda) the overhead is here.
self.emb[grad_indices] = mx.nd.take(self.emb, grad_indices) + tmp
self.trace = []
def curr_emb(self):
"""Return embeddings in trace.
"""
data = [data for _, data in self.trace]
return nd.concat(*data, dim=0)
def save(self, path, name):
"""Save embeddings.
Parameters
----------
path : str
Directory to save the embedding.
name : str
Embedding name.
"""
emb_fname = os.path.join(path, name+'.npy')
np.save(emb_fname, self.emb.asnumpy())
def load(self, path, name):
"""Load embeddings.
Parameters
----------
path : str
Directory to load the embedding.
name : str
Embedding name.
"""
emb_fname = os.path.join(path, name+'.npy')
self.emb = nd.array(np.load(emb_fname))
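# Illustrative usage sketch (not part of the module): `args` is assumed to provide the
# `lr` and `gpu` fields used elsewhere in this file.
#
#   >>> emb = ExternalEmbedding(args, num=1000, dim=400, ctx=mx.cpu())
#   >>> emb.init(emb_init=1.0)
#   >>> idx = nd.array([0, 5, 7], dtype=np.int64)
#   >>> with mx.autograd.record():
#   ...     rows = emb(idx)        # sliced rows are traced for the sparse update
#   ...     loss = norm(rows, 2)
#   >>> loss.backward()
#   >>> emb.update()               # Adagrad-style update applied only to the touched rows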
# -*- coding: utf-8 -*-
#
# setup.py
#
# Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
\ No newline at end of file
# -*- coding: utf-8 -*-
#
# setup.py
#
# Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import torch as th
import torch.nn as nn
import torch.nn.functional as functional
import torch.nn.init as INIT
import numpy as np
def batched_l2_dist(a, b):
a_squared = a.norm(dim=-1).pow(2)
b_squared = b.norm(dim=-1).pow(2)
squared_res = th.baddbmm(
b_squared.unsqueeze(-2), a, b.transpose(-2, -1), alpha=-2
).add_(a_squared.unsqueeze(-1))
res = squared_res.clamp_min_(1e-30).sqrt_()
return res
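# Note on batched_l2_dist (above): it expands ||a_i - b_j||^2 = ||a_i||^2 + ||b_j||^2 - 2 a_i . b_j
# and evaluates the cross term with a single baddbmm, which is typically faster than th.cdist
# for the (num_chunks, n, dim) tensors used by the negative-sampling functions below.
# Quick sanity check (illustrative only):
#
#   >>> a, b = th.randn(2, 5, 8), th.randn(2, 7, 8)
#   >>> th.allclose(batched_l2_dist(a, b), th.cdist(a, b, p=2), atol=1e-4)
#   True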
def batched_l1_dist(a, b):
res = th.cdist(a, b, p=1)
return res
class TransEScore(nn.Module):
"""TransE score function
Paper link: https://papers.nips.cc/paper/5071-translating-embeddings-for-modeling-multi-relational-data
"""
def __init__(self, gamma, dist_func='l2'):
super(TransEScore, self).__init__()
self.gamma = gamma
if dist_func == 'l1':
self.neg_dist_func = batched_l1_dist
self.dist_ord = 1
else: # default use l2
self.neg_dist_func = batched_l2_dist
self.dist_ord = 2
def edge_func(self, edges):
head = edges.src['emb']
tail = edges.dst['emb']
rel = edges.data['emb']
score = head + rel - tail
return {'score': self.gamma - th.norm(score, p=self.dist_ord, dim=-1)}
def prepare(self, g, gpu_id, trace=False):
pass
def create_neg_prepare(self, neg_head):
def fn(rel_id, num_chunks, head, tail, gpu_id, trace=False):
return head, tail
return fn
def forward(self, g):
g.apply_edges(lambda edges: self.edge_func(edges))
def update(self, gpu_id=-1):
pass
def reset_parameters(self):
pass
def save(self, path, name):
pass
def load(self, path, name):
pass
def create_neg(self, neg_head):
gamma = self.gamma
if neg_head:
def fn(heads, relations, tails, num_chunks, chunk_size, neg_sample_size):
hidden_dim = heads.shape[1]
heads = heads.reshape(num_chunks, neg_sample_size, hidden_dim)
tails = tails - relations
tails = tails.reshape(num_chunks, chunk_size, hidden_dim)
return gamma - self.neg_dist_func(tails, heads)
return fn
else:
def fn(heads, relations, tails, num_chunks, chunk_size, neg_sample_size):
hidden_dim = heads.shape[1]
heads = heads + relations
heads = heads.reshape(num_chunks, chunk_size, hidden_dim)
tails = tails.reshape(num_chunks, neg_sample_size, hidden_dim)
return gamma - self.neg_dist_func(heads, tails)
return fn
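# Note on TransEScore (above): a positive triple (h, r, t) scores gamma - ||h + r - t||_p with
# p = 1 or 2. For negative sampling, create_neg() folds the relation into the positive side
# (tails - relations, or heads + relations), reshapes both sides into
# (num_chunks, chunk_size, dim) and (num_chunks, neg_sample_size, dim), and scores all pairs
# within a chunk with one batched distance call.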
class TransRScore(nn.Module):
"""TransR score function
Paper link: https://www.aaai.org/ocs/index.php/AAAI/AAAI15/paper/download/9571/9523
"""
def __init__(self, gamma, projection_emb, relation_dim, entity_dim):
super(TransRScore, self).__init__()
self.gamma = gamma
self.projection_emb = projection_emb
self.relation_dim = relation_dim
self.entity_dim = entity_dim
def edge_func(self, edges):
head = edges.data['head_emb']
tail = edges.data['tail_emb']
rel = edges.data['emb']
score = head + rel - tail
return {'score': self.gamma - th.norm(score, p=1, dim=-1)}
def prepare(self, g, gpu_id, trace=False):
head_ids, tail_ids = g.all_edges(order='eid')
projection = self.projection_emb(g.edata['id'], gpu_id, trace)
projection = projection.reshape(-1, self.entity_dim, self.relation_dim)
g.edata['head_emb'] = th.einsum('ab,abc->ac', g.ndata['emb'][head_ids], projection)
g.edata['tail_emb'] = th.einsum('ab,abc->ac', g.ndata['emb'][tail_ids], projection)
def create_neg_prepare(self, neg_head):
if neg_head:
def fn(rel_id, num_chunks, head, tail, gpu_id, trace=False):
# pos node, project to its relation
projection = self.projection_emb(rel_id, gpu_id, trace)
projection = projection.reshape(num_chunks, -1, self.entity_dim, self.relation_dim)
tail = tail.reshape(num_chunks, -1, 1, self.entity_dim)
tail = th.matmul(tail, projection)
tail = tail.reshape(num_chunks, -1, self.relation_dim)
# neg node, each project to all relations
head = head.reshape(num_chunks, 1, -1, self.entity_dim)
# (num_chunks, num_rel, num_neg_nodes, rel_dim)
head = th.matmul(head, projection)
return head, tail
return fn
else:
def fn(rel_id, num_chunks, head, tail, gpu_id, trace=False):
# pos node, project to its relation
projection = self.projection_emb(rel_id, gpu_id, trace)
projection = projection.reshape(num_chunks, -1, self.entity_dim, self.relation_dim)
head = head.reshape(num_chunks, -1, 1, self.entity_dim)
head = th.matmul(head, projection)
head = head.reshape(num_chunks, -1, self.relation_dim)
# neg node, each project to all relations
tail = tail.reshape(num_chunks, 1, -1, self.entity_dim)
# (num_chunks, num_rel, num_neg_nodes, rel_dim)
tail = th.matmul(tail, projection)
return head, tail
return fn
def forward(self, g):
g.apply_edges(lambda edges: self.edge_func(edges))
def reset_parameters(self):
self.projection_emb.init(1.0)
def update(self, gpu_id=-1):
self.projection_emb.update(gpu_id)
def save(self, path, name):
self.projection_emb.save(path, name+'projection')
def load(self, path, name):
self.projection_emb.load(path, name+'projection')
def prepare_local_emb(self, projection_emb):
self.global_projection_emb = self.projection_emb
self.projection_emb = projection_emb
def prepare_cross_rels(self, cross_rels):
self.projection_emb.setup_cross_rels(cross_rels, self.global_projection_emb)
def writeback_local_emb(self, idx):
self.global_projection_emb.emb[idx] = self.projection_emb.emb.cpu()[idx]
def load_local_emb(self, projection_emb):
device = projection_emb.emb.device
projection_emb.emb = self.projection_emb.emb.to(device)
self.projection_emb = projection_emb
def share_memory(self):
self.projection_emb.share_memory()
def create_neg(self, neg_head):
gamma = self.gamma
if neg_head:
def fn(heads, relations, tails, num_chunks, chunk_size, neg_sample_size):
relations = relations.reshape(num_chunks, -1, self.relation_dim)
tails = tails - relations
tails = tails.reshape(num_chunks, -1, 1, self.relation_dim)
score = heads - tails
return gamma - th.norm(score, p=1, dim=-1)
return fn
else:
def fn(heads, relations, tails, num_chunks, chunk_size, neg_sample_size):
relations = relations.reshape(num_chunks, -1, self.relation_dim)
heads = heads - relations
heads = heads.reshape(num_chunks, -1, 1, self.relation_dim)
score = heads - tails
return gamma - th.norm(score, p=1, dim=-1)
return fn
class DistMultScore(nn.Module):
"""DistMult score function
Paper link: https://arxiv.org/abs/1412.6575
"""
def __init__(self):
super(DistMultScore, self).__init__()
def edge_func(self, edges):
head = edges.src['emb']
tail = edges.dst['emb']
rel = edges.data['emb']
score = head * rel * tail
# TODO: check if there exists minus sign and if gamma should be used here(jin)
return {'score': th.sum(score, dim=-1)}
def prepare(self, g, gpu_id, trace=False):
pass
def create_neg_prepare(self, neg_head):
def fn(rel_id, num_chunks, head, tail, gpu_id, trace=False):
return head, tail
return fn
def update(self, gpu_id=-1):
pass
def reset_parameters(self):
pass
def save(self, path, name):
pass
def load(self, path, name):
pass
def forward(self, g):
g.apply_edges(lambda edges: self.edge_func(edges))
def create_neg(self, neg_head):
if neg_head:
def fn(heads, relations, tails, num_chunks, chunk_size, neg_sample_size):
hidden_dim = heads.shape[1]
heads = heads.reshape(num_chunks, neg_sample_size, hidden_dim)
heads = th.transpose(heads, 1, 2)
tmp = (tails * relations).reshape(num_chunks, chunk_size, hidden_dim)
return th.bmm(tmp, heads)
return fn
else:
def fn(heads, relations, tails, num_chunks, chunk_size, neg_sample_size):
hidden_dim = tails.shape[1]
tails = tails.reshape(num_chunks, neg_sample_size, hidden_dim)
tails = th.transpose(tails, 1, 2)
tmp = (heads * relations).reshape(num_chunks, chunk_size, hidden_dim)
return th.bmm(tmp, tails)
return fn
class ComplExScore(nn.Module):
"""ComplEx score function
Paper link: https://arxiv.org/abs/1606.06357
"""
def __init__(self):
super(ComplExScore, self).__init__()
def edge_func(self, edges):
real_head, img_head = th.chunk(edges.src['emb'], 2, dim=-1)
real_tail, img_tail = th.chunk(edges.dst['emb'], 2, dim=-1)
real_rel, img_rel = th.chunk(edges.data['emb'], 2, dim=-1)
score = real_head * real_tail * real_rel \
+ img_head * img_tail * real_rel \
+ real_head * img_tail * img_rel \
- img_head * real_tail * img_rel
# TODO: check if there exists minus sign and if gamma should be used here(jin)
return {'score': th.sum(score, -1)}
def prepare(self, g, gpu_id, trace=False):
pass
def create_neg_prepare(self, neg_head):
def fn(rel_id, num_chunks, head, tail, gpu_id, trace=False):
return head, tail
return fn
def update(self, gpu_id=-1):
pass
def reset_parameters(self):
pass
def save(self, path, name):
pass
def load(self, path, name):
pass
def forward(self, g):
g.apply_edges(lambda edges: self.edge_func(edges))
def create_neg(self, neg_head):
if neg_head:
def fn(heads, relations, tails, num_chunks, chunk_size, neg_sample_size):
hidden_dim = heads.shape[1]
emb_real = tails[..., :hidden_dim // 2]
emb_imag = tails[..., hidden_dim // 2:]
rel_real = relations[..., :hidden_dim // 2]
rel_imag = relations[..., hidden_dim // 2:]
real = emb_real * rel_real + emb_imag * rel_imag
imag = -emb_real * rel_imag + emb_imag * rel_real
emb_complex = th.cat((real, imag), dim=-1)
tmp = emb_complex.reshape(num_chunks, chunk_size, hidden_dim)
heads = heads.reshape(num_chunks, neg_sample_size, hidden_dim)
heads = th.transpose(heads, 1, 2)
return th.bmm(tmp, heads)
return fn
else:
def fn(heads, relations, tails, num_chunks, chunk_size, neg_sample_size):
hidden_dim = heads.shape[1]
emb_real = heads[..., :hidden_dim // 2]
emb_imag = heads[..., hidden_dim // 2:]
rel_real = relations[..., :hidden_dim // 2]
rel_imag = relations[..., hidden_dim // 2:]
real = emb_real * rel_real - emb_imag * rel_imag
imag = emb_real * rel_imag + emb_imag * rel_real
emb_complex = th.cat((real, imag), dim=-1)
tmp = emb_complex.reshape(num_chunks, chunk_size, hidden_dim)
tails = tails.reshape(num_chunks, neg_sample_size, hidden_dim)
tails = th.transpose(tails, 1, 2)
return th.bmm(tmp, tails)
return fn
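# Note on ComplExScore (above): with each embedding split into real and imaginary halves,
# the four-term product in edge_func equals Re(sum_i h_i * r_i * conj(t_i)), the standard
# ComplEx score. create_neg() precomputes the complex product of the positive side with the
# relation and reduces all pairwise scores in a chunk to a single bmm.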
class RESCALScore(nn.Module):
"""RESCAL score function
Paper link: http://www.icml-2011.org/papers/438_icmlpaper.pdf
"""
def __init__(self, relation_dim, entity_dim):
super(RESCALScore, self).__init__()
self.relation_dim = relation_dim
self.entity_dim = entity_dim
def edge_func(self, edges):
head = edges.src['emb']
tail = edges.dst['emb'].unsqueeze(-1)
rel = edges.data['emb']
rel = rel.view(-1, self.relation_dim, self.entity_dim)
score = head * th.matmul(rel, tail).squeeze(-1)
# TODO: check if use self.gamma
return {'score': th.sum(score, dim=-1)}
# return {'score': self.gamma - th.norm(score, p=1, dim=-1)}
def prepare(self, g, gpu_id, trace=False):
pass
def create_neg_prepare(self, neg_head):
def fn(rel_id, num_chunks, head, tail, gpu_id, trace=False):
return head, tail
return fn
def update(self, gpu_id=-1):
pass
def reset_parameters(self):
pass
def save(self, path, name):
pass
def load(self, path, name):
pass
def forward(self, g):
g.apply_edges(lambda edges: self.edge_func(edges))
def create_neg(self, neg_head):
if neg_head:
def fn(heads, relations, tails, num_chunks, chunk_size, neg_sample_size):
hidden_dim = heads.shape[1]
heads = heads.reshape(num_chunks, neg_sample_size, hidden_dim)
heads = th.transpose(heads, 1, 2)
tails = tails.unsqueeze(-1)
relations = relations.view(-1, self.relation_dim, self.entity_dim)
tmp = th.matmul(relations, tails).squeeze(-1)
tmp = tmp.reshape(num_chunks, chunk_size, hidden_dim)
return th.bmm(tmp, heads)
return fn
else:
def fn(heads, relations, tails, num_chunks, chunk_size, neg_sample_size):
hidden_dim = heads.shape[1]
tails = tails.reshape(num_chunks, neg_sample_size, hidden_dim)
tails = th.transpose(tails, 1, 2)
heads = heads.unsqueeze(-1)
relations = relations.view(-1, self.relation_dim, self.entity_dim)
tmp = th.matmul(relations, heads).squeeze(-1)
tmp = tmp.reshape(num_chunks, chunk_size, hidden_dim)
return th.bmm(tmp, tails)
return fn
class RotatEScore(nn.Module):
"""RotatE score function
Paper link: https://arxiv.org/abs/1902.10197
"""
def __init__(self, gamma, emb_init):
super(RotatEScore, self).__init__()
self.gamma = gamma
self.emb_init = emb_init
def edge_func(self, edges):
re_head, im_head = th.chunk(edges.src['emb'], 2, dim=-1)
re_tail, im_tail = th.chunk(edges.dst['emb'], 2, dim=-1)
phase_rel = edges.data['emb'] / (self.emb_init / np.pi)
re_rel, im_rel = th.cos(phase_rel), th.sin(phase_rel)
re_score = re_head * re_rel - im_head * im_rel
im_score = re_head * im_rel + im_head * re_rel
re_score = re_score - re_tail
im_score = im_score - im_tail
score = th.stack([re_score, im_score], dim=0)
score = score.norm(dim=0)
return {'score': self.gamma - score.sum(-1)}
def update(self, gpu_id=-1):
pass
def reset_parameters(self):
pass
def save(self, path, name):
pass
def load(self, path, name):
pass
def forward(self, g):
g.apply_edges(lambda edges: self.edge_func(edges))
def create_neg_prepare(self, neg_head):
def fn(rel_id, num_chunks, head, tail, gpu_id, trace=False):
return head, tail
return fn
def prepare(self, g, gpu_id, trace=False):
pass
def create_neg(self, neg_head):
gamma = self.gamma
emb_init = self.emb_init
if neg_head:
def fn(heads, relations, tails, num_chunks, chunk_size, neg_sample_size):
hidden_dim = heads.shape[1]
emb_real = tails[..., :hidden_dim // 2]
emb_imag = tails[..., hidden_dim // 2:]
phase_rel = relations / (emb_init / np.pi)
rel_real, rel_imag = th.cos(phase_rel), th.sin(phase_rel)
real = emb_real * rel_real + emb_imag * rel_imag
imag = -emb_real * rel_imag + emb_imag * rel_real
emb_complex = th.cat((real, imag), dim=-1)
tmp = emb_complex.reshape(num_chunks, chunk_size, 1, hidden_dim)
heads = heads.reshape(num_chunks, 1, neg_sample_size, hidden_dim)
score = tmp - heads
score = th.stack([score[..., :hidden_dim // 2],
score[..., hidden_dim // 2:]], dim=-1).norm(dim=-1)
return gamma - score.sum(-1)
return fn
else:
def fn(heads, relations, tails, num_chunks, chunk_size, neg_sample_size):
hidden_dim = heads.shape[1]
emb_real = heads[..., :hidden_dim // 2]
emb_imag = heads[..., hidden_dim // 2:]
phase_rel = relations / (emb_init / np.pi)
rel_real, rel_imag = th.cos(phase_rel), th.sin(phase_rel)
real = emb_real * rel_real - emb_imag * rel_imag
imag = emb_real * rel_imag + emb_imag * rel_real
emb_complex = th.cat((real, imag), dim=-1)
tmp = emb_complex.reshape(num_chunks, chunk_size, 1, hidden_dim)
tails = tails.reshape(num_chunks, 1, neg_sample_size, hidden_dim)
score = tmp - tails
score = th.stack([score[..., :hidden_dim // 2],
score[..., hidden_dim // 2:]], dim=-1).norm(dim=-1)
return gamma - score.sum(-1)
return fn
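# Note on RotatEScore (above): relation embeddings are interpreted as phases. Dividing by
# (emb_init / pi) maps the stored values to angles, the head is rotated in the complex plane
# by (cos, sin) of that angle, and the score is gamma minus the summed complex modulus of the
# difference to the tail.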
# -*- coding: utf-8 -*-
#
# setup.py
#
# Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
"""
KG Sparse embedding
"""
import os
import numpy as np
import torch as th
import torch.nn as nn
import torch.nn.functional as functional
import torch.nn.init as INIT
import torch.multiprocessing as mp
from torch.multiprocessing import Queue
from _thread import start_new_thread
import traceback
from functools import wraps
from .. import *
logsigmoid = functional.logsigmoid
def get_device(args):
return th.device('cpu') if args.gpu[0] < 0 else th.device('cuda:' + str(args.gpu[0]))
norm = lambda x, p: x.norm(p=p)**p
get_scalar = lambda x: x.detach().item()
reshape = lambda arr, x, y: arr.view(x, y)
cuda = lambda arr, gpu: arr.cuda(gpu)
def thread_wrapped_func(func):
"""Wrapped func for torch.multiprocessing.Process.
With this wrapper we can use OMP threads in subprocesses
otherwise, OMP_NUM_THREADS=1 is mandatory.
How to use:
@thread_wrapped_func
def func_to_wrap(args ...):
"""
@wraps(func)
def decorated_function(*args, **kwargs):
queue = Queue()
def _queue_result():
exception, trace, res = None, None, None
try:
res = func(*args, **kwargs)
except Exception as e:
exception = e
trace = traceback.format_exc()
queue.put((res, exception, trace))
start_new_thread(_queue_result, ())
result, exception, trace = queue.get()
if exception is None:
return result
else:
assert isinstance(exception, Exception)
raise exception.__class__(trace)
return decorated_function
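# Illustrative usage sketch for thread_wrapped_func (above): decorate the worker function
# before handing it to torch.multiprocessing.Process; the wrapped call then runs in a fresh
# OS thread inside the subprocess, so OMP threading is not restricted there.
#
#   >>> @thread_wrapped_func
#   ... def train_worker(args, emb):
#   ...     pass  # training loop goes here
#   >>> p = mp.Process(target=train_worker, args=(args, emb))
#   >>> p.start(); p.join()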
@thread_wrapped_func
def async_update(args, emb, queue):
"""Asynchronous embedding update for entity embeddings.
How it works:
1. The trainer process pushes entity embedding update requests into the queue.
2. The async_update process pulls requests from the queue, updates the gradient
state, and writes the resulting update into the entity embeddings.
Parameters
----------
args :
Global configs.
emb : ExternalEmbedding
The entity embeddings.
queue:
The request queue.
"""
th.set_num_threads(args.num_thread)
while True:
(grad_indices, grad_values, gpu_id) = queue.get()
clr = emb.args.lr
if grad_indices is None:
return
with th.no_grad():
grad_sum = (grad_values * grad_values).mean(1)
device = emb.state_sum.device
if device != grad_indices.device:
grad_indices = grad_indices.to(device)
if device != grad_sum.device:
grad_sum = grad_sum.to(device)
emb.state_sum.index_add_(0, grad_indices, grad_sum)
std = emb.state_sum[grad_indices] # _sparse_mask
if gpu_id >= 0:
std = std.cuda(gpu_id)
std_values = std.sqrt_().add_(1e-10).unsqueeze(1)
tmp = (-clr * grad_values / std_values)
if tmp.device != device:
tmp = tmp.to(device)
emb.emb.index_add_(0, grad_indices, tmp)
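# Queue protocol used by async_update (above): the trainer puts (grad_indices, grad_values,
# gpu_id) tuples, one per mini-batch (see ExternalEmbedding.update below), and a
# (None, None, None) sentinel tells the subprocess to exit (see finish_async_update).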
class ExternalEmbedding:
"""Sparse Embedding for Knowledge Graph
It is used to store both entity embeddings and relation embeddings.
Parameters
----------
args :
Global configs.
num : int
Number of embeddings.
dim : int
Embedding dimension size.
device : th.device
Device to store the embedding.
"""
def __init__(self, args, num, dim, device):
self.gpu = args.gpu
self.args = args
self.num = num
self.trace = []
self.emb = th.empty(num, dim, dtype=th.float32, device=device)
self.state_sum = self.emb.new().resize_(self.emb.size(0)).zero_()
self.state_step = 0
self.has_cross_rel = False
# queue used by asynchronous update
self.async_q = None
# asynchronous update process
self.async_p = None
def init(self, emb_init):
"""Initializing the embeddings.
Parameters
----------
emb_init : float
The initial embedding range is [-emb_init, emb_init].
"""
INIT.uniform_(self.emb, -emb_init, emb_init)
INIT.zeros_(self.state_sum)
def setup_cross_rels(self, cross_rels, global_emb):
cpu_bitmap = th.zeros((self.num,), dtype=th.bool)
for i, rel in enumerate(cross_rels):
cpu_bitmap[rel] = 1
self.cpu_bitmap = cpu_bitmap
self.has_cross_rel = True
self.global_emb = global_emb
def get_noncross_idx(self, idx):
cpu_mask = self.cpu_bitmap[idx]
gpu_mask = ~cpu_mask
return idx[gpu_mask]
def share_memory(self):
"""Use torch.tensor.share_memory_() to allow cross process tensor access
"""
self.emb.share_memory_()
self.state_sum.share_memory_()
def __call__(self, idx, gpu_id=-1, trace=True):
""" Return sliced tensor.
Parameters
----------
idx : th.tensor
Slicing index
gpu_id : int
Which gpu to put sliced data in.
trace : bool
If True, trace the computation. This is required in training.
If False, do not trace the computation.
Default: True
"""
if self.has_cross_rel:
cpu_idx = idx.cpu()
cpu_mask = self.cpu_bitmap[cpu_idx]
cpu_idx = cpu_idx[cpu_mask]
cpu_idx = th.unique(cpu_idx)
if cpu_idx.shape[0] != 0:
cpu_emb = self.global_emb.emb[cpu_idx]
self.emb[cpu_idx] = cpu_emb.cuda(gpu_id)
s = self.emb[idx]
if gpu_id >= 0:
s = s.cuda(gpu_id)
# During training we need to trace the computation: the sliced rows are recorded
# so that their gradients can be computed and applied later in update().
if trace:
data = s.clone().detach().requires_grad_(True)
self.trace.append((idx, data))
else:
data = s
return data
def update(self, gpu_id=-1):
""" Update embeddings in a sparse manner
Sparse embeddings are updated in mini batches. We maintain gradient states for
each embedding so they can be updated separately.
Parameters
----------
gpu_id : int
Which GPU to use to accelerate the computation. If -1 is provided, the CPU is used.
"""
self.state_step += 1
with th.no_grad():
for idx, data in self.trace:
grad = data.grad.data
clr = self.args.lr
#clr = self.args.lr / (1 + (self.state_step - 1) * group['lr_decay'])
# the update is non-linear so indices must be unique
grad_indices = idx
grad_values = grad
if self.async_q is not None:
grad_indices.share_memory_()
grad_values.share_memory_()
self.async_q.put((grad_indices, grad_values, gpu_id))
else:
grad_sum = (grad_values * grad_values).mean(1)
device = self.state_sum.device
if device != grad_indices.device:
grad_indices = grad_indices.to(device)
if device != grad_sum.device:
grad_sum = grad_sum.to(device)
if self.has_cross_rel:
cpu_mask = self.cpu_bitmap[grad_indices]
cpu_idx = grad_indices[cpu_mask]
if cpu_idx.shape[0] > 0:
cpu_grad = grad_values[cpu_mask]
cpu_sum = grad_sum[cpu_mask].cpu()
cpu_idx = cpu_idx.cpu()
self.global_emb.state_sum.index_add_(0, cpu_idx, cpu_sum)
std = self.global_emb.state_sum[cpu_idx]
if gpu_id >= 0:
std = std.cuda(gpu_id)
std_values = std.sqrt_().add_(1e-10).unsqueeze(1)
tmp = (-clr * cpu_grad / std_values)
tmp = tmp.cpu()
self.global_emb.emb.index_add_(0, cpu_idx, tmp)
self.state_sum.index_add_(0, grad_indices, grad_sum)
std = self.state_sum[grad_indices] # _sparse_mask
if gpu_id >= 0:
std = std.cuda(gpu_id)
std_values = std.sqrt_().add_(1e-10).unsqueeze(1)
tmp = (-clr * grad_values / std_values)
if tmp.device != device:
tmp = tmp.to(device)
# TODO(zhengda) the overhead is here.
self.emb.index_add_(0, grad_indices, tmp)
self.trace = []
def create_async_update(self):
"""Set up the async update subprocess.
"""
self.async_q = Queue(1)
self.async_p = mp.Process(target=async_update, args=(self.args, self, self.async_q))
self.async_p.start()
def finish_async_update(self):
"""Notify the async update subprocess to quit.
"""
self.async_q.put((None, None, None))
self.async_p.join()
def curr_emb(self):
"""Return embeddings in trace.
"""
data = [data for _, data in self.trace]
return th.cat(data, 0)
def save(self, path, name):
"""Save embeddings.
Parameters
----------
path : str
Directory to save the embedding.
name : str
Embedding name.
"""
file_name = os.path.join(path, name+'.npy')
np.save(file_name, self.emb.cpu().detach().numpy())
def load(self, path, name):
"""Load embeddings.
Parameters
----------
path : str
Directory to load the embedding.
name : str
Embedding name.
"""
file_name = os.path.join(path, name+'.npy')
self.emb = th.Tensor(np.load(file_name))
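# Illustrative usage sketch (not part of the module): `args` is assumed to provide the
# `lr`, `gpu` and `num_thread` fields used elsewhere in this file.
#
#   >>> emb = ExternalEmbedding(args, num=1000, dim=400, device=th.device('cpu'))
#   >>> emb.init(emb_init=1.0)
#   >>> emb.share_memory()
#   >>> emb.create_async_update()            # optional: apply updates in a background process
#   >>> rows = emb(th.tensor([0, 5, 7]))     # traced slice with requires_grad=True
#   >>> rows.sum().backward()
#   >>> emb.update()                         # pushes (idx, grad) to the async queue
#   >>> emb.finish_async_update()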
# -*- coding: utf-8 -*-
#
# setup.py
#
# Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
from dataloader import get_dataset
import scipy as sp
import numpy as np
import argparse
import os
import dgl
from dgl import backend as F
from dgl.data.utils import load_graphs, save_graphs
def write_txt_graph(path, file_name, part_dict, total_nodes):
partition_book = [0] * total_nodes
for part_id in part_dict:
print('write graph %d...' % part_id)
# Get (h,r,t) triples
partition_path = path + str(part_id)
if not os.path.exists(partition_path):
os.mkdir(partition_path)
triple_file = os.path.join(partition_path, file_name)
f = open(triple_file, 'w')
graph = part_dict[part_id]
src, dst = graph.all_edges(form='uv', order='eid')
rel = graph.edata['tid']
assert len(src) == len(rel)
src = F.asnumpy(src)
dst = F.asnumpy(dst)
rel = F.asnumpy(rel)
for i in range(len(src)):
f.write(str(src[i])+'\t'+str(rel[i])+'\t'+str(dst[i])+'\n')
f.close()
# Get local2global
l2g_file = os.path.join(partition_path, 'local_to_global.txt')
f = open(l2g_file, 'w')
pid = F.asnumpy(graph.parent_nid)
for i in range(len(pid)):
f.write(str(pid[i])+'\n')
f.close()
# Update partition_book
partition = F.asnumpy(graph.ndata['part_id'])
for i in range(len(pid)):
partition_book[pid[i]] = partition[i]
# Write partition_book.txt
for part_id in part_dict:
partition_path = path + str(part_id)
pb_file = os.path.join(partition_path, 'partition_book.txt')
f = open(pb_file, 'w')
for i in range(len(partition_book)):
f.write(str(partition_book[i])+'\n')
f.close()
def main():
parser = argparse.ArgumentParser(description='Partition a knowledge graph')
parser.add_argument('--data_path', type=str, default='data',
help='root path of all dataset')
parser.add_argument('--dataset', type=str, default='FB15k',
help='dataset name, under data_path')
parser.add_argument('--data_files', type=str, default=None, nargs='+',
help='a list of data files, e.g. entity relation train valid test')
parser.add_argument('--format', type=str, default='built_in',
help='the format of the dataset, it can be built_in,'\
'raw_udd_{htr} and udd_{htr}')
parser.add_argument('-k', '--num-parts', required=True, type=int,
help='The number of partitions')
args = parser.parse_args()
num_parts = args.num_parts
print('load dataset..')
# load dataset and samplers
dataset = get_dataset(args.data_path, args.dataset, args.format, args.data_files)
print('construct graph...')
src, etype_id, dst = dataset.train
coo = sp.sparse.coo_matrix((np.ones(len(src)), (src, dst)),
shape=[dataset.n_entities, dataset.n_entities])
g = dgl.DGLGraph(coo, readonly=True, multigraph=True, sort_csr=True)
g.edata['tid'] = F.tensor(etype_id, F.int64)
print('partition graph...')
part_dict = dgl.transforms.metis_partition(g, num_parts, 1)
tot_num_inner_edges = 0
for part_id in part_dict:
part = part_dict[part_id]
num_inner_nodes = len(np.nonzero(F.asnumpy(part.ndata['inner_node']))[0])
num_inner_edges = len(np.nonzero(F.asnumpy(part.edata['inner_edge']))[0])
print('part {} has {} nodes and {} edges. {} nodes and {} edges are inside the partition'.format(
part_id, part.number_of_nodes(), part.number_of_edges(),
num_inner_nodes, num_inner_edges))
tot_num_inner_edges += num_inner_edges
part.copy_from_parent()
print('write graph to txt file...')
txt_file_graph = os.path.join(args.data_path, args.dataset)
txt_file_graph = os.path.join(txt_file_graph, 'partition_')
write_txt_graph(txt_file_graph, 'train.txt', part_dict, g.number_of_nodes())
print('there are {} edges in the graph and {} edge cuts for {} partitions.'.format(
g.number_of_edges(), g.number_of_edges() - tot_num_inner_edges, len(part_dict)))
if __name__ == '__main__':
main()
\ No newline at end of file
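# Example invocation (illustrative; the script's file name is assumed here):
#
#   python partition.py --data_path data --dataset FB15k --format built_in -k 4
#
# This writes partition_0 ... partition_3 directories under data/FB15k, each containing
# train.txt, local_to_global.txt and partition_book.txt as produced by write_txt_graph().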
# -*- coding: utf-8 -*-
#
# setup.py
#
# Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import math
def get_compatible_batch_size(batch_size, neg_sample_size):
if neg_sample_size < batch_size and batch_size % neg_sample_size != 0:
old_batch_size = batch_size
batch_size = int(math.ceil(batch_size / neg_sample_size) * neg_sample_size)
print('batch size ({}) is incompatible with the negative sample size ({}). Changing the batch size to {}'.format(
old_batch_size, neg_sample_size, batch_size))
return batch_size
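# Example: with batch_size=1000 and neg_sample_size=300, 1000 is not a multiple of 300,
# so the batch size is rounded up to ceil(1000 / 300) * 300 = 1200.
#
#   >>> get_compatible_batch_size(1000, 300)
#   1200
#   >>> get_compatible_batch_size(1024, 256)
#   1024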
.. _apiunifiedtensor:
dgl.contrib.UnifiedTensor
==========================
.. automodule:: dgl.contrib
UnifiedTensor enables direct CPU memory access from the GPU.
This feature is especially useful when GPUs need to access sparse data structures stored in CPU memory, for example when node features do not fit in GPU memory.
Without this feature, sparsely structured data located in CPU memory must be gathered (or packed) before it is transferred to GPU memory, because GPU DMA engines can only transfer data at block granularity.
This gathering step wastes CPU cycles and increases the CPU-to-GPU data copy time.
The goal of UnifiedTensor is to skip the CPU gathering step by letting GPUs access even non-regular data in CPU memory directly.
At the hardware level, this capability is provided by NVIDIA GPUs' unified virtual addressing (UVM) and zero-copy access.
Those who wish to further extend the capability of UnifiedTensor may read the following paper (`link <https://arxiv.org/abs/2103.03330>`_), which explains the underlying mechanism of UnifiedTensor in detail.
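A minimal usage sketch (illustrative only; the exact constructor signature may differ across DGL releases) wraps a CPU feature tensor once and then gathers rows with a GPU-resident index tensor::

    import torch as th
    import dgl

    # Node features kept in CPU memory, too large to move to the GPU wholesale.
    feats = th.randn(1000000, 128)
    unified = dgl.contrib.UnifiedTensor(feats, device=th.device('cuda:0'))
    # A GPU-resident index gathers only the needed rows, directly over UVM.
    idx = th.randint(0, feats.shape[0], (1024,), device='cuda:0')
    batch = unified[idx]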
UnifiedTensor Class
-------------------
.. autoclass:: UnifiedTensor
:members: __getitem__
......@@ -15,6 +15,5 @@ API Reference
nn-mxnet
dgl.ops
dgl.sampling
dgl.contrib.UnifiedTensor
udf
transforms
......@@ -52,7 +52,6 @@ Welcome to Deep Graph Library Tutorials and Documentation
api/python/dgl.optim
api/python/dgl.sampling
api/python/dgl.multiprocessing
api/python/dgl.contrib.UnifiedTensor
api/python/transforms
api/python/udf
api/python/dgl.sparse_v0
......
......@@ -16,7 +16,6 @@ from mxnet import gluon
import mxnet.ndarray as F
import dgl
from dgl.nn.mxnet import RelGraphConv
from dgl.contrib.data import load_data
from functools import partial
from dgl.data.rdf import AIFBDataset, MUTAGDataset, BGSDataset, AMDataset
......