Commit 2b98e764 authored by Mufei Li, committed by GitHub

[Transform] Modules for Augmentation (#3668)



* Update

* Update

* Fix

* Update

* Update

* Update

* Update

* Fix

* Update

* Update

* Update

* Update

* Fix lint

* lint

* Update

* Update

* lint fix

* Fix CI

* Fix

* Fix CI

* Update

* Fix

* Update

* Update

* Augmentation (#10)

* Update

* PPR

* Update

* Update

* Update

* Update

* Update

* Update

* Update

* Update

* Update

* Update

* Update

* Update

* CI

* lint

* lint

* Update

* Update

* Fix AddEdge

* try import

* Update

* Fix

* CI
Co-authored-by: Ubuntu <ubuntu@ip-172-31-31-136.us-west-2.compute.internal>
Co-authored-by: Minjie Wang <wmjlyjemaine@gmail.com>
parent ba62b730
@@ -13,6 +13,12 @@ BaseTransform
     :members: __call__, __repr__
     :show-inheritance:
 
+Compose
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+.. autoclass:: Compose
+    :show-inheritance:
+
 AddSelfLoop
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -55,8 +61,50 @@ AddMetaPaths
 .. autoclass:: AddMetaPaths
     :show-inheritance:
 
-KNNGraph
+GCNNorm
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+.. autoclass:: GCNNorm
+    :show-inheritance:
+
+PPR
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+.. autoclass:: PPR
+    :show-inheritance:
+
+HeatKernel
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+.. autoclass:: HeatKernel
+    :show-inheritance:
+
+GDC
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+.. autoclass:: GDC
+    :show-inheritance:
+
+NodeShuffle
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+.. autoclass:: NodeShuffle
+    :show-inheritance:
+
+DropNode
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+.. autoclass:: DropNode
+    :show-inheritance:
+
+DropEdge
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+.. autoclass:: DropEdge
+    :show-inheritance:
+
+AddEdge
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
-.. autoclass:: KNNGraph
+.. autoclass:: AddEdge
     :show-inheritance:
@@ -570,6 +570,21 @@ def exp(input):
     """
     pass
 
+def inverse(input):
+    """Returns the inverse matrix of a square matrix if it exists.
+
+    Parameters
+    ----------
+    input : Tensor
+        The input square matrix.
+
+    Returns
+    -------
+    Tensor
+        The output tensor.
+    """
+    pass
+
 def sqrt(input):
     """Returns a new tensor with the square root of the elements of the input tensor `input`.
@@ -1057,6 +1072,21 @@ def equal(x, y):
     """
     pass
 
+def allclose(x, y, rtol=1e-4, atol=1e-4):
+    """Compares whether all elements are close.
+
+    Parameters
+    ----------
+    x : Tensor
+        First tensor
+    y : Tensor
+        Second tensor
+    rtol : float, optional
+        Relative tolerance
+    atol : float, optional
+        Absolute tolerance
+    """
+    pass
+
 def logical_not(input):
     """Perform a logical not operation. Equivalent to np.logical_not
...
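
A minimal sketch of the contract the two new stubs define, using PyTorch as the reference backend (an assumption; any backend implementation must behave the same way):

    import torch as th

    m = th.tensor([[4., 7.], [2., 6.]])
    m_inv = th.inverse(m)                  # what inverse(input) must return
    # inverse() composed with the original gives identity, up to allclose()
    assert th.allclose(m @ m_inv, th.eye(2), rtol=1e-4, atol=1e-4)
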
@@ -191,6 +191,9 @@ def argsort(input, dim, descending):
 def exp(input):
     return nd.exp(input)
 
+def inverse(input):
+    return nd.linalg_inverse(input)
+
 def sqrt(input):
     return nd.sqrt(input)
@@ -327,6 +330,9 @@ def boolean_mask(input, mask):
 def equal(x, y):
     return x == y
 
+def allclose(x, y, rtol=1e-4, atol=1e-4):
+    return np.allclose(x.asnumpy(), y.asnumpy(), rtol=rtol, atol=atol)
+
 def logical_not(input):
     return nd.logical_not(input)
...
@@ -14,8 +14,8 @@ from ..._deprecate import kernel as K
 from ...function.base import TargetCode
 from ...base import dgl_warning
 
-if LooseVersion(th.__version__) < LooseVersion("1.5.0"):
-    raise Exception("Detected an old version of PyTorch. Please update torch>=1.5.0 "
+if LooseVersion(th.__version__) < LooseVersion("1.8.0"):
+    raise Exception("Detected an old version of PyTorch. Please update torch>=1.8.0 "
                     "for the best experience.")
 
 def data_type_dict():
@@ -164,6 +164,9 @@ def argtopk(input, k, dim, descending=True):
 def exp(input):
     return th.exp(input)
 
+def inverse(input):
+    return th.inverse(input)
+
 def sqrt(input):
     return th.sqrt(input)
@@ -276,6 +279,9 @@ def boolean_mask(input, mask):
 def equal(x, y):
     return x == y
 
+def allclose(x, y, rtol=1e-4, atol=1e-4):
+    return th.allclose(x, y, rtol=rtol, atol=atol)
+
 def logical_not(input):
     return ~input
...
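
A quick check of the new op (a sketch; on torch>=1.8, the version floor enforced above, th.inverse should agree with th.linalg.inv):

    import torch as th

    x = th.rand(3, 3) + 3 * th.eye(3)      # well-conditioned square matrix
    assert th.allclose(th.inverse(x), th.linalg.inv(x))
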
@@ -244,6 +244,10 @@ def exp(input):
     return tf.exp(input)
 
+def inverse(input):
+    return tf.linalg.inv(input)
+
 def sqrt(input):
     return tf.sqrt(input)
@@ -396,6 +400,11 @@ def equal(x, y):
     return x == y
 
+def allclose(x, y, rtol=1e-4, atol=1e-4):
+    return np.allclose(tf.convert_to_tensor(x).numpy(),
+                       tf.convert_to_tensor(y).numpy(), rtol=rtol, atol=atol)
+
 def logical_not(input):
     return ~input
...
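
TensorFlow has no top-level tf.allclose, so the implementation above round-trips through NumPy. A minimal sketch of the behavior:

    import numpy as np
    import tensorflow as tf

    x = tf.constant([[2., 0.], [0., 4.]])
    inv = tf.linalg.inv(x)                 # what the new inverse() returns
    assert np.allclose(tf.convert_to_tensor(inv).numpy(),
                       np.array([[0.5, 0.], [0., 0.25]]), rtol=1e-4, atol=1e-4)
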
@@ -14,11 +14,21 @@
 # limitations under the License.
 #
 """Modules for transform"""
+# pylint: disable= no-member, arguments-differ, invalid-name
+
+from scipy.linalg import expm
 
 from .. import convert
 from .. import backend as F
+from .. import function as fn
 from . import functional
 
+try:
+    import torch
+    from torch.distributions import Bernoulli
+except ImportError:
+    pass
+
 __all__ = [
     'BaseTransform',
     'AddSelfLoop',
@@ -28,7 +38,15 @@ __all__ = [
     'LineGraph',
     'KHopGraph',
     'AddMetaPaths',
-    'Compose'
+    'Compose',
+    'GCNNorm',
+    'PPR',
+    'HeatKernel',
+    'GDC',
+    'NodeShuffle',
+    'DropNode',
+    'DropEdge',
+    'AddEdge'
 ]
 
 def update_graph_structure(g, data_dict, copy_edata=True):
@@ -672,3 +690,568 @@ class Compose(BaseTransform):
     def __repr__(self):
         args = [' ' + str(transform) for transform in self.transforms]
         return self.__class__.__name__ + '([\n' + ',\n'.join(args) + '\n])'
class GCNNorm(BaseTransform):
r"""
Description
-----------
Apply symmetric adjacency normalization to an input graph and save the result edge weights,
as described in `Semi-Supervised Classification with Graph Convolutional Networks
<https://arxiv.org/abs/1609.02907>`__.
For a heterogeneous graph, this only applies to symmetric canonical edge types, whose source
and destination node types are identical.
Parameters
----------
eweight_name : str, optional
:attr:`edata` name to retrieve and store edge weights. The edge weights are optional.
Example
-------
>>> import dgl
>>> import torch
>>> from dgl import GCNNorm
>>> transform = GCNNorm()
>>> g = dgl.graph(([0, 1, 2], [0, 0, 1]))
Case 1: Transform an unweighted graph
>>> g = transform(g)
>>> print(g.edata['w'])
tensor([0.5000, 0.7071, 0.0000])
Case 2: Transform a weighted graph
>>> g.edata['w'] = torch.tensor([0.1, 0.2, 0.3])
>>> g = transform(g)
>>> print(g.edata['w'])
tensor([0.3333, 0.6667, 0.0000])
"""
def __init__(self, eweight_name='w'):
self.eweight_name = eweight_name
def calc_etype(self, c_etype, g):
r"""
Description
-----------
Get edge weights for an edge type.
"""
ntype = c_etype[0]
with g.local_scope():
if self.eweight_name in g.edges[c_etype].data:
g.update_all(fn.copy_e(self.eweight_name, 'm'), fn.sum('m', 'deg'), etype=c_etype)
deg_inv_sqrt = 1. / F.sqrt(g.nodes[ntype].data['deg'])
g.nodes[ntype].data['w'] = F.replace_inf_with_zero(deg_inv_sqrt)
g.apply_edges(lambda edge: {'w': edge.src['w'] * edge.data[self.eweight_name] *
edge.dst['w']},
etype=c_etype)
else:
deg = g.in_degrees(etype=c_etype)
deg_inv_sqrt = 1. / F.sqrt(F.astype(deg, F.float32))
g.nodes[ntype].data['w'] = F.replace_inf_with_zero(deg_inv_sqrt)
g.apply_edges(lambda edges: {'w': edges.src['w'] * edges.dst['w']}, etype=c_etype)
return g.edges[c_etype].data['w']
def __call__(self, g):
result = dict()
for c_etype in g.canonical_etypes:
utype, _, vtype = c_etype
if utype == vtype:
result[c_etype] = self.calc_etype(c_etype, g)
for c_etype, eweight in result.items():
g.edges[c_etype].data[self.eweight_name] = eweight
return g
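
The unweighted branch can be checked by hand (a sketch, not part of the diff): with in-degrees d, every edge (u, v) gets weight 1 / sqrt(d[u] * d[v]), and 1 / sqrt(0) is replaced by 0.

    import math

    d = {0: 2, 1: 1, 2: 0}                 # in-degrees of the docstring graph
    edges = [(0, 0), (1, 0), (2, 1)]
    w = [0. if d[u] * d[v] == 0 else 1. / math.sqrt(d[u] * d[v])
         for u, v in edges]
    # w == [0.5, 0.7071..., 0.0], matching tensor([0.5000, 0.7071, 0.0000])
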
class PPR(BaseTransform):
r"""
Description
-----------
Apply personalized PageRank (PPR) to an input graph for diffusion, as introduced in
`The pagerank citation ranking: Bringing order to the web
<http://ilpubs.stanford.edu:8090/422/>`__. A sparsification will be applied to the
weighted adjacency matrix after diffusion. Specifically, edges whose weight is below
a threshold will be dropped.
This module only works for homogeneous graphs.
Parameters
----------
alpha : float, optional
Restart probability, which commonly lies in :math:`[0.05, 0.2]`.
eweight_name : str, optional
:attr:`edata` name to retrieve and store edge weights. If it does
not exist in an input graph, this module initializes a weight of 1
for all edges. The edge weights should be a tensor of shape :math:`(E)`,
where E is the number of edges.
    eps : float, optional
        The threshold for preserving edges in post-diffusion sparsification. Edges with
        a weight smaller than :attr:`eps` will be dropped.
    avg_degree : int, optional
        The desired average node degree of the resulting graph. This is an alternative
        way to control the sparsity of the resulting graph, effective only when
        :attr:`eps` is not given.
Example
-------
>>> import dgl
>>> import torch
>>> from dgl import PPR
>>> transform = PPR(avg_degree=2)
>>> g = dgl.graph(([0, 1, 2, 3, 4], [2, 3, 4, 5, 3]))
>>> g.edata['w'] = torch.tensor([0.1, 0.2, 0.3, 0.4, 0.5])
>>> new_g = transform(g)
>>> print(new_g.edata['w'])
tensor([0.1500, 0.1500, 0.1500, 0.0255, 0.0163, 0.1500, 0.0638, 0.0383, 0.1500,
0.0510, 0.0217, 0.1500])
"""
def __init__(self, alpha=0.15, eweight_name='w', eps=None, avg_degree=5):
self.alpha = alpha
self.eweight_name = eweight_name
self.eps = eps
self.avg_degree = avg_degree
def get_eps(self, num_nodes, mat):
r"""
Description
-----------
Get the threshold for graph sparsification.
"""
if self.eps is None:
# Infer from self.avg_degree
if self.avg_degree > num_nodes:
return float('-inf')
sorted_weights = torch.sort(mat.flatten(), descending=True).values
return sorted_weights[self.avg_degree * num_nodes - 1]
else:
return self.eps
def __call__(self, g):
# Step1: PPR diffusion
# (α - 1) A
device = g.device
eweight = (self.alpha - 1) * g.edata.get(self.eweight_name, F.ones(
(g.num_edges(),), F.float32, device))
num_nodes = g.num_nodes()
mat = F.zeros((num_nodes, num_nodes), F.float32, device)
src, dst = g.edges()
src, dst = F.astype(src, F.int64), F.astype(dst, F.int64)
mat[dst, src] = eweight
# I_n + (α - 1) A
nids = F.astype(g.nodes(), F.int64)
mat[nids, nids] = mat[nids, nids] + 1
# α (I_n + (α - 1) A)^-1
diff_mat = self.alpha * F.inverse(mat)
# Step2: sparsification
num_nodes = g.num_nodes()
eps = self.get_eps(num_nodes, diff_mat)
dst, src = (diff_mat >= eps).nonzero(as_tuple=False).t()
data_dict = {g.canonical_etypes[0]: (src, dst)}
new_g = update_graph_structure(g, data_dict, copy_edata=False)
new_g.edata[self.eweight_name] = diff_mat[dst, src]
return new_g
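
The diffusion step above is the closed-form PPR matrix. A dense NumPy sketch of the same computation, on a hypothetical 3-node chain with unit weights:

    import numpy as np

    alpha = 0.15
    A = np.zeros((3, 3))
    A[1, 0] = A[2, 1] = 1.                 # A[dst, src] = edge weight
    # alpha * (I_n + (alpha - 1) A)^-1, exactly as in __call__
    diff = alpha * np.linalg.inv(np.eye(3) + (alpha - 1) * A)
    # entries below eps (or outside the top avg_degree * N) are then dropped
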
def is_bidirected(g):
    """Return whether the graph is a bidirected graph.

    A graph is bidirected if for any edge :math:`(u, v)` in :math:`G`, there exists
    an edge :math:`(v, u)` in :math:`G`. Only the graph structure is checked here;
    edge weights are ignored.
    """
src, dst = g.edges()
num_nodes = g.num_nodes()
# Sort first by src then dst
idx_src_dst = src * num_nodes + dst
perm_src_dst = F.argsort(idx_src_dst, dim=0, descending=False)
src1, dst1 = src[perm_src_dst], dst[perm_src_dst]
# Sort first by dst then src
idx_dst_src = dst * num_nodes + src
perm_dst_src = F.argsort(idx_dst_src, dim=0, descending=False)
src2, dst2 = src[perm_dst_src], dst[perm_dst_src]
return F.allclose(src1, dst2) and F.allclose(src2, dst1)
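
The helper encodes each edge as a single integer key so one argsort per direction suffices. A tiny NumPy sketch on the bidirected pair {(0, 1), (1, 0)}:

    import numpy as np

    src, dst, n = np.array([0, 1]), np.array([1, 0]), 2
    p1 = np.argsort(src * n + dst)         # order edges by (src, dst)
    p2 = np.argsort(dst * n + src)         # order edges by (dst, src)
    assert (src[p1] == dst[p2]).all() and (dst[p1] == src[p2]).all()
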
# pylint: disable=C0103
class HeatKernel(BaseTransform):
r"""
Description
-----------
Apply heat kernel to an input graph for diffusion, as introduced in
`Diffusion kernels on graphs and other discrete structures
<https://www.ml.cmu.edu/research/dap-papers/kondor-diffusion-kernels.pdf>`__.
A sparsification will be applied to the weighted adjacency matrix after diffusion.
Specifically, edges whose weight is below a threshold will be dropped.
This module only works for homogeneous graphs.
Parameters
----------
t : float, optional
Diffusion time, which commonly lies in :math:`[2, 10]`.
eweight_name : str, optional
:attr:`edata` name to retrieve and store edge weights. If it does
not exist in an input graph, this module initializes a weight of 1
for all edges. The edge weights should be a tensor of shape :math:`(E)`,
where E is the number of edges.
    eps : float, optional
        The threshold for preserving edges in post-diffusion sparsification. Edges with
        a weight smaller than :attr:`eps` will be dropped.
    avg_degree : int, optional
        The desired average node degree of the resulting graph. This is an alternative
        way to control the sparsity of the resulting graph, effective only when
        :attr:`eps` is not given.
Example
-------
>>> import dgl
>>> import torch
>>> from dgl import HeatKernel
>>> transform = HeatKernel(avg_degree=2)
>>> g = dgl.graph(([0, 1, 2, 3, 4], [2, 3, 4, 5, 3]))
>>> g.edata['w'] = torch.tensor([0.1, 0.2, 0.3, 0.4, 0.5])
>>> new_g = transform(g)
>>> print(new_g.edata['w'])
tensor([0.1353, 0.1353, 0.1353, 0.0541, 0.0406, 0.1353, 0.1353, 0.0812, 0.1353,
0.1083, 0.0541, 0.1353])
"""
def __init__(self, t=2., eweight_name='w', eps=None, avg_degree=5):
self.t = t
self.eweight_name = eweight_name
self.eps = eps
self.avg_degree = avg_degree
def get_eps(self, num_nodes, mat):
r"""
Description
-----------
Get the threshold for graph sparsification.
"""
if self.eps is None:
# Infer from self.avg_degree
if self.avg_degree > num_nodes:
return float('-inf')
sorted_weights = torch.sort(mat.flatten(), descending=True).values
return sorted_weights[self.avg_degree * num_nodes - 1]
else:
return self.eps
def __call__(self, g):
# Step1: heat kernel diffusion
# t A
device = g.device
eweight = self.t * g.edata.get(self.eweight_name, F.ones(
(g.num_edges(),), F.float32, device))
num_nodes = g.num_nodes()
mat = F.zeros((num_nodes, num_nodes), F.float32, device)
src, dst = g.edges()
src, dst = F.astype(src, F.int64), F.astype(dst, F.int64)
mat[dst, src] = eweight
# t (A - I_n)
nids = F.astype(g.nodes(), F.int64)
mat[nids, nids] = mat[nids, nids] - self.t
if is_bidirected(g):
e, V = torch.linalg.eigh(mat, UPLO='U')
diff_mat = V @ torch.diag(e.exp()) @ V.t()
else:
diff_mat_np = expm(mat.cpu().numpy())
diff_mat = torch.Tensor(diff_mat_np).to(device)
# Step2: sparsification
num_nodes = g.num_nodes()
eps = self.get_eps(num_nodes, diff_mat)
dst, src = (diff_mat >= eps).nonzero(as_tuple=False).t()
data_dict = {g.canonical_etypes[0]: (src, dst)}
new_g = update_graph_structure(g, data_dict, copy_edata=False)
new_g.edata[self.eweight_name] = diff_mat[dst, src]
return new_g
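
For a bidirected graph, t(A - I_n) is symmetric, so the matrix exponential can be computed on the eigenvalues instead of calling scipy's expm. A sketch of why the two branches agree:

    import numpy as np
    import torch
    from scipy.linalg import expm

    M = torch.tensor([[-2., 1.], [1., -2.]])   # symmetric, like t(A - I_n)
    e, V = torch.linalg.eigh(M, UPLO='U')
    fast = V @ torch.diag(e.exp()) @ V.t()     # V exp(diag(e)) V^T
    assert np.allclose(fast.numpy(), expm(M.numpy()), atol=1e-6)
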
class GDC(BaseTransform):
r"""
Description
-----------
Apply graph diffusion convolution (GDC) to an input graph, as introduced in
`Diffusion Improves Graph Learning <https://www.in.tum.de/daml/gdc/>`__. A sparsification
will be applied to the weighted adjacency matrix after diffusion. Specifically, edges whose
weight is below a threshold will be dropped.
This module only works for homogeneous graphs.
Parameters
----------
    coefs : list[float]
        List of coefficients :math:`\theta_k` for each power of the adjacency matrix.
eweight_name : str, optional
:attr:`edata` name to retrieve and store edge weights. If it does
not exist in an input graph, this module initializes a weight of 1
for all edges. The edge weights should be a tensor of shape :math:`(E)`,
where E is the number of edges.
    eps : float, optional
        The threshold for preserving edges in post-diffusion sparsification. Edges with
        a weight smaller than :attr:`eps` will be dropped.
    avg_degree : int, optional
        The desired average node degree of the resulting graph. This is an alternative
        way to control the sparsity of the resulting graph, effective only when
        :attr:`eps` is not given.
Example
-------
>>> import dgl
>>> import torch
>>> from dgl import GDC
>>> transform = GDC([0.3, 0.2, 0.1], avg_degree=2)
>>> g = dgl.graph(([0, 1, 2, 3, 4], [2, 3, 4, 5, 3]))
>>> g.edata['w'] = torch.tensor([0.1, 0.2, 0.3, 0.4, 0.5])
>>> new_g = transform(g)
>>> print(new_g.edata['w'])
tensor([0.3000, 0.3000, 0.0200, 0.3000, 0.0400, 0.3000, 0.1000, 0.0600, 0.3000,
0.0800, 0.0200, 0.3000])
"""
def __init__(self, coefs, eweight_name='w', eps=None, avg_degree=5):
self.coefs = coefs
self.eweight_name = eweight_name
self.eps = eps
self.avg_degree = avg_degree
def get_eps(self, num_nodes, mat):
r"""
Description
-----------
Get the threshold for graph sparsification.
"""
if self.eps is None:
# Infer from self.avg_degree
if self.avg_degree > num_nodes:
return float('-inf')
sorted_weights = torch.sort(mat.flatten(), descending=True).values
return sorted_weights[self.avg_degree * num_nodes - 1]
else:
return self.eps
def __call__(self, g):
# Step1: diffusion
# A
device = g.device
eweight = g.edata.get(self.eweight_name, F.ones(
(g.num_edges(),), F.float32, device))
num_nodes = g.num_nodes()
adj = F.zeros((num_nodes, num_nodes), F.float32, device)
src, dst = g.edges()
src, dst = F.astype(src, F.int64), F.astype(dst, F.int64)
adj[dst, src] = eweight
# theta_0 I_n
mat = torch.eye(num_nodes, device=device)
diff_mat = self.coefs[0] * mat
# add theta_k A^k
for coef in self.coefs[1:]:
mat = mat @ adj
diff_mat += coef * mat
# Step2: sparsification
num_nodes = g.num_nodes()
eps = self.get_eps(num_nodes, diff_mat)
dst, src = (diff_mat >= eps).nonzero(as_tuple=False).t()
data_dict = {g.canonical_etypes[0]: (src, dst)}
new_g = update_graph_structure(g, data_dict, copy_edata=False)
new_g.edata[self.eweight_name] = diff_mat[dst, src]
return new_g
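
The loop accumulates the truncated diffusion series sum_k theta_k A^k. A dense NumPy sketch of the same polynomial, on a hypothetical 2-node graph:

    import numpy as np

    coefs = [0.3, 0.2, 0.1]                # theta_0, theta_1, theta_2
    A = np.array([[0., 1.], [1., 0.]])
    diff = sum(c * np.linalg.matrix_power(A, k) for k, c in enumerate(coefs))
    # theta_0 I + theta_1 A + theta_2 A^2, as built incrementally in __call__
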
class NodeShuffle(BaseTransform):
r"""
Description
-----------
Randomly shuffle the nodes.
Example
-------
>>> import dgl
>>> import torch
>>> from dgl import NodeShuffle
>>> transform = NodeShuffle()
>>> g = dgl.graph(([0, 1], [1, 2]))
>>> g.ndata['h1'] = torch.tensor([[1., 2.], [3., 4.], [5., 6.]])
>>> g.ndata['h2'] = torch.tensor([[7., 8.], [9., 10.], [11., 12.]])
>>> g = transform(g)
>>> print(g.ndata['h1'])
tensor([[5., 6.],
[3., 4.],
[1., 2.]])
>>> print(g.ndata['h2'])
tensor([[11., 12.],
[ 9., 10.],
[ 7., 8.]])
"""
def __call__(self, g):
for ntype in g.ntypes:
nids = F.astype(g.nodes(ntype), F.int64)
perm = F.rand_shuffle(nids)
for key, feat in g.nodes[ntype].data.items():
g.nodes[ntype].data[key] = feat[perm]
return g
# pylint: disable=C0103
class DropNode(BaseTransform):
r"""
Description
-----------
Randomly drop nodes, as described in
`Graph Contrastive Learning with Augmentations <https://arxiv.org/abs/2010.13902>`__.
Parameters
----------
p : float, optional
Probability of a node to be dropped.
Example
-------
>>> import dgl
>>> import torch
>>> from dgl import DropNode
>>> transform = DropNode()
>>> g = dgl.rand_graph(5, 20)
>>> g.ndata['h'] = torch.arange(g.num_nodes())
>>> g.edata['h'] = torch.arange(g.num_edges())
>>> new_g = transform(g)
>>> print(new_g)
Graph(num_nodes=3, num_edges=7,
ndata_schemes={'h': Scheme(shape=(), dtype=torch.int64)}
edata_schemes={'h': Scheme(shape=(), dtype=torch.int64)})
>>> print(new_g.ndata['h'])
tensor([0, 1, 2])
>>> print(new_g.edata['h'])
tensor([0, 6, 14, 5, 17, 3, 11])
"""
def __init__(self, p=0.5):
self.p = p
self.dist = Bernoulli(p)
def __call__(self, g):
# Fast path
if self.p == 0:
return g
for ntype in g.ntypes:
samples = self.dist.sample(torch.Size([g.num_nodes(ntype)]))
nids_to_remove = g.nodes(ntype)[samples.bool().to(g.device)]
g.remove_nodes(nids_to_remove, ntype=ntype)
return g
# pylint: disable=C0103
class DropEdge(BaseTransform):
r"""
Description
-----------
Randomly drop edges, as described in
`DropEdge: Towards Deep Graph Convolutional Networks on Node Classification
<https://arxiv.org/abs/1907.10903>`__ and `Graph Contrastive Learning with Augmentations
<https://arxiv.org/abs/2010.13902>`__.
Parameters
----------
p : float, optional
Probability of an edge to be dropped.
Example
-------
>>> import dgl
>>> import torch
>>> from dgl import DropEdge
>>> transform = DropEdge()
>>> g = dgl.rand_graph(5, 20)
>>> g.edata['h'] = torch.arange(g.num_edges())
>>> new_g = transform(g)
>>> print(new_g)
Graph(num_nodes=5, num_edges=12,
ndata_schemes={}
edata_schemes={'h': Scheme(shape=(), dtype=torch.int64)})
>>> print(new_g.edata['h'])
tensor([0, 1, 3, 7, 8, 10, 11, 12, 13, 15, 18, 19])
"""
def __init__(self, p=0.5):
self.p = p
self.dist = Bernoulli(p)
def __call__(self, g):
# Fast path
if self.p == 0:
return g
for c_etype in g.canonical_etypes:
samples = self.dist.sample(torch.Size([g.num_edges(c_etype)]))
eids_to_remove = g.edges(form='eid', etype=c_etype)[samples.bool().to(g.device)]
g.remove_edges(eids_to_remove, etype=c_etype)
return g
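
Both DropNode and DropEdge draw one Bernoulli(p) sample per node or edge and remove the hits, so roughly a fraction p disappears per call. A sketch of the sampling step:

    import torch
    from torch.distributions import Bernoulli

    samples = Bernoulli(0.5).sample(torch.Size([10000]))
    mask = samples.bool()                  # True -> this node/edge is removed
    # mask.float().mean() is close to 0.5
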
class AddEdge(BaseTransform):
r"""
Description
-----------
Randomly add edges, as described in `Graph Contrastive Learning with Augmentations
<https://arxiv.org/abs/2010.13902>`__.
Parameters
----------
ratio : float, optional
Number of edges to add divided by the number of existing edges.
Example
-------
>>> import dgl
>>> from dgl import AddEdge
>>> transform = AddEdge()
>>> g = dgl.rand_graph(5, 20)
>>> new_g = transform(g)
>>> print(new_g.num_edges())
24
"""
def __init__(self, ratio=0.2):
self.ratio = ratio
def __call__(self, g):
# Fast path
if self.ratio == 0.:
return g
device = g.device
idtype = g.idtype
for c_etype in g.canonical_etypes:
utype, _, vtype = c_etype
num_edges_to_add = int(g.num_edges(c_etype) * self.ratio)
src = F.randint([num_edges_to_add], idtype, device, low=0, high=g.num_nodes(utype))
dst = F.randint([num_edges_to_add], idtype, device, low=0, high=g.num_nodes(vtype))
g.add_edges(src, dst, etype=c_etype)
return g
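
The new modules compose with the existing Compose transform, which is the intended way to build an augmentation pipeline (a sketch; the parameter values are arbitrary):

    import dgl
    from dgl import AddEdge, Compose, DropEdge, GCNNorm

    transform = Compose([DropEdge(p=0.2), AddEdge(ratio=0.1), GCNNorm()])
    g = dgl.rand_graph(10, 40)
    aug_g = transform(g)                   # one stochastic augmentation view
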
@@ -345,8 +345,8 @@ def test_empty_data_initialized():
     assert len(g.ndata["ha"]) == 1
 
 def test_is_sorted():
     u_src, u_dst = edge_pair_input(False)
     s_src, s_dst = edge_pair_input(True)
     u_src = F.tensor(u_src, dtype=F.int32)
     u_dst = F.tensor(u_dst, dtype=F.int32)
@@ -409,7 +409,7 @@ def test_formats():
         fail = False
     finally:
         assert not fail
 
 if __name__ == '__main__':
     test_query()
     test_mutation()
...
@@ -23,6 +23,7 @@ import dgl.function as fn
 import dgl.partition
 import backend as F
 import unittest
+import math
 from utils import parametrize_dtype
 from test_heterograph import create_test_heterograph3, create_test_heterograph4, create_test_heterograph5
@@ -2156,5 +2157,144 @@ def test_module_compose(idtype):
     eset = set(zip(list(F.asnumpy(src)), list(F.asnumpy(dst))))
     assert eset == {(0, 1), (1, 2), (1, 0), (2, 1), (0, 0), (1, 1), (2, 2)}
@parametrize_dtype
def test_module_gcnnorm(idtype):
g = dgl.heterograph({
('A', 'r1', 'A'): ([0, 1, 2], [0, 0, 1]),
('A', 'r2', 'B'): ([0, 0], [1, 1]),
('B', 'r3', 'B'): ([0, 1, 2], [0, 0, 1])
}, idtype=idtype, device=F.ctx())
g.edges['r3'].data['w'] = F.tensor([0.1, 0.2, 0.3])
transform = dgl.GCNNorm()
new_g = transform(g)
assert 'w' not in new_g.edges[('A', 'r2', 'B')].data
assert F.allclose(new_g.edges[('A', 'r1', 'A')].data['w'],
F.tensor([1./2, 1./math.sqrt(2), 0.]))
assert F.allclose(new_g.edges[('B', 'r3', 'B')].data['w'], F.tensor([1./3, 2./3, 0.]))
@unittest.skipIf(dgl.backend.backend_name != 'pytorch', reason='Only support PyTorch for now')
@parametrize_dtype
def test_module_ppr(idtype):
g = dgl.graph(([0, 1, 2, 3, 4], [2, 3, 4, 5, 3]), idtype=idtype, device=F.ctx())
g.ndata['h'] = F.randn((6, 2))
transform = dgl.PPR(avg_degree=2)
new_g = transform(g)
assert new_g.idtype == g.idtype
assert new_g.device == g.device
assert new_g.num_nodes() == g.num_nodes()
src, dst = new_g.edges()
eset = set(zip(list(F.asnumpy(src)), list(F.asnumpy(dst))))
assert eset == {(0, 0), (0, 2), (0, 4), (1, 1), (1, 3), (1, 5), (2, 2),
(2, 3), (2, 4), (3, 3), (3, 5), (4, 3), (4, 4), (4, 5), (5, 5)}
assert F.allclose(g.ndata['h'], new_g.ndata['h'])
assert 'w' in new_g.edata
# Prior edge weights
g.edata['w'] = F.tensor([0.1, 0.2, 0.3, 0.4, 0.5])
new_g = transform(g)
src, dst = new_g.edges()
eset = set(zip(list(F.asnumpy(src)), list(F.asnumpy(dst))))
assert eset == {(0, 0), (1, 1), (1, 3), (2, 2), (2, 3), (2, 4),
(3, 3), (3, 5), (4, 3), (4, 4), (4, 5), (5, 5)}
@unittest.skipIf(dgl.backend.backend_name != 'pytorch', reason='Only support PyTorch for now')
@parametrize_dtype
def test_module_heat_kernel(idtype):
# Case 1: directed graph
g = dgl.graph(([0, 1, 2, 3, 4], [2, 3, 4, 5, 3]), idtype=idtype, device=F.ctx())
g.ndata['h'] = F.randn((6, 2))
transform = dgl.HeatKernel(avg_degree=1)
new_g = transform(g)
assert new_g.idtype == g.idtype
assert new_g.device == g.device
assert new_g.num_nodes() == g.num_nodes()
src, dst = new_g.edges()
eset = set(zip(list(F.asnumpy(src)), list(F.asnumpy(dst))))
assert eset == {(0, 2), (0, 4), (1, 3), (1, 5), (2, 3), (2, 4), (3, 5), (4, 5)}
assert F.allclose(g.ndata['h'], new_g.ndata['h'])
assert 'w' in new_g.edata
# Case 2: weighted undirected graph
g = dgl.graph(([0, 1, 2, 3], [1, 0, 3, 2]), idtype=idtype, device=F.ctx())
g.edata['w'] = F.tensor([0.1, 0.2, 0.3, 0.4])
new_g = transform(g)
src, dst = new_g.edges()
eset = set(zip(list(F.asnumpy(src)), list(F.asnumpy(dst))))
assert eset == {(0, 0), (1, 1), (2, 2), (3, 3)}
@unittest.skipIf(dgl.backend.backend_name != 'pytorch', reason='Only support PyTorch for now')
@parametrize_dtype
def test_module_gdc(idtype):
transform = dgl.GDC([0.1, 0.2, 0.1], avg_degree=1)
g = dgl.graph(([0, 1, 2, 3, 4], [2, 3, 4, 5, 3]), idtype=idtype, device=F.ctx())
g.ndata['h'] = F.randn((6, 2))
new_g = transform(g)
assert new_g.idtype == g.idtype
assert new_g.device == g.device
assert new_g.num_nodes() == g.num_nodes()
src, dst = new_g.edges()
eset = set(zip(list(F.asnumpy(src)), list(F.asnumpy(dst))))
assert eset == {(0, 0), (0, 2), (0, 4), (1, 1), (1, 3), (1, 5), (2, 2), (2, 3),
(2, 4), (3, 3), (3, 5), (4, 3), (4, 4), (4, 5), (5, 5)}
assert F.allclose(g.ndata['h'], new_g.ndata['h'])
assert 'w' in new_g.edata
# Prior edge weights
g.edata['w'] = F.tensor([0.1, 0.2, 0.3, 0.4, 0.5])
new_g = transform(g)
src, dst = new_g.edges()
eset = set(zip(list(F.asnumpy(src)), list(F.asnumpy(dst))))
assert eset == {(0, 0), (1, 1), (2, 2), (3, 3), (4, 3), (4, 4), (5, 5)}
@parametrize_dtype
def test_module_node_shuffle(idtype):
transform = dgl.NodeShuffle()
g = dgl.heterograph({
('A', 'r', 'B'): ([0, 1], [1, 2]),
}, idtype=idtype, device=F.ctx())
new_g = transform(g)
@unittest.skipIf(dgl.backend.backend_name != 'pytorch', reason='Only support PyTorch for now')
@parametrize_dtype
def test_module_drop_node(idtype):
transform = dgl.DropNode()
g = dgl.heterograph({
('A', 'r', 'B'): ([0, 1], [1, 2]),
}, idtype=idtype, device=F.ctx())
new_g = transform(g)
assert new_g.idtype == g.idtype
assert new_g.device == g.device
assert new_g.ntypes == g.ntypes
assert new_g.canonical_etypes == g.canonical_etypes
@unittest.skipIf(dgl.backend.backend_name != 'pytorch', reason='Only support PyTorch for now')
@parametrize_dtype
def test_module_drop_edge(idtype):
transform = dgl.DropEdge()
g = dgl.heterograph({
('A', 'r1', 'B'): ([0, 1], [1, 2]),
('C', 'r2', 'C'): ([3, 4, 5], [6, 7, 8])
}, idtype=idtype, device=F.ctx())
new_g = transform(g)
assert new_g.idtype == g.idtype
assert new_g.device == g.device
assert new_g.ntypes == g.ntypes
assert new_g.canonical_etypes == g.canonical_etypes
@parametrize_dtype
def test_module_add_edge(idtype):
transform = dgl.AddEdge()
g = dgl.heterograph({
('A', 'r1', 'B'): ([0, 1, 2, 3, 4], [1, 2, 3, 4, 5]),
('C', 'r2', 'C'): ([0, 1, 2, 3, 4], [1, 2, 3, 4, 5])
}, idtype=idtype, device=F.ctx())
new_g = transform(g)
assert new_g.num_edges(('A', 'r1', 'B')) == 6
assert new_g.num_edges(('C', 'r2', 'C')) == 6
assert new_g.idtype == g.idtype
assert new_g.device == g.device
assert new_g.ntypes == g.ntypes
assert new_g.canonical_etypes == g.canonical_etypes
 if __name__ == '__main__':
     test_partition_with_halo()