Unverified commit 290b7c25 authored by rudongyu, committed by GitHub

[Transform] Random Walk Positional Encoding & Laplacian Positional Encoding (#3869)



* [Transform] Random Walk Positional Encoding & Laplacian Positional Encoding

* Update python/dgl/transforms/module.py
Co-authored-by: Mufei Li <mufeili1996@gmail.com>

* Update python/dgl/transforms/functional.py
Co-authored-by: Mufei Li <mufeili1996@gmail.com>

* Update python/dgl/transforms/functional.py
Co-authored-by: Mufei Li <mufeili1996@gmail.com>

* Update python/dgl/transforms/functional.py
Co-authored-by: Mufei Li <mufeili1996@gmail.com>

* Update python/dgl/transforms/functional.py
Co-authored-by: Mufei Li <mufeili1996@gmail.com>

* Update python/dgl/transforms/functional.py
Co-authored-by: Mufei Li <mufeili1996@gmail.com>

* Update python/dgl/transforms/functional.py & python/dgl/transforms/module.py

* update python/dgl/transforms/functional.py

* update doc indexing

* update unit test

* support weighted graphs for random_walk_pe

* change reference

* fix linter errors

* fix datatype compatibility with MXNet & TF

* fix device errors

* fix result precision errors

* change tensorflow abs api

* reboost CI
Co-authored-by: Mufei Li <mufeili1996@gmail.com>
parent 9fee20b9
...@@ -83,6 +83,19 @@ Operators for generating new graphs by manipulating the structure of the existing ones
    sort_csr_by_tag
    sort_csc_by_tag
.. _api-positional-encoding:

Graph Positional Encoding Ops:
-----------------------------------------

Operators for generating positional encodings of each node.

.. autosummary::
    :toctree: ../../generated

    random_walk_pe
    laplacian_pe
.. _api-partition:

Graph Partition Utilities
......
...@@ -28,3 +28,5 @@ dgl.transforms
    DropNode
    DropEdge
    AddEdge
RandomWalkPE
LaplacianPE
...@@ -70,7 +70,9 @@ __all__ = [
    'adj_product_graph',
    'adj_sum_graph',
    'reorder_graph',
    'norm_by_dst',
    'random_walk_pe',
    'laplacian_pe'
]
...@@ -3300,4 +3302,117 @@ def norm_by_dst(g, etype=None):
    return norm
def random_walk_pe(g, k, eweight_name=None):
    r"""Random Walk Positional Encoding, as introduced in
    `Graph Neural Networks with Learnable Structural and Positional Representations
    <https://arxiv.org/abs/2110.07875>`__

    This function computes the positional encoding of each node as the landing
    probabilities of 1-step to k-step random walks that start and end at that node.

    Parameters
    ----------
    g : DGLGraph
        The input graph. Must be homogeneous.
    k : int
        The number of random walk steps. The paper found 16 and 20 to be the best
        values in its two experiments.
    eweight_name : str, optional
        Name of the edge feature to use as edge weights. Default: None, meaning
        edge weights are not used.

    Returns
    -------
    Tensor
        The random walk positional encodings of shape :math:`(N, k)`, where :math:`N`
        is the number of nodes in the input graph.

    Example
    -------
    >>> import dgl
    >>> g = dgl.graph(([0, 1, 1], [1, 1, 0]))
    >>> dgl.random_walk_pe(g, 2)
    tensor([[0.0000, 0.5000],
            [0.5000, 0.7500]])
    """
    N = g.num_nodes()  # number of nodes
    M = g.num_edges()  # number of edges
    A = g.adj(scipy_fmt='csr')  # adjacency matrix
    if eweight_name is not None:
        # rebuild the adjacency matrix weighted by the given edge feature
        W = sparse.csr_matrix(
            (g.edata[eweight_name].squeeze(), g.find_edges(list(range(M)))),
            shape=(N, N)
        )
        A = A.multiply(W)
    RW = np.array(A / (A.sum(1) + 1e-30))  # 1-step transition probability (densified)
    # Iterate for k steps, collecting each node's probability of returning to itself
    PE = [F.astype(F.tensor(RW.diagonal()), F.float32)]
    RW_power = RW
    for _ in range(k - 1):
        RW_power = RW_power @ RW
        PE.append(F.astype(F.tensor(RW_power.diagonal()), F.float32))
    PE = F.stack(PE, dim=-1)
    return PE
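As a sanity check on the recursion above, here is a minimal NumPy re-derivation of the docstring example (an editor's sketch, not part of the PR; random_walk_pe_dense is a hypothetical helper):

import numpy as np

def random_walk_pe_dense(A, k):
    # diagonals of RW^1 .. RW^k for a dense adjacency matrix A
    RW = A / np.clip(A.sum(axis=1, keepdims=True), 1e-30, None)  # row-normalize
    pe, P = [], np.eye(A.shape[0])
    for _ in range(k):
        P = P @ RW             # advance the walk one step
        pe.append(np.diag(P))  # probability of having returned to the start node
    return np.stack(pe, axis=-1)

A = np.array([[0., 1.],        # edges 0->1, 1->1, 1->0 from the docstring example
              [1., 1.]])
print(random_walk_pe_dense(A, 2))  # [[0.   0.5 ] [0.5  0.75]]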
def laplacian_pe(g, k):
    r"""Laplacian Positional Encoding, as introduced in
    `Benchmarking Graph Neural Networks
    <https://arxiv.org/abs/2003.00982>`__

    This function computes the Laplacian positional encoding of each node as the
    eigenvectors of the graph Laplacian associated with the k smallest non-trivial
    eigenvalues (k << n), where k is the positional encoding dimension and n is
    the number of nodes in the given graph.

    Parameters
    ----------
    g : DGLGraph
        The input graph. Must be homogeneous.
    k : int
        Number of smallest non-trivial eigenvectors to use for positional encoding
        (must be smaller than the number of nodes).

    Returns
    -------
    Tensor
        The Laplacian positional encodings of shape :math:`(N, k)`, where :math:`N`
        is the number of nodes in the input graph.

    Example
    -------
    >>> import dgl
    >>> g = dgl.rand_graph(6, 12)
    >>> dgl.laplacian_pe(g, 2)
    tensor([[-0.8931, -0.7713],
            [-0.0000,  0.6198],
            [ 0.2704, -0.0138],
            [-0.0000,  0.0554],
            [ 0.3595, -0.0477],
            [-0.0000,  0.1240]])
    """
    # check for the "k < n" constraint
    n = g.num_nodes()
    assert n > k, "the number of eigenvectors k must be smaller than the number " + \
        f"of nodes n, {k} and {n} detected."
    # get the symmetric normalized Laplacian I - D^-0.5 * A * D^-0.5
    A = g.adj(scipy_fmt='csr')  # adjacency matrix
    N = sparse.diags(F.asnumpy(g.in_degrees()).clip(1) ** -0.5, dtype=float)  # D^-1/2
    L = sparse.eye(g.num_nodes()) - N * A * N
    # select the eigenvectors with the smallest eigenvalues; the partial sort below
    # costs O(n + k*log(k)) after the full eigendecomposition
    EigVal, EigVec = np.linalg.eig(L.toarray())
    kpartition_indices = np.argpartition(EigVal, k + 1)[:k + 1]
    topk_eigvals = EigVal[kpartition_indices]
    topk_indices = kpartition_indices[topk_eigvals.argsort()][1:]  # drop the trivial eigenvector
    topk_EigVec = np.real(EigVec[:, topk_indices])
    # eigenvectors are defined only up to sign, so apply random sign flips
    rand_sign = 2 * (np.random.rand(k) > 0.5) - 1.
    PE = F.astype(F.tensor(rand_sign * topk_EigVec), F.float32)
    return PE
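To make the eigenvector selection concrete, the following sketch (an editor's illustration, not part of the PR) reproduces the argpartition-then-sort step on the symmetric normalized Laplacian of a bidirected 4-cycle, whose spectrum is known in closed form (eigenvalues 0, 1, 1, 2):

import numpy as np
from scipy import sparse

# adjacency of a bidirected 4-cycle 0-1-2-3-0
A = sparse.csr_matrix(np.array([[0, 1, 0, 1],
                                [1, 0, 1, 0],
                                [0, 1, 0, 1],
                                [1, 0, 1, 0]], dtype=float))
D_inv_sqrt = sparse.diags(np.asarray(A.sum(1)).ravel() ** -0.5)
L = sparse.eye(4) - D_inv_sqrt @ A @ D_inv_sqrt

k = 2
EigVal, EigVec = np.linalg.eig(L.toarray())
idx = np.argpartition(EigVal, k + 1)[:k + 1]  # indices of the k+1 smallest eigenvalues
idx = idx[EigVal[idx].argsort()][1:]          # sort, then drop the trivial lambda = 0
print(EigVal[idx])                            # -> approximately [1. 1.]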
_init_api("dgl.transform", __name__)
...@@ -31,6 +31,8 @@ except ImportError:
__all__ = [
    'BaseTransform',
    'RandomWalkPE',
    'LaplacianPE',
    'AddSelfLoop',
    'RemoveSelfLoop',
    'AddReverse',
...@@ -96,6 +98,88 @@ class BaseTransform:
    def __repr__(self):
        return self.__class__.__name__ + '()'
class RandomWalkPE(BaseTransform):
    r"""Random Walk Positional Encoding, as introduced in
    `Graph Neural Networks with Learnable Structural and Positional Representations
    <https://arxiv.org/abs/2110.07875>`__

    This module only works for homogeneous graphs.

    Parameters
    ----------
    k : int
        Number of random walk steps. The paper found 16 and 20 to be the best
        values in its two experiments.
    feat_name : str, optional
        Name to store the computed positional encodings in ndata.
    eweight_name : str, optional
        Name of the edge feature to use as edge weights. Default: None, meaning
        edge weights are not used.

    Example
    -------
    >>> import dgl
    >>> from dgl import RandomWalkPE
    >>> transform = RandomWalkPE(k=2)
    >>> g = dgl.graph(([0, 1, 1], [1, 1, 0]))
    >>> g = transform(g)
    >>> print(g.ndata['PE'])
    tensor([[0.0000, 0.5000],
            [0.5000, 0.7500]])
    """
    def __init__(self, k, feat_name='PE', eweight_name=None):
        self.k = k
        self.feat_name = feat_name
        self.eweight_name = eweight_name

    def __call__(self, g):
        PE = functional.random_walk_pe(g, k=self.k, eweight_name=self.eweight_name)
        g.ndata[self.feat_name] = F.copy_to(PE, g.device)
        return g
class LaplacianPE(BaseTransform):
    r"""Laplacian Positional Encoding, as introduced in
    `Benchmarking Graph Neural Networks
    <https://arxiv.org/abs/2003.00982>`__

    This module only works for homogeneous bidirected graphs.

    Parameters
    ----------
    k : int
        Number of smallest non-trivial eigenvectors to use for positional encoding
        (must be smaller than the number of nodes).
    feat_name : str, optional
        Name to store the computed positional encodings in ndata.

    Example
    -------
    >>> import dgl
    >>> from dgl import LaplacianPE
    >>> transform = LaplacianPE(k=3)
    >>> g = dgl.rand_graph(5, 10)
    >>> g = transform(g)
    >>> print(g.ndata['PE'])
    tensor([[ 0.0000, -0.3646,  0.3646],
            [ 0.0000,  0.2825, -0.2825],
            [ 1.0000, -0.6315,  0.6315],
            [ 0.0000,  0.3739, -0.3739],
            [ 0.0000, -0.1663,  0.1663]])
    """
    def __init__(self, k, feat_name='PE'):
        self.k = k
        self.feat_name = feat_name

    def __call__(self, g):
        PE = functional.laplacian_pe(g, k=self.k)
        g.ndata[self.feat_name] = F.copy_to(PE, g.device)
        return g
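Both modules plug into a standard transform pipeline. A brief usage sketch (an editor's addition, assuming dgl.transforms.Compose and dgl.to_bidirected are available; neither is part of this PR):

import dgl
from dgl.transforms import Compose, LaplacianPE, RandomWalkPE

transform = Compose([RandomWalkPE(k=4, feat_name='rw_pe'),
                     LaplacianPE(k=2, feat_name='lap_pe')])
g = dgl.graph(([0, 1, 2, 3], [1, 2, 3, 0]))  # directed 4-cycle
g = dgl.to_bidirected(g)  # LaplacianPE expects a bidirected graph
g = transform(g)
print(g.ndata['rw_pe'].shape, g.ndata['lap_pe'].shape)  # (4, 4) and (4, 2)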
class AddSelfLoop(BaseTransform):
    r"""Add self-loops for each node in the graph and return a new graph.
......
...@@ -2329,6 +2329,30 @@ def test_module_add_edge(idtype):
    assert new_g.ntypes == g.ntypes
    assert new_g.canonical_etypes == g.canonical_etypes
@parametrize_dtype
def test_module_random_walk_pe(idtype):
    transform = dgl.RandomWalkPE(2, 'rwpe')
    g = dgl.graph(([0, 1, 1], [1, 1, 0]), idtype=idtype, device=F.ctx())
    new_g = transform(g)
    tgt = F.copy_to(F.tensor([[0., 0.5], [0.5, 0.75]]), g.device)
    assert F.allclose(new_g.ndata['rwpe'], tgt)

@parametrize_dtype
def test_module_laplacian_pe(idtype):
    transform = dgl.LaplacianPE(2, 'lappe')
    g = dgl.graph(([2, 1, 0, 3, 1, 1], [3, 0, 1, 3, 3, 1]), idtype=idtype, device=F.ctx())
    new_g = transform(g)
    tgt = F.copy_to(F.tensor([[0.24971116, 0.],
                              [0.11771496, 0.],
                              [0.83237050, 1.],
                              [0.48056933, 0.]]), g.device)
    # eigenvectors are sign-ambiguous (laplacian_pe applies random sign flips),
    # so compare absolute values
    if dgl.backend.backend_name == 'tensorflow':
        # TensorFlow tensors have no .abs() method; use the __abs__ dunder instead
        assert F.allclose(new_g.ndata['lappe'].__abs__(), tgt)
    # pytorch & mxnet
    else:
        assert F.allclose(new_g.ndata['lappe'].abs(), tgt)
if __name__ == '__main__':
    test_partition_with_halo()
    test_module_heat_kernel(F.int32)