Unverified Commit 2cf05c53 authored by Pengfei Xia's avatar Pengfei Xia Committed by GitHub
Browse files

[Transform] Allow add data to self loop created by AddSelfLoop or add_self_loop (#4261)



* Update

* Update functional.py

* Update

* Update test_transform.py

* Update

* Update functional.py

* Update functional.py

* Update functional.py

* Update functional.py

* Update

* Update

* Update functional.py

* Update functional.py

* Update functional.py

* Update functional.py

* Update module.py

* Update test_transform.py

* Update test_transform.py
Co-authored-by: default avatarMufei Li <mufeili1996@gmail.com>
parent 92f87f48
...@@ -40,6 +40,7 @@ from ..partition import metis_partition_assignment ...@@ -40,6 +40,7 @@ from ..partition import metis_partition_assignment
from ..partition import partition_graph_with_halo from ..partition import partition_graph_with_halo
from ..partition import metis_partition from ..partition import metis_partition
from .. import subgraph from .. import subgraph
from .. import function
# TO BE DEPRECATED # TO BE DEPRECATED
from .._deprecate.graph import DGLGraph as DGLGraphStale from .._deprecate.graph import DGLGraph as DGLGraphStale
...@@ -1764,13 +1765,24 @@ def remove_nodes(g, nids, ntype=None, store_ids=False): ...@@ -1764,13 +1765,24 @@ def remove_nodes(g, nids, ntype=None, store_ids=False):
g.remove_nodes(nids, ntype=ntype, store_ids=store_ids) g.remove_nodes(nids, ntype=ntype, store_ids=store_ids)
return g return g
def add_self_loop(g, etype=None): def add_self_loop(g, edge_feat_names=None, fill_data=1., etype=None):
r"""Add self-loops for each node in the graph and return a new graph. r"""Add self-loops for each node in the graph and return a new graph.
Parameters Parameters
---------- ----------
g : DGLGraph g : DGLGraph
The graph. The graph.
edge_feat_names : list[str], optional
The names of the self-loop features to apply `fill_data`. If None, it will apply `fill_data`
to all self-loop features. Default: None.
fill_data : int, float or str, optional
The value to fill the self-loop features. Default: 1.
* If ``fill_data`` is ``int`` or ``float``, self-loop features will be directly given by
``fill_data``.
    * If ``fill_data`` is ``str``, self-loop features will be generated by aggregating the
      features of the incoming edges of the corresponding nodes. The supported aggregations are:
      ``'mean'``, ``'sum'``, ``'max'``, ``'min'``.
etype : str or (str, str, str), optional etype : str or (str, str, str), optional
The type names of the edges. The allowed type name formats are: The type names of the edges. The allowed type name formats are:
...@@ -1792,7 +1804,6 @@ def add_self_loop(g, etype=None): ...@@ -1792,7 +1804,6 @@ def add_self_loop(g, etype=None):
* The function adds self-loops regardless of whether they already exist or not. * The function adds self-loops regardless of whether they already exist or not.
If one wishes to have exactly one self-loop for every node, If one wishes to have exactly one self-loop for every node,
call :func:`remove_self_loop` before invoking :func:`add_self_loop`. call :func:`remove_self_loop` before invoking :func:`add_self_loop`.
* Features of the new edges (self-loop edges) will be filled with zeros.
* This function discards the batch information. Please use * This function discards the batch information. Please use
:func:`dgl.DGLGraph.set_batch_num_nodes` :func:`dgl.DGLGraph.set_batch_num_nodes`
and :func:`dgl.DGLGraph.set_batch_num_edges` on the transformed graph and :func:`dgl.DGLGraph.set_batch_num_edges` on the transformed graph
...@@ -1808,7 +1819,7 @@ def add_self_loop(g, etype=None): ...@@ -1808,7 +1819,7 @@ def add_self_loop(g, etype=None):
>>> g = dgl.graph((torch.tensor([0, 0, 2]), torch.tensor([2, 1, 0]))) >>> g = dgl.graph((torch.tensor([0, 0, 2]), torch.tensor([2, 1, 0])))
>>> g.ndata['hv'] = torch.arange(3).float().reshape(-1, 1) >>> g.ndata['hv'] = torch.arange(3).float().reshape(-1, 1)
>>> g.edata['he'] = torch.arange(3).float().reshape(-1, 1) >>> g.edata['he'] = torch.arange(3).float().reshape(-1, 1)
>>> g = dgl.add_self_loop(g) >>> g = dgl.add_self_loop(g, fill_data='sum')
>>> g >>> g
Graph(num_nodes=3, num_edges=6, Graph(num_nodes=3, num_edges=6,
ndata_schemes={'hv': Scheme(shape=(1,), dtype=torch.float32)} ndata_schemes={'hv': Scheme(shape=(1,), dtype=torch.float32)}
...@@ -1817,8 +1828,8 @@ def add_self_loop(g, etype=None): ...@@ -1817,8 +1828,8 @@ def add_self_loop(g, etype=None):
tensor([[0.], tensor([[0.],
[1.], [1.],
[2.], [2.],
[0.], [2.],
[0.], [1.],
[0.]]) [0.]])
**Heterogeneous Graphs** **Heterogeneous Graphs**
...@@ -1835,12 +1846,44 @@ def add_self_loop(g, etype=None): ...@@ -1835,12 +1846,44 @@ def add_self_loop(g, etype=None):
metagraph=[('user', 'user'), ('user', 'game')]) metagraph=[('user', 'user'), ('user', 'game')])
""" """
etype = g.to_canonical_etype(etype) etype = g.to_canonical_etype(etype)
data = {}
reduce_funcs = {'sum': function.sum,
'mean': function.mean,
'max': function.max,
'min': function.min}
if edge_feat_names is None:
edge_feat_names = g.edges[etype].data.keys()
if etype[0] != etype[2]: if etype[0] != etype[2]:
raise DGLError( raise DGLError(
'add_self_loop does not support unidirectional bipartite graphs: {}.' \ 'add_self_loop does not support unidirectional bipartite graphs: {}.' \
'Please make sure the types of head node and tail node are identical.' \ 'Please make sure the types of head node and tail node are identical.' \
''.format(etype)) ''.format(etype))
for feat_name in edge_feat_names:
if isinstance(fill_data, (int, float)):
dtype = g.edges[etype].data[feat_name].dtype
dshape = g.edges[etype].data[feat_name].shape
tmp_fill_data = F.copy_to(F.astype(F.tensor([fill_data]), dtype), g.device)
if len(dshape) > 1:
data[feat_name] = F.zeros((g.num_nodes(etype[0]), *dshape[1:]), dtype,
g.device) + tmp_fill_data
else:
data[feat_name] = F.zeros((g.num_nodes(etype[0]),), dtype, g.device) + tmp_fill_data
elif isinstance(fill_data, str):
if fill_data not in reduce_funcs.keys():
raise DGLError('Unsupported aggregation: {}'.format(fill_data))
reducer = reduce_funcs[fill_data]
with g.local_scope():
g.update_all(function.copy_e(feat_name, "h"), reducer('h', 'h'), etype=etype)
data[feat_name] = g.nodes[etype[0]].data['h']
nodes = g.nodes(etype[0]) nodes = g.nodes(etype[0])
if len(data):
new_g = add_edges(g, nodes, nodes, data=data, etype=etype)
else:
new_g = add_edges(g, nodes, nodes, etype=etype) new_g = add_edges(g, nodes, nodes, etype=etype)
return new_g return new_g
......
...@@ -415,6 +415,17 @@ class AddSelfLoop(BaseTransform): ...@@ -415,6 +415,17 @@ class AddSelfLoop(BaseTransform):
If False, it will first remove self-loops to prevent duplicate self-loops. If False, it will first remove self-loops to prevent duplicate self-loops.
new_etypes : bool, optional new_etypes : bool, optional
If True, it will add an edge type 'self' per node type, which holds self-loops. If True, it will add an edge type 'self' per node type, which holds self-loops.
edge_feat_names : list[str], optional
The names of the self-loop features to apply `fill_data`. If None, it will apply `fill_data`
to all self-loop features. Default: None.
fill_data : int, float or str, optional
The value to fill the self-loop features. Default: 1.
* If ``fill_data`` is ``int`` or ``float``, self-loop features will be directly given by
``fill_data``.
        * If ``fill_data`` is ``str``, self-loop features will be generated by aggregating the
          features of the incoming edges of the corresponding nodes. The supported aggregations are:
          ``'mean'``, ``'sum'``, ``'max'``, ``'min'``.
Example Example
------- -------
...@@ -424,23 +435,39 @@ class AddSelfLoop(BaseTransform): ...@@ -424,23 +435,39 @@ class AddSelfLoop(BaseTransform):
Case1: Add self-loops for a homogeneous graph Case1: Add self-loops for a homogeneous graph
>>> transform = AddSelfLoop() >>> transform = AddSelfLoop(fill_data='sum')
>>> g = dgl.graph(([1, 1], [1, 2])) >>> g = dgl.graph(([0, 0, 2], [2, 1, 0]))
>>> g.edata['he'] = torch.arange(3).float().reshape(-1, 1)
>>> new_g = transform(g) >>> new_g = transform(g)
>>> print(new_g.edges()) >>> print(new_g.edges())
(tensor([1, 0, 1, 2]), tensor([2, 0, 1, 2])) (tensor([1, 0, 1, 2]), tensor([2, 0, 1, 2]))
>>> print(new_g.edata('he'))
tensor([[0.],
[1.],
[2.],
[2.],
[1.],
[0.]])
Case2: Add self-loops for a heterogeneous graph Case2: Add self-loops for a heterogeneous graph
>>> transform = AddSelfLoop(fill_data='sum')
>>> g = dgl.heterograph({ >>> g = dgl.heterograph({
... ('user', 'plays', 'game'): ([0], [1]), ... ('user', 'follows', 'user'): (torch.tensor([1, 2]),
... ('user', 'follows', 'user'): ([1], [2]) ... torch.tensor([0, 1])),
... }) ... ('user', 'plays', 'game'): (torch.tensor([0, 1]),
... torch.tensor([0, 1]))})
>>> g.edata['feat'] = {('user', 'follows', 'user'): torch.randn(2, 5),
... ('user', 'plays', 'game'): torch.randn(2, 5)}
>>> g.edata['feat1'] = {('user', 'follows', 'user'): torch.randn(2, 15),
... ('user', 'plays', 'game'): torch.randn(2, 15)}
>>> new_g = transform(g) >>> new_g = transform(g)
>>> print(new_g.edges(etype='plays')) >>> print(new_g.edges(etype='plays'))
(tensor([0]), tensor([1])) (tensor([0, 1]), tensor([0, 1]))
>>> print(new_g.edges(etype='follows')) >>> print(new_g.edges(etype='follows'))
(tensor([1, 0, 1, 2]), tensor([2, 0, 1, 2])) (tensor([1, 2]), tensor([0, 1]))
>>> print(new_g.edata['feat'][('user', 'follows', 'user')].shape)
torch.Size([5, 5])
Case3: Add self-etypes for a heterogeneous graph Case3: Add self-etypes for a heterogeneous graph
...@@ -451,9 +478,12 @@ class AddSelfLoop(BaseTransform): ...@@ -451,9 +478,12 @@ class AddSelfLoop(BaseTransform):
>>> print(new_g.edges(etype=('game', 'self', 'game'))) >>> print(new_g.edges(etype=('game', 'self', 'game')))
(tensor([0, 1]), tensor([0, 1])) (tensor([0, 1]), tensor([0, 1]))
""" """
def __init__(self, allow_duplicate=False, new_etypes=False):
def __init__(self, allow_duplicate=False, new_etypes=False, edge_feat_names=None, fill_data=1.):
self.allow_duplicate = allow_duplicate self.allow_duplicate = allow_duplicate
self.new_etypes = new_etypes self.new_etypes = new_etypes
self.edge_feat_names = edge_feat_names
self.fill_data = fill_data
def transform_etype(self, c_etype, g): def transform_etype(self, c_etype, g):
r""" r"""
...@@ -480,7 +510,8 @@ class AddSelfLoop(BaseTransform): ...@@ -480,7 +510,8 @@ class AddSelfLoop(BaseTransform):
if not self.allow_duplicate: if not self.allow_duplicate:
g = functional.remove_self_loop(g, etype=c_etype) g = functional.remove_self_loop(g, etype=c_etype)
return functional.add_self_loop(g, etype=c_etype) return functional.add_self_loop(g, edge_feat_names=self.edge_feat_names,
fill_data=self.fill_data, etype=c_etype)
def __call__(self, g): def __call__(self, g):
for c_etype in g.canonical_etypes: for c_etype in g.canonical_etypes:
...@@ -501,6 +532,7 @@ class AddSelfLoop(BaseTransform): ...@@ -501,6 +532,7 @@ class AddSelfLoop(BaseTransform):
data_dict[c_etype] = g.edges(etype=c_etype) data_dict[c_etype] = g.edges(etype=c_etype)
g = update_graph_structure(g, data_dict) g = update_graph_structure(g, data_dict)
return g return g
class RemoveSelfLoop(BaseTransform): class RemoveSelfLoop(BaseTransform):
......
...@@ -1625,8 +1625,11 @@ def test_remove_nodes(idtype): ...@@ -1625,8 +1625,11 @@ def test_remove_nodes(idtype):
@parametrize_idtype @parametrize_idtype
def test_add_selfloop(idtype): def test_add_selfloop(idtype):
# homogeneous graph # homogeneous graph
# test for fill_data is float
g = dgl.graph(([0, 0, 2], [2, 1, 0]), idtype=idtype, device=F.ctx()) g = dgl.graph(([0, 0, 2], [2, 1, 0]), idtype=idtype, device=F.ctx())
g.edata['he'] = F.copy_to(F.tensor([1, 2, 3], dtype=idtype), ctx=F.ctx()) g.edata['he'] = F.copy_to(F.tensor([1, 2, 3], dtype=idtype), ctx=F.ctx())
g.edata['he1'] = F.copy_to(F.tensor([[0., 1.], [2., 3.], [4., 5.]]), ctx=F.ctx())
g.ndata['hn'] = F.copy_to(F.tensor([1, 2, 3], dtype=idtype), ctx=F.ctx()) g.ndata['hn'] = F.copy_to(F.tensor([1, 2, 3], dtype=idtype), ctx=F.ctx())
g = dgl.add_self_loop(g) g = dgl.add_self_loop(g)
assert g.number_of_nodes() == 3 assert g.number_of_nodes() == 3
...@@ -1634,7 +1637,39 @@ def test_add_selfloop(idtype): ...@@ -1634,7 +1637,39 @@ def test_add_selfloop(idtype):
u, v = g.edges(form='uv', order='eid') u, v = g.edges(form='uv', order='eid')
assert F.array_equal(u, F.tensor([0, 0, 2, 0, 1, 2], dtype=idtype)) assert F.array_equal(u, F.tensor([0, 0, 2, 0, 1, 2], dtype=idtype))
assert F.array_equal(v, F.tensor([2, 1, 0, 0, 1, 2], dtype=idtype)) assert F.array_equal(v, F.tensor([2, 1, 0, 0, 1, 2], dtype=idtype))
assert F.array_equal(g.edata['he'], F.tensor([1, 2, 3, 0, 0, 0], dtype=idtype)) assert F.array_equal(g.edata['he'], F.tensor([1, 2, 3, 1, 1, 1], dtype=idtype))
assert F.array_equal(g.edata['he1'], F.tensor([[0., 1.], [2., 3.], [4., 5.],
[1., 1.], [1., 1.], [1., 1.]]))
# test for fill_data is int
g = dgl.graph(([0, 0, 2], [2, 1, 0]), idtype=idtype, device=F.ctx())
g.edata['he'] = F.copy_to(F.tensor([1, 2, 3], dtype=idtype), ctx=F.ctx())
g.edata['he1'] = F.copy_to(F.tensor([[0, 1], [2, 3], [4, 5]], dtype=idtype), ctx=F.ctx())
g.ndata['hn'] = F.copy_to(F.tensor([1, 2, 3], dtype=idtype), ctx=F.ctx())
g = dgl.add_self_loop(g, fill_data=1)
assert g.number_of_nodes() == 3
assert g.number_of_edges() == 6
u, v = g.edges(form='uv', order='eid')
assert F.array_equal(u, F.tensor([0, 0, 2, 0, 1, 2], dtype=idtype))
assert F.array_equal(v, F.tensor([2, 1, 0, 0, 1, 2], dtype=idtype))
assert F.array_equal(g.edata['he'], F.tensor([1, 2, 3, 1, 1, 1], dtype=idtype))
assert F.array_equal(g.edata['he1'], F.tensor([[0, 1], [2, 3], [4, 5],
[1, 1], [1, 1], [1, 1]], dtype=idtype))
# test for fill_data is str
g = dgl.graph(([0, 0, 2], [2, 1, 0]), idtype=idtype, device=F.ctx())
g.edata['he'] = F.copy_to(F.tensor([1., 2., 3.]), ctx=F.ctx())
g.edata['he1'] = F.copy_to(F.tensor([[0., 1.], [2., 3.], [4., 5.]]), ctx=F.ctx())
g.ndata['hn'] = F.copy_to(F.tensor([1, 2, 3], dtype=idtype), ctx=F.ctx())
g = dgl.add_self_loop(g, fill_data='sum')
assert g.number_of_nodes() == 3
assert g.number_of_edges() == 6
u, v = g.edges(form='uv', order='eid')
assert F.array_equal(u, F.tensor([0, 0, 2, 0, 1, 2], dtype=idtype))
assert F.array_equal(v, F.tensor([2, 1, 0, 0, 1, 2], dtype=idtype))
assert F.array_equal(g.edata['he'], F.tensor([1., 2., 3., 3., 2., 1.]))
assert F.array_equal(g.edata['he1'], F.tensor([[0., 1.], [2., 3.], [4., 5.],
[4., 5.], [2., 3.], [0., 1.]]))
# bipartite graph # bipartite graph
g = dgl.heterograph( g = dgl.heterograph(
...@@ -1647,7 +1682,9 @@ def test_add_selfloop(idtype): ...@@ -1647,7 +1682,9 @@ def test_add_selfloop(idtype):
raise_error = True raise_error = True
assert raise_error assert raise_error
# test for fill_data is float
g = create_test_heterograph5(idtype) g = create_test_heterograph5(idtype)
g.edges['follows'].data['h1'] = F.copy_to(F.tensor([[0., 1.], [1., 2.]]), ctx=F.ctx())
g = dgl.add_self_loop(g, etype='follows') g = dgl.add_self_loop(g, etype='follows')
assert g.number_of_nodes('user') == 3 assert g.number_of_nodes('user') == 3
assert g.number_of_nodes('game') == 2 assert g.number_of_nodes('game') == 2
...@@ -1656,9 +1693,52 @@ def test_add_selfloop(idtype): ...@@ -1656,9 +1693,52 @@ def test_add_selfloop(idtype):
u, v = g.edges(form='uv', order='eid', etype='follows') u, v = g.edges(form='uv', order='eid', etype='follows')
assert F.array_equal(u, F.tensor([1, 2, 0, 1, 2], dtype=idtype)) assert F.array_equal(u, F.tensor([1, 2, 0, 1, 2], dtype=idtype))
assert F.array_equal(v, F.tensor([0, 1, 0, 1, 2], dtype=idtype)) assert F.array_equal(v, F.tensor([0, 1, 0, 1, 2], dtype=idtype))
assert F.array_equal(g.edges['follows'].data['h'], F.tensor([1, 2, 0, 0, 0], dtype=idtype)) assert F.array_equal(g.edges['follows'].data['h'], F.tensor([1, 2, 1, 1, 1], dtype=idtype))
assert F.array_equal(g.edges['follows'].data['h1'], F.tensor([[0., 1.], [1., 2.], [1., 1.],
[1., 1.], [1., 1.]]))
assert F.array_equal(g.edges['plays'].data['h'], F.tensor([1, 2], dtype=idtype))
# test for fill_data is int
g = create_test_heterograph5(idtype)
g.edges['follows'].data['h1'] = F.copy_to(F.tensor([[0, 1], [1, 2]], dtype=idtype), ctx=F.ctx())
g = dgl.add_self_loop(g, fill_data=1, etype='follows')
assert g.number_of_nodes('user') == 3
assert g.number_of_nodes('game') == 2
assert g.number_of_edges('follows') == 5
assert g.number_of_edges('plays') == 2
u, v = g.edges(form='uv', order='eid', etype='follows')
assert F.array_equal(u, F.tensor([1, 2, 0, 1, 2], dtype=idtype))
assert F.array_equal(v, F.tensor([0, 1, 0, 1, 2], dtype=idtype))
assert F.array_equal(g.edges['follows'].data['h'], F.tensor([1, 2, 1, 1, 1], dtype=idtype))
assert F.array_equal(g.edges['follows'].data['h1'], F.tensor([[0, 1], [1, 2], [1, 1],
[1, 1], [1, 1]], dtype=idtype))
assert F.array_equal(g.edges['plays'].data['h'], F.tensor([1, 2], dtype=idtype)) assert F.array_equal(g.edges['plays'].data['h'], F.tensor([1, 2], dtype=idtype))
# test for fill_data is str
g = dgl.heterograph({
('user', 'follows', 'user'): (F.tensor([1, 2], dtype=idtype),
F.tensor([0, 1], dtype=idtype)),
('user', 'plays', 'game'): (F.tensor([0, 1], dtype=idtype),
F.tensor([0, 1], dtype=idtype))},
idtype=idtype, device=F.ctx())
g.nodes['user'].data['h'] = F.copy_to(F.tensor([1, 1, 1], dtype=idtype), ctx=F.ctx())
g.nodes['game'].data['h'] = F.copy_to(F.tensor([2, 2], dtype=idtype), ctx=F.ctx())
g.edges['follows'].data['h'] = F.copy_to(F.tensor([1., 2.]), ctx=F.ctx())
g.edges['follows'].data['h1'] = F.copy_to(F.tensor([[0., 1.], [1., 2.]]), ctx=F.ctx())
g.edges['plays'].data['h'] = F.copy_to(F.tensor([1., 2.]), ctx=F.ctx())
g = dgl.add_self_loop(g, fill_data='mean', etype='follows')
assert g.number_of_nodes('user') == 3
assert g.number_of_nodes('game') == 2
assert g.number_of_edges('follows') == 5
assert g.number_of_edges('plays') == 2
u, v = g.edges(form='uv', order='eid', etype='follows')
assert F.array_equal(u, F.tensor([1, 2, 0, 1, 2], dtype=idtype))
assert F.array_equal(v, F.tensor([0, 1, 0, 1, 2], dtype=idtype))
assert F.array_equal(g.edges['follows'].data['h'], F.tensor([1., 2., 1., 2., 0.]))
assert F.array_equal(g.edges['follows'].data['h1'], F.tensor([[0., 1.], [1., 2.], [0., 1.],
[1., 2.], [0., 0.]]))
assert F.array_equal(g.edges['plays'].data['h'], F.tensor([1., 2.]))
raise_error = False raise_error = False
try: try:
g = dgl.add_self_loop(g, etype='plays') g = dgl.add_self_loop(g, etype='plays')
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment