Unverified Commit 2cf05c53 authored by Pengfei Xia's avatar Pengfei Xia Committed by GitHub
Browse files

[Transform] Allow add data to self loop created by AddSelfLoop or add_self_loop (#4261)



* Update

* Update functional.py

* Update

* Update test_transform.py

* Update

* Update functional.py

* Update functional.py

* Update functional.py

* Update functional.py

* Update

* Update

* Update functional.py

* Update functional.py

* Update functional.py

* Update functional.py

* Update module.py

* Update test_transform.py

* Update test_transform.py
Co-authored-by: default avatarMufei Li <mufeili1996@gmail.com>
parent 92f87f48
...@@ -40,6 +40,7 @@ from ..partition import metis_partition_assignment ...@@ -40,6 +40,7 @@ from ..partition import metis_partition_assignment
from ..partition import partition_graph_with_halo from ..partition import partition_graph_with_halo
from ..partition import metis_partition from ..partition import metis_partition
from .. import subgraph from .. import subgraph
from .. import function
# TO BE DEPRECATED # TO BE DEPRECATED
from .._deprecate.graph import DGLGraph as DGLGraphStale from .._deprecate.graph import DGLGraph as DGLGraphStale
...@@ -1764,13 +1765,24 @@ def remove_nodes(g, nids, ntype=None, store_ids=False): ...@@ -1764,13 +1765,24 @@ def remove_nodes(g, nids, ntype=None, store_ids=False):
g.remove_nodes(nids, ntype=ntype, store_ids=store_ids) g.remove_nodes(nids, ntype=ntype, store_ids=store_ids)
return g return g
def add_self_loop(g, etype=None): def add_self_loop(g, edge_feat_names=None, fill_data=1., etype=None):
r"""Add self-loops for each node in the graph and return a new graph. r"""Add self-loops for each node in the graph and return a new graph.
Parameters Parameters
---------- ----------
g : DGLGraph g : DGLGraph
The graph. The graph.
edge_feat_names : list[str], optional
The names of the self-loop features to apply `fill_data`. If None, it will apply `fill_data`
to all self-loop features. Default: None.
fill_data : int, float or str, optional
The value to fill the self-loop features. Default: 1.
* If ``fill_data`` is ``int`` or ``float``, self-loop features will be directly given by
``fill_data``.
    * If ``fill_data`` is ``str``, self-loop features will be generated by aggregating the
      features of the incoming edges of the corresponding nodes. The supported aggregations are:
      ``'mean'``, ``'sum'``, ``'max'``, ``'min'``.
etype : str or (str, str, str), optional etype : str or (str, str, str), optional
The type names of the edges. The allowed type name formats are: The type names of the edges. The allowed type name formats are:
...@@ -1792,7 +1804,6 @@ def add_self_loop(g, etype=None): ...@@ -1792,7 +1804,6 @@ def add_self_loop(g, etype=None):
* The function adds self-loops regardless of whether they already exist or not. * The function adds self-loops regardless of whether they already exist or not.
If one wishes to have exactly one self-loop for every node, If one wishes to have exactly one self-loop for every node,
call :func:`remove_self_loop` before invoking :func:`add_self_loop`. call :func:`remove_self_loop` before invoking :func:`add_self_loop`.
* Features of the new edges (self-loop edges) will be filled with zeros.
* This function discards the batch information. Please use * This function discards the batch information. Please use
:func:`dgl.DGLGraph.set_batch_num_nodes` :func:`dgl.DGLGraph.set_batch_num_nodes`
and :func:`dgl.DGLGraph.set_batch_num_edges` on the transformed graph and :func:`dgl.DGLGraph.set_batch_num_edges` on the transformed graph
...@@ -1808,7 +1819,7 @@ def add_self_loop(g, etype=None): ...@@ -1808,7 +1819,7 @@ def add_self_loop(g, etype=None):
>>> g = dgl.graph((torch.tensor([0, 0, 2]), torch.tensor([2, 1, 0]))) >>> g = dgl.graph((torch.tensor([0, 0, 2]), torch.tensor([2, 1, 0])))
>>> g.ndata['hv'] = torch.arange(3).float().reshape(-1, 1) >>> g.ndata['hv'] = torch.arange(3).float().reshape(-1, 1)
>>> g.edata['he'] = torch.arange(3).float().reshape(-1, 1) >>> g.edata['he'] = torch.arange(3).float().reshape(-1, 1)
>>> g = dgl.add_self_loop(g) >>> g = dgl.add_self_loop(g, fill_data='sum')
>>> g >>> g
Graph(num_nodes=3, num_edges=6, Graph(num_nodes=3, num_edges=6,
ndata_schemes={'hv': Scheme(shape=(1,), dtype=torch.float32)} ndata_schemes={'hv': Scheme(shape=(1,), dtype=torch.float32)}
...@@ -1817,8 +1828,8 @@ def add_self_loop(g, etype=None): ...@@ -1817,8 +1828,8 @@ def add_self_loop(g, etype=None):
tensor([[0.], tensor([[0.],
[1.], [1.],
[2.], [2.],
[0.], [2.],
[0.], [1.],
[0.]]) [0.]])
**Heterogeneous Graphs** **Heterogeneous Graphs**
...@@ -1835,12 +1846,44 @@ def add_self_loop(g, etype=None): ...@@ -1835,12 +1846,44 @@ def add_self_loop(g, etype=None):
metagraph=[('user', 'user'), ('user', 'game')]) metagraph=[('user', 'user'), ('user', 'game')])
""" """
etype = g.to_canonical_etype(etype) etype = g.to_canonical_etype(etype)
data = {}
reduce_funcs = {'sum': function.sum,
'mean': function.mean,
'max': function.max,
'min': function.min}
if edge_feat_names is None:
edge_feat_names = g.edges[etype].data.keys()
if etype[0] != etype[2]: if etype[0] != etype[2]:
raise DGLError( raise DGLError(
'add_self_loop does not support unidirectional bipartite graphs: {}.' \ 'add_self_loop does not support unidirectional bipartite graphs: {}.' \
'Please make sure the types of head node and tail node are identical.' \ 'Please make sure the types of head node and tail node are identical.' \
''.format(etype)) ''.format(etype))
for feat_name in edge_feat_names:
if isinstance(fill_data, (int, float)):
dtype = g.edges[etype].data[feat_name].dtype
dshape = g.edges[etype].data[feat_name].shape
tmp_fill_data = F.copy_to(F.astype(F.tensor([fill_data]), dtype), g.device)
if len(dshape) > 1:
data[feat_name] = F.zeros((g.num_nodes(etype[0]), *dshape[1:]), dtype,
g.device) + tmp_fill_data
else:
data[feat_name] = F.zeros((g.num_nodes(etype[0]),), dtype, g.device) + tmp_fill_data
elif isinstance(fill_data, str):
if fill_data not in reduce_funcs.keys():
raise DGLError('Unsupported aggregation: {}'.format(fill_data))
reducer = reduce_funcs[fill_data]
with g.local_scope():
g.update_all(function.copy_e(feat_name, "h"), reducer('h', 'h'), etype=etype)
data[feat_name] = g.nodes[etype[0]].data['h']
nodes = g.nodes(etype[0]) nodes = g.nodes(etype[0])
if len(data):
new_g = add_edges(g, nodes, nodes, data=data, etype=etype)
else:
new_g = add_edges(g, nodes, nodes, etype=etype) new_g = add_edges(g, nodes, nodes, etype=etype)
return new_g return new_g
......
...@@ -415,6 +415,17 @@ class AddSelfLoop(BaseTransform): ...@@ -415,6 +415,17 @@ class AddSelfLoop(BaseTransform):
If False, it will first remove self-loops to prevent duplicate self-loops. If False, it will first remove self-loops to prevent duplicate self-loops.
new_etypes : bool, optional new_etypes : bool, optional
If True, it will add an edge type 'self' per node type, which holds self-loops. If True, it will add an edge type 'self' per node type, which holds self-loops.
edge_feat_names : list[str], optional
The names of the self-loop features to apply `fill_data`. If None, it will apply `fill_data`
to all self-loop features. Default: None.
fill_data : int, float or str, optional
The value to fill the self-loop features. Default: 1.
* If ``fill_data`` is ``int`` or ``float``, self-loop features will be directly given by
``fill_data``.
        * If ``fill_data`` is ``str``, self-loop features will be generated by aggregating the
          features of the incoming edges of the corresponding nodes. The supported aggregations are:
          ``'mean'``, ``'sum'``, ``'max'``, ``'min'``.
Example Example
------- -------
...@@ -424,23 +435,39 @@ class AddSelfLoop(BaseTransform): ...@@ -424,23 +435,39 @@ class AddSelfLoop(BaseTransform):
Case1: Add self-loops for a homogeneous graph Case1: Add self-loops for a homogeneous graph
>>> transform = AddSelfLoop() >>> transform = AddSelfLoop(fill_data='sum')
>>> g = dgl.graph(([1, 1], [1, 2])) >>> g = dgl.graph(([0, 0, 2], [2, 1, 0]))
>>> g.edata['he'] = torch.arange(3).float().reshape(-1, 1)
>>> new_g = transform(g) >>> new_g = transform(g)
>>> print(new_g.edges()) >>> print(new_g.edges())
(tensor([1, 0, 1, 2]), tensor([2, 0, 1, 2])) (tensor([1, 0, 1, 2]), tensor([2, 0, 1, 2]))
>>> print(new_g.edata('he'))
tensor([[0.],
[1.],
[2.],
[2.],
[1.],
[0.]])
Case2: Add self-loops for a heterogeneous graph Case2: Add self-loops for a heterogeneous graph
>>> transform = AddSelfLoop(fill_data='sum')
>>> g = dgl.heterograph({ >>> g = dgl.heterograph({
... ('user', 'plays', 'game'): ([0], [1]), ... ('user', 'follows', 'user'): (torch.tensor([1, 2]),
... ('user', 'follows', 'user'): ([1], [2]) ... torch.tensor([0, 1])),
... }) ... ('user', 'plays', 'game'): (torch.tensor([0, 1]),
... torch.tensor([0, 1]))})
>>> g.edata['feat'] = {('user', 'follows', 'user'): torch.randn(2, 5),
... ('user', 'plays', 'game'): torch.randn(2, 5)}
>>> g.edata['feat1'] = {('user', 'follows', 'user'): torch.randn(2, 15),
... ('user', 'plays', 'game'): torch.randn(2, 15)}
>>> new_g = transform(g) >>> new_g = transform(g)
>>> print(new_g.edges(etype='plays')) >>> print(new_g.edges(etype='plays'))
(tensor([0]), tensor([1])) (tensor([0, 1]), tensor([0, 1]))
>>> print(new_g.edges(etype='follows')) >>> print(new_g.edges(etype='follows'))
(tensor([1, 0, 1, 2]), tensor([2, 0, 1, 2])) (tensor([1, 2]), tensor([0, 1]))
>>> print(new_g.edata['feat'][('user', 'follows', 'user')].shape)
torch.Size([5, 5])
Case3: Add self-etypes for a heterogeneous graph Case3: Add self-etypes for a heterogeneous graph
...@@ -451,9 +478,12 @@ class AddSelfLoop(BaseTransform): ...@@ -451,9 +478,12 @@ class AddSelfLoop(BaseTransform):
>>> print(new_g.edges(etype=('game', 'self', 'game'))) >>> print(new_g.edges(etype=('game', 'self', 'game')))
(tensor([0, 1]), tensor([0, 1])) (tensor([0, 1]), tensor([0, 1]))
""" """
def __init__(self, allow_duplicate=False, new_etypes=False):
def __init__(self, allow_duplicate=False, new_etypes=False, edge_feat_names=None, fill_data=1.):
self.allow_duplicate = allow_duplicate self.allow_duplicate = allow_duplicate
self.new_etypes = new_etypes self.new_etypes = new_etypes
self.edge_feat_names = edge_feat_names
self.fill_data = fill_data
def transform_etype(self, c_etype, g): def transform_etype(self, c_etype, g):
r""" r"""
...@@ -480,7 +510,8 @@ class AddSelfLoop(BaseTransform): ...@@ -480,7 +510,8 @@ class AddSelfLoop(BaseTransform):
if not self.allow_duplicate: if not self.allow_duplicate:
g = functional.remove_self_loop(g, etype=c_etype) g = functional.remove_self_loop(g, etype=c_etype)
return functional.add_self_loop(g, etype=c_etype) return functional.add_self_loop(g, edge_feat_names=self.edge_feat_names,
fill_data=self.fill_data, etype=c_etype)
def __call__(self, g): def __call__(self, g):
for c_etype in g.canonical_etypes: for c_etype in g.canonical_etypes:
...@@ -501,6 +532,7 @@ class AddSelfLoop(BaseTransform): ...@@ -501,6 +532,7 @@ class AddSelfLoop(BaseTransform):
data_dict[c_etype] = g.edges(etype=c_etype) data_dict[c_etype] = g.edges(etype=c_etype)
g = update_graph_structure(g, data_dict) g = update_graph_structure(g, data_dict)
return g return g
class RemoveSelfLoop(BaseTransform): class RemoveSelfLoop(BaseTransform):
......
...@@ -1625,8 +1625,11 @@ def test_remove_nodes(idtype): ...@@ -1625,8 +1625,11 @@ def test_remove_nodes(idtype):
@parametrize_idtype @parametrize_idtype
def test_add_selfloop(idtype): def test_add_selfloop(idtype):
# homogeneous graph # homogeneous graph
# test for fill_data is float
g = dgl.graph(([0, 0, 2], [2, 1, 0]), idtype=idtype, device=F.ctx()) g = dgl.graph(([0, 0, 2], [2, 1, 0]), idtype=idtype, device=F.ctx())
g.edata['he'] = F.copy_to(F.tensor([1, 2, 3], dtype=idtype), ctx=F.ctx()) g.edata['he'] = F.copy_to(F.tensor([1, 2, 3], dtype=idtype), ctx=F.ctx())
g.edata['he1'] = F.copy_to(F.tensor([[0., 1.], [2., 3.], [4., 5.]]), ctx=F.ctx())
g.ndata['hn'] = F.copy_to(F.tensor([1, 2, 3], dtype=idtype), ctx=F.ctx()) g.ndata['hn'] = F.copy_to(F.tensor([1, 2, 3], dtype=idtype), ctx=F.ctx())
g = dgl.add_self_loop(g) g = dgl.add_self_loop(g)
assert g.number_of_nodes() == 3 assert g.number_of_nodes() == 3
...@@ -1634,7 +1637,39 @@ def test_add_selfloop(idtype): ...@@ -1634,7 +1637,39 @@ def test_add_selfloop(idtype):
u, v = g.edges(form='uv', order='eid') u, v = g.edges(form='uv', order='eid')
assert F.array_equal(u, F.tensor([0, 0, 2, 0, 1, 2], dtype=idtype)) assert F.array_equal(u, F.tensor([0, 0, 2, 0, 1, 2], dtype=idtype))
assert F.array_equal(v, F.tensor([2, 1, 0, 0, 1, 2], dtype=idtype)) assert F.array_equal(v, F.tensor([2, 1, 0, 0, 1, 2], dtype=idtype))
assert F.array_equal(g.edata['he'], F.tensor([1, 2, 3, 0, 0, 0], dtype=idtype)) assert F.array_equal(g.edata['he'], F.tensor([1, 2, 3, 1, 1, 1], dtype=idtype))
assert F.array_equal(g.edata['he1'], F.tensor([[0., 1.], [2., 3.], [4., 5.],
[1., 1.], [1., 1.], [1., 1.]]))
# test for fill_data is int
g = dgl.graph(([0, 0, 2], [2, 1, 0]), idtype=idtype, device=F.ctx())
g.edata['he'] = F.copy_to(F.tensor([1, 2, 3], dtype=idtype), ctx=F.ctx())
g.edata['he1'] = F.copy_to(F.tensor([[0, 1], [2, 3], [4, 5]], dtype=idtype), ctx=F.ctx())
g.ndata['hn'] = F.copy_to(F.tensor([1, 2, 3], dtype=idtype), ctx=F.ctx())
g = dgl.add_self_loop(g, fill_data=1)
assert g.number_of_nodes() == 3
assert g.number_of_edges() == 6
u, v = g.edges(form='uv', order='eid')
assert F.array_equal(u, F.tensor([0, 0, 2, 0, 1, 2], dtype=idtype))
assert F.array_equal(v, F.tensor([2, 1, 0, 0, 1, 2], dtype=idtype))
assert F.array_equal(g.edata['he'], F.tensor([1, 2, 3, 1, 1, 1], dtype=idtype))
assert F.array_equal(g.edata['he1'], F.tensor([[0, 1], [2, 3], [4, 5],
[1, 1], [1, 1], [1, 1]], dtype=idtype))
# test for fill_data is str
g = dgl.graph(([0, 0, 2], [2, 1, 0]), idtype=idtype, device=F.ctx())
g.edata['he'] = F.copy_to(F.tensor([1., 2., 3.]), ctx=F.ctx())
g.edata['he1'] = F.copy_to(F.tensor([[0., 1.], [2., 3.], [4., 5.]]), ctx=F.ctx())
g.ndata['hn'] = F.copy_to(F.tensor([1, 2, 3], dtype=idtype), ctx=F.ctx())
g = dgl.add_self_loop(g, fill_data='sum')
assert g.number_of_nodes() == 3
assert g.number_of_edges() == 6
u, v = g.edges(form='uv', order='eid')
assert F.array_equal(u, F.tensor([0, 0, 2, 0, 1, 2], dtype=idtype))
assert F.array_equal(v, F.tensor([2, 1, 0, 0, 1, 2], dtype=idtype))
assert F.array_equal(g.edata['he'], F.tensor([1., 2., 3., 3., 2., 1.]))
assert F.array_equal(g.edata['he1'], F.tensor([[0., 1.], [2., 3.], [4., 5.],
[4., 5.], [2., 3.], [0., 1.]]))
# bipartite graph # bipartite graph
g = dgl.heterograph( g = dgl.heterograph(
...@@ -1647,7 +1682,9 @@ def test_add_selfloop(idtype): ...@@ -1647,7 +1682,9 @@ def test_add_selfloop(idtype):
raise_error = True raise_error = True
assert raise_error assert raise_error
# test for fill_data is float
g = create_test_heterograph5(idtype) g = create_test_heterograph5(idtype)
g.edges['follows'].data['h1'] = F.copy_to(F.tensor([[0., 1.], [1., 2.]]), ctx=F.ctx())
g = dgl.add_self_loop(g, etype='follows') g = dgl.add_self_loop(g, etype='follows')
assert g.number_of_nodes('user') == 3 assert g.number_of_nodes('user') == 3
assert g.number_of_nodes('game') == 2 assert g.number_of_nodes('game') == 2
...@@ -1656,9 +1693,52 @@ def test_add_selfloop(idtype): ...@@ -1656,9 +1693,52 @@ def test_add_selfloop(idtype):
u, v = g.edges(form='uv', order='eid', etype='follows') u, v = g.edges(form='uv', order='eid', etype='follows')
assert F.array_equal(u, F.tensor([1, 2, 0, 1, 2], dtype=idtype)) assert F.array_equal(u, F.tensor([1, 2, 0, 1, 2], dtype=idtype))
assert F.array_equal(v, F.tensor([0, 1, 0, 1, 2], dtype=idtype)) assert F.array_equal(v, F.tensor([0, 1, 0, 1, 2], dtype=idtype))
assert F.array_equal(g.edges['follows'].data['h'], F.tensor([1, 2, 0, 0, 0], dtype=idtype)) assert F.array_equal(g.edges['follows'].data['h'], F.tensor([1, 2, 1, 1, 1], dtype=idtype))
assert F.array_equal(g.edges['follows'].data['h1'], F.tensor([[0., 1.], [1., 2.], [1., 1.],
[1., 1.], [1., 1.]]))
assert F.array_equal(g.edges['plays'].data['h'], F.tensor([1, 2], dtype=idtype))
# test for fill_data is int
g = create_test_heterograph5(idtype)
g.edges['follows'].data['h1'] = F.copy_to(F.tensor([[0, 1], [1, 2]], dtype=idtype), ctx=F.ctx())
g = dgl.add_self_loop(g, fill_data=1, etype='follows')
assert g.number_of_nodes('user') == 3
assert g.number_of_nodes('game') == 2
assert g.number_of_edges('follows') == 5
assert g.number_of_edges('plays') == 2
u, v = g.edges(form='uv', order='eid', etype='follows')
assert F.array_equal(u, F.tensor([1, 2, 0, 1, 2], dtype=idtype))
assert F.array_equal(v, F.tensor([0, 1, 0, 1, 2], dtype=idtype))
assert F.array_equal(g.edges['follows'].data['h'], F.tensor([1, 2, 1, 1, 1], dtype=idtype))
assert F.array_equal(g.edges['follows'].data['h1'], F.tensor([[0, 1], [1, 2], [1, 1],
[1, 1], [1, 1]], dtype=idtype))
assert F.array_equal(g.edges['plays'].data['h'], F.tensor([1, 2], dtype=idtype)) assert F.array_equal(g.edges['plays'].data['h'], F.tensor([1, 2], dtype=idtype))
# test for fill_data is str
g = dgl.heterograph({
('user', 'follows', 'user'): (F.tensor([1, 2], dtype=idtype),
F.tensor([0, 1], dtype=idtype)),
('user', 'plays', 'game'): (F.tensor([0, 1], dtype=idtype),
F.tensor([0, 1], dtype=idtype))},
idtype=idtype, device=F.ctx())
g.nodes['user'].data['h'] = F.copy_to(F.tensor([1, 1, 1], dtype=idtype), ctx=F.ctx())
g.nodes['game'].data['h'] = F.copy_to(F.tensor([2, 2], dtype=idtype), ctx=F.ctx())
g.edges['follows'].data['h'] = F.copy_to(F.tensor([1., 2.]), ctx=F.ctx())
g.edges['follows'].data['h1'] = F.copy_to(F.tensor([[0., 1.], [1., 2.]]), ctx=F.ctx())
g.edges['plays'].data['h'] = F.copy_to(F.tensor([1., 2.]), ctx=F.ctx())
g = dgl.add_self_loop(g, fill_data='mean', etype='follows')
assert g.number_of_nodes('user') == 3
assert g.number_of_nodes('game') == 2
assert g.number_of_edges('follows') == 5
assert g.number_of_edges('plays') == 2
u, v = g.edges(form='uv', order='eid', etype='follows')
assert F.array_equal(u, F.tensor([1, 2, 0, 1, 2], dtype=idtype))
assert F.array_equal(v, F.tensor([0, 1, 0, 1, 2], dtype=idtype))
assert F.array_equal(g.edges['follows'].data['h'], F.tensor([1., 2., 1., 2., 0.]))
assert F.array_equal(g.edges['follows'].data['h1'], F.tensor([[0., 1.], [1., 2.], [0., 1.],
[1., 2.], [0., 0.]]))
assert F.array_equal(g.edges['plays'].data['h'], F.tensor([1., 2.]))
raise_error = False raise_error = False
try: try:
g = dgl.add_self_loop(g, etype='plays') g = dgl.add_self_loop(g, etype='plays')
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment