"tests/python/git@developer.sourcefind.cn:OpenDAS/dgl.git" did not exist on "84f90644c7210197b48e5548878e139724231a58"
Unverified Commit 4307fe88 authored by Rhett-Ying's avatar Rhett-Ying Committed by GitHub
Browse files

[Feature] Add dgl.reorder() to re-order graph according to specified strategy (#3063)



* [Feature] Add dgl.reorder() to re-order graph according to specified strategy

* fix unit test failure for metis reorder

* fix unit test failure on mxnet_cpu

* refine unit test for dgl.reorder

* fix unit test failure on mxnet

* fix array_equal error for mxnet unit test

* fix unit test failure for mxnet

* convert metis output to numpy array explicitly
Co-authored-by: Tong He <hetong007@gmail.com>
parent 61fba9da
...@@ -132,6 +132,7 @@ under the ``dgl`` namespace. ...@@ -132,6 +132,7 @@ under the ``dgl`` namespace.
DGLGraph.add_self_loop DGLGraph.add_self_loop
DGLGraph.remove_self_loop DGLGraph.remove_self_loop
DGLGraph.to_simple DGLGraph.to_simple
DGLGraph.reorder
Adjacency and incidence matrix Adjacency and incidence matrix
--------------------------------- ---------------------------------
......
...@@ -76,6 +76,7 @@ Operators for generating new graphs by manipulating the structure of the existin ...@@ -76,6 +76,7 @@ Operators for generating new graphs by manipulating the structure of the existin
metapath_reachable_graph metapath_reachable_graph
adj_product_graph adj_product_graph
adj_sum_graph adj_sum_graph
reorder
.. _api-batch: .. _api-batch:
......
...@@ -17,6 +17,7 @@ from . import utils, batch ...@@ -17,6 +17,7 @@ from . import utils, batch
from .partition import metis_partition_assignment from .partition import metis_partition_assignment
from .partition import partition_graph_with_halo from .partition import partition_graph_with_halo
from .partition import metis_partition from .partition import metis_partition
from . import subgraph
# TO BE DEPRECATED # TO BE DEPRECATED
from ._deprecate.graph import DGLGraph as DGLGraphStale from ._deprecate.graph import DGLGraph as DGLGraphStale
...@@ -51,7 +52,9 @@ __all__ = [ ...@@ -51,7 +52,9 @@ __all__ = [
'metis_partition', 'metis_partition',
'as_heterograph', 'as_heterograph',
'adj_product_graph', 'adj_product_graph',
'adj_sum_graph'] 'adj_sum_graph',
'reorder'
]
def pairwise_squared_distance(x): def pairwise_squared_distance(x):
...@@ -2888,4 +2891,185 @@ def sort_in_edges(g, tag, tag_offset_name='_TAG_OFFSET'): ...@@ -2888,4 +2891,185 @@ def sort_in_edges(g, tag, tag_offset_name='_TAG_OFFSET'):
new_g.dstdata[tag_offset_name] = F.from_dgl_nd(tag_pos_arr) new_g.dstdata[tag_offset_name] = F.from_dgl_nd(tag_pos_arr)
return new_g return new_g
def reorder(g, permute_algo='rcmk', store_ids=True, permute_config=None):
    r"""Return a new graph whose nodes are re-ordered and re-labeled
    according to the specified permutation algorithm.

    Only homogeneous graphs are supported.

    This API is implemented on top of ``dgl.node_subgraph()``, so the function
    signature is similar and the raw IDs can be stored in ``dgl.NID`` and
    ``dgl.EID``.

    Parameters
    ----------
    g : DGLGraph
        The homogeneous graph.
    permute_algo: str, optional
        Can be ``'rcmk'``, ``'metis'`` or ``'custom'``. ``'rcmk'`` is the default.

        * ``'rcmk'``: The Reverse Cuthill–McKee algorithm permutes a sparse
          matrix that has a symmetric sparsity pattern into a band matrix form
          with a small bandwidth. The resulting index numbers are reversed.
        * ``'metis'``: METIS is a set of serial algorithms for partitioning graphs,
          partitioning finite element meshes, and producing fill reducing orderings
          for sparse matrices. This algorithm is already available in DGL as
          ``dgl.partition.metis_partition_assignment``.
        * ``'custom'``: Lets the user supply a self-designed node permutation.
          When ``'custom'`` is specified here, ``'nodes_perm'`` must be passed
          via the ``permute_config`` argument; the graph is then re-ordered
          according to that node permutation.
    store_ids: bool, optional
        Passed through to ``dgl.node_subgraph()``. If True, the raw IDs of the
        extracted nodes and edges are stored in the ``ndata`` and ``edata`` of
        the resulting graph under the names ``dgl.NID`` and ``dgl.EID``,
        respectively.
    permute_config: dict, optional
        Additional configuration for the chosen ``permute_algo``.

        * For ``'rcmk'``, this argument is not required.
        * For ``'metis'``, the number of partitions ``'k'`` is required,
          e.g. ``{'k': 10}``.
        * For ``'custom'``, ``'nodes_perm'`` is required,
          e.g. ``{'nodes_perm': [1, 2, 3, 0]}``.

    Returns
    -------
    DGLGraph
        The re-ordered graph.

    Raises
    ------
    DGLError
        If ``g`` is not homogeneous, if ``permute_algo`` is not one of the
        supported names, if the entry required by the chosen algorithm is
        missing from ``permute_config``, or if the length of a custom
        ``'nodes_perm'`` differs from the number of nodes in ``g``.

    Examples
    --------
    >>> import dgl
    >>> import torch
    >>> g = dgl.graph((torch.tensor([0, 1, 2, 3, 4]), torch.tensor([2, 2, 3, 2, 3])))
    >>> g.ndata['h'] = torch.arange(g.num_nodes() * 2).view(g.num_nodes(), 2)
    >>> g.edata['w'] = torch.arange(g.num_edges() * 1).view(g.num_edges(), 1)
    >>> g.ndata
    {'h': tensor([[0, 1],
            [2, 3],
            [4, 5],
            [6, 7],
            [8, 9]])}
    >>> g.edata
    {'w': tensor([[0],
            [1],
            [2],
            [3],
            [4]])}

    Reorder according to the ``'rcmk'`` permute_algo, which is implemented in
    ``scipy.sparse.csgraph.reverse_cuthill_mckee()``.

    >>> rg = dgl.reorder(g)
    >>> rg.ndata
    {'h': tensor([[8, 9],
            [6, 7],
            [2, 3],
            [4, 5],
            [0, 1]]), '_ID': tensor([4, 3, 1, 2, 0])}
    >>> rg.edata
    {'w': tensor([[4],
            [3],
            [1],
            [2],
            [0]]), '_ID': tensor([4, 3, 1, 2, 0])}

    Reorder according to the ``'metis'`` permute_algo, which is implemented in
    ``dgl.partition.metis_partition_assignment()``.

    >>> rg = dgl.reorder(g, 'metis', permute_config={'k':2})
    >>> rg.ndata
    {'h': tensor([[4, 5],
            [2, 3],
            [0, 1],
            [8, 9],
            [6, 7]]), '_ID': tensor([2, 1, 0, 4, 3])}
    >>> rg.edata
    {'w': tensor([[2],
            [1],
            [0],
            [4],
            [3]]), '_ID': tensor([2, 1, 0, 4, 3])}

    Reorder according to the ``'custom'`` permute_algo with a user-provided
    ``nodes_perm``.

    >>> nodes_perm = torch.randperm(g.num_nodes())
    >>> nodes_perm
    tensor([3, 2, 0, 4, 1])
    >>> rg = dgl.reorder(g, 'custom', permute_config={'nodes_perm':nodes_perm})
    >>> rg.ndata
    {'h': tensor([[6, 7],
            [4, 5],
            [0, 1],
            [8, 9],
            [2, 3]]), '_ID': tensor([3, 2, 0, 4, 1])}
    >>> rg.edata
    {'w': tensor([[3],
            [2],
            [0],
            [4],
            [1]]), '_ID': tensor([3, 2, 0, 4, 1])}
    """
    if not g.is_homogeneous:
        raise DGLError("Homograph is supported only.")
    expected_algo = ['rcmk', 'metis', 'custom']
    if permute_algo not in expected_algo:
        raise DGLError("Unexpected permute_algo is specified: {}. Expected algos: {}".format(
            permute_algo, expected_algo))
    if permute_algo == 'rcmk':
        nodes_perm = RCMKPerm(g)
    elif permute_algo == 'metis':
        if permute_config is None or 'k' not in permute_config:
            raise DGLError(
                "Partition parts 'k' is required for metis. Please specify in permute_config.")
        nodes_perm = METISPerm(g, permute_config['k'])
    else:
        # Only 'custom' can reach this branch: the name was validated above.
        if permute_config is None or 'nodes_perm' not in permute_config:
            # Adjacent literals instead of a backslash continuation inside the
            # string, so no source indentation leaks into the message.
            raise DGLError(
                "permute_algo is specified as custom, but no 'nodes_perm' is "
                "specified in permute_config.")
        nodes_perm = permute_config['nodes_perm']
        # Only the length is checked; whether nodes_perm is a true permutation
        # (no duplicates, all IDs in range) is not verified here.
        if len(nodes_perm) != g.num_nodes():
            raise DGLError(
                "Length of passed in nodes_perm[{}] does not "
                "match graph num_nodes[{}].".format(len(nodes_perm), g.num_nodes()))
    return subgraph.node_subgraph(g, nodes_perm, store_ids=store_ids)
# Attach ``reorder`` to DGLHeteroGraph under the attribute name ``reorder``,
# wrapped by ``utils.alias_func``.
# NOTE(review): presumably this makes ``g.reorder(...)`` forward to
# ``reorder(g, ...)`` — confirm against utils.alias_func.
DGLHeteroGraph.reorder = utils.alias_func(reorder)
def METISPerm(g, k):
    """Compute a node permutation that groups nodes by METIS partition.

    For internal use.

    Parameters
    ----------
    g : DGLGraph
        The graph to partition. If it is not on CPU, a CPU copy is passed to
        ``metis_partition_assignment``.
    k : int
        Number of partitions requested from METIS.

    Returns
    -------
    numpy.ndarray
        ``int64`` array of node IDs. Nodes are grouped by ascending partition
        ID; within one partition they appear in descending order of their
        original node ID.
    """
    pids = metis_partition_assignment(
        g if g.device == F.cpu() else g.to(F.cpu()), k)
    # assumes pids is a 1-D array of non-negative partition ids — TODO confirm
    pids = F.asnumpy(pids)
    # A stable sort of the *reversed* assignment groups nodes by partition
    # while keeping higher node IDs first inside each partition. This is the
    # same ordering a back-to-front counting sort produces, without a
    # Python-level loop over every node.
    order = np.argsort(pids[::-1], kind='stable')
    # Positions in the reversed array map back to node IDs as (n - 1 - pos).
    perm = (pids.shape[0] - 1) - order
    return perm.astype(np.int64, copy=False)
def RCMKPerm(g):
    """Compute a node permutation with the Reverse Cuthill-McKee algorithm.

    For internal use.

    Parameters
    ----------
    g : DGLGraph
        The graph whose adjacency matrix is fed to
        ``scipy.sparse.csgraph.reverse_cuthill_mckee``.

    Returns
    -------
    numpy.ndarray
        A copy of the permutation array returned by SciPy.
    """
    required = 'csr'
    # Flatten every format list reported by the graph into one list.
    available = sum(g.formats().values(), [])
    if required not in available:
        # The adjacency is requested in CSR below, so make sure the graph
        # is allowed to hold that format.
        g = g.formats(available + [required])
    adjacency = g.adj(scipy_fmt=required)
    # NOTE(review): the copy presumably turns SciPy's result into a plain,
    # independently owned array before it is handed on — confirm.
    return sparse.csgraph.reverse_cuthill_mckee(adjacency).copy()
_init_api("dgl.transform") _init_api("dgl.transform")
...@@ -1480,5 +1480,94 @@ def test_remove_selfloop(idtype): ...@@ -1480,5 +1480,94 @@ def test_remove_selfloop(idtype):
raise_error = True raise_error = True
assert raise_error assert raise_error
@parametrize_dtype
def test_reorder(idtype):
    """Exercise dgl.reorder() with every permute_algo and its error paths."""

    def _reorder_raises(graph, **kwargs):
        # Report whether dgl.reorder() rejects the call with a DGLError.
        # Only DGLError is caught so that unrelated failures (typos, missing
        # attributes, backend errors) surface instead of being counted as
        # an "expected" error.
        try:
            dgl.reorder(graph, **kwargs)
        except dgl.DGLError:
            return True
        return False

    g = dgl.graph(([0, 1, 2, 3, 4], [2, 2, 3, 2, 3]),
                  idtype=idtype, device=F.ctx())
    g.ndata['h'] = F.copy_to(F.randn((g.num_nodes(), 3)), ctx=F.ctx())
    g.edata['w'] = F.copy_to(F.randn((g.num_edges(), 2)), ctx=F.ctx())

    # call with default args
    rg = dgl.reorder(g)

    # reorder back to original according to stored ids
    rg2 = dgl.reorder(rg, 'custom', permute_config={
        'nodes_perm': np.argsort(F.asnumpy(rg.ndata[dgl.NID]))})
    assert F.array_equal(g.ndata['h'], rg2.ndata['h'])
    assert F.array_equal(g.edata['w'], rg2.edata['w'])

    # do not store ids
    rg = dgl.reorder(g, store_ids=False)
    assert dgl.NID not in rg.ndata.keys()
    assert dgl.EID not in rg.edata.keys()

    # metis does not work on windows.
    if os.name != 'nt':
        # metis_partition may fail for small graph.
        mg = create_large_graph(1000).to(F.ctx())

        # call with metis strategy, but k is not specified
        assert _reorder_raises(mg, permute_algo='metis')

        # call with metis strategy, k is specified; any exception here is a
        # genuine failure and should propagate with its traceback.
        dgl.reorder(mg, permute_algo='metis', permute_config={'k': 2})

    # call with qualified nodes_perm specified; must not raise
    nodes_perm = np.random.permutation(g.num_nodes())
    dgl.reorder(g, permute_algo='custom', permute_config={
        'nodes_perm': nodes_perm})

    # call with unqualified nodes_perm specified (one element too short)
    assert _reorder_raises(g, permute_algo='custom', permute_config={
        'nodes_perm': nodes_perm[:g.num_nodes() - 1]})

    # call with unsupported strategy
    assert _reorder_raises(g, permute_algo='cmk')

    # heterograph: not supported. Two relation types are used so the graph
    # has more than one edge type and more than one node type.
    # NOTE(review): assumes a single-relation graph would count as
    # homogeneous and therefore not trigger the check — confirm.
    hg = dgl.heterograph({
        ('user', 'follow', 'user'): ([0, 1], [1, 2]),
        ('user', 'plays', 'game'): ([0, 1], [0, 1]),
    }, idtype=idtype, device=F.ctx())
    assert _reorder_raises(hg)

    # add 'csr' format if needed
    fg = g.formats('csc')
    assert 'csr' not in sum(fg.formats().values(), [])
    rfg = dgl.reorder(fg)
    assert 'csr' in sum(rfg.formats().values(), [])
if __name__ == '__main__': if __name__ == '__main__':
test_partition_with_halo() test_partition_with_halo()
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment