Unverified Commit 2df4a95f authored by Quan (Andy) Gan's avatar Quan (Andy) Gan Committed by GitHub
Browse files

[Feature] Make to_heterogeneous(to_homogeneous(hg)) return hg (#2958)



* make to_heterogeneous and to_homogeneous invertible

* docstring

* oops
Co-authored-by: Jinjing Zhou <VoVAllen@users.noreply.github.com>
parent 60426278
......@@ -631,8 +631,10 @@ def to_heterogeneous(G, ntypes, etypes, ntype_field=NTYPE,
Notes
-----
The returned node and edge types may not necessarily be in the same order as
``ntypes`` and ``etypes``.
* The returned node and edge types may not necessarily be in the same order as
``ntypes`` and ``etypes``.
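* Calling :func:`~dgl.to_homogeneous` on a heterogeneous graph and then calling
:func:`~dgl.to_heterogeneous` on the result with the original node and edge type
names yields a graph identical to the original (ignoring internally stored columns).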
Examples
--------
......@@ -705,7 +707,7 @@ def to_heterogeneous(G, ntypes, etypes, ntype_field=NTYPE,
# relabel nodes to per-type local IDs
ntype_count = np.bincount(ntype_ids, minlength=num_ntypes)
ntype_offset = np.insert(np.cumsum(ntype_count), 0, 0)
ntype_ids_sortidx = np.argsort(ntype_ids)
ntype_ids_sortidx = np.argsort(ntype_ids, kind='stable')
ntype_local_ids = np.zeros_like(ntype_ids)
node_groups = []
for i in range(num_ntypes):
......@@ -848,6 +850,8 @@ def to_homogeneous(G, ndata=None, edata=None, store_type=True, return_count=Fals
to its memory efficiency.
* The ``ntype_count`` and ``etype_count`` lists can help speed up some operations.
See :class:`~dgl.nn.pytorch.conv.RelGraphConv` for such an example.
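* Calling :func:`~dgl.to_homogeneous` on a heterogeneous graph and then calling
:func:`~dgl.to_heterogeneous` on the result with the original node and edge type
names yields a graph identical to the original (ignoring internally stored columns).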
Examples
--------
......
......@@ -10,6 +10,7 @@ import unittest, pytest
from dgl import DGLError
import test_utils
from test_utils import parametrize_dtype, get_cases
from utils import assert_is_identical_hetero
from scipy.sparse import rand
def create_test_heterograph(idtype):
......@@ -1111,6 +1112,14 @@ def test_to_homo2(idtype):
for i, count in enumerate(etype_count):
assert count == hg.num_edges(hg.canonical_etypes[i])
@parametrize_dtype
def test_invertible_conversion(idtype):
    """Round-trip a heterograph through ``to_homogeneous`` and back.

    The heterograph rebuilt from the homogeneous graph, using the original
    node and edge type names, must be identical to the one we started with.
    """
    original = create_test_heterograph(idtype)
    flattened = dgl.to_homogeneous(original)
    restored = dgl.to_heterogeneous(flattened, original.ntypes, original.etypes)
    # Internal columns are excluded from the comparison.
    assert_is_identical_hetero(original, restored, ignore_internal_data=True)
@parametrize_dtype
def test_metagraph_reachable(idtype):
g = create_test_heterograph(idtype)
......
......@@ -11,46 +11,7 @@ import io
import unittest, pytest
import test_utils
from test_utils import parametrize_dtype, get_cases
def _assert_is_identical(g, g2):
    """Assert that two homogeneous graphs agree in structure and features.

    Checks, in order: the ``is_readonly`` flag, the node count, the source
    and destination arrays of all edges in edge-ID order, the number of node
    and edge feature columns, and finally that every feature column of ``g``
    is close to the same-named column of ``g2``.
    """
    assert g.is_readonly == g2.is_readonly
    assert g.number_of_nodes() == g2.number_of_nodes()

    # Edges are fetched in edge-ID order so endpoints line up pairwise.
    src_a, dst_a = g.all_edges(order='eid')
    src_b, dst_b = g2.all_edges(order='eid')
    for lhs, rhs in ((src_a, src_b), (dst_a, dst_b)):
        assert F.array_equal(lhs, rhs)

    assert len(g.ndata) == len(g2.ndata)
    assert len(g.edata) == len(g2.edata)

    for name in g.ndata:
        assert F.allclose(g.ndata[name], g2.ndata[name])
    for name in g.edata:
        assert F.allclose(g.edata[name], g2.edata[name])
def _assert_is_identical_hetero(g, g2):
    """Assert that two heterogeneous graphs agree in schema, structure and features.

    Compares the ``is_readonly`` flag, node types, canonical edge types and
    metagraph edges, then, per node type, the node count and feature columns,
    and, per canonical edge type, the edge endpoints (in edge-ID order) and
    the feature columns present on ``g``.
    """
    assert g.is_readonly == g2.is_readonly
    assert g.ntypes == g2.ntypes
    assert g.canonical_etypes == g2.canonical_etypes

    # Every metagraph edge of g must exist in g2 with equal attributes.
    for meta_edge, meta_attrs in g.metagraph().edges(keys=True).items():
        assert g2.metagraph().edges(keys=True)[meta_edge] == meta_attrs

    # Node ID spaces and node feature spaces.
    for ntype in g.ntypes:
        assert g.number_of_nodes(ntype) == g2.number_of_nodes(ntype)
        assert len(g.nodes[ntype].data) == len(g2.nodes[ntype].data)
        for name in g.nodes[ntype].data:
            assert F.allclose(g.nodes[ntype].data[name], g2.nodes[ntype].data[name])

    # Edge ID spaces and edge feature spaces.
    for etype in g.canonical_etypes:
        src_a, dst_a = g.all_edges(etype=etype, order='eid')
        src_b, dst_b = g2.all_edges(etype=etype, order='eid')
        assert F.array_equal(src_a, src_b)
        assert F.array_equal(dst_a, dst_b)
        for name in g.edges[etype].data:
            assert F.allclose(g.edges[etype].data[name], g2.edges[etype].data[name])
from utils import assert_is_identical, assert_is_identical_hetero
def _assert_is_identical_nodeflow(nf1, nf2):
assert nf1.is_readonly == nf2.is_readonly
......@@ -74,13 +35,13 @@ def _assert_is_identical_nodeflow(nf1, nf2):
assert F.allclose(nf1.blocks[i].data[k], nf2.blocks[i].data[k])
def _assert_is_identical_batchedgraph(bg1, bg2):
_assert_is_identical(bg1, bg2)
assert_is_identical(bg1, bg2)
assert bg1.batch_size == bg2.batch_size
assert bg1.batch_num_nodes == bg2.batch_num_nodes
assert bg1.batch_num_edges == bg2.batch_num_edges
def _assert_is_identical_batchedhetero(bg1, bg2):
_assert_is_identical_hetero(bg1, bg2)
assert_is_identical_hetero(bg1, bg2)
for ntype in bg1.ntypes:
assert bg1.batch_num_nodes(ntype) == bg2.batch_num_nodes(ntype)
for canonical_etype in bg1.canonical_etypes:
......
import pytest
import backend as F
import dgl
from dgl.base import is_internal_column
if F._default_context_str == 'cpu':
parametrize_dtype = pytest.mark.parametrize("idtype", [F.int32, F.int64])
......@@ -13,3 +15,58 @@ def check_fail(fn, *args, **kwargs):
return False
except:
return True
def assert_is_identical(g, g2):
    """Assert that two homogeneous graphs match in structure and features.

    The comparison covers the ``is_readonly`` flag, the number of nodes, the
    edge endpoints listed in edge-ID order, the number of node/edge feature
    columns, and the closeness of every feature column found on ``g`` to the
    column of the same name on ``g2``.
    """
    assert g.is_readonly == g2.is_readonly
    assert g.number_of_nodes() == g2.number_of_nodes()

    # order='eid' makes the i-th entries refer to the same edge ID.
    first_src, first_dst = g.all_edges(order='eid')
    second_src, second_dst = g2.all_edges(order='eid')
    assert F.array_equal(first_src, second_src)
    assert F.array_equal(first_dst, second_dst)

    # Column counts are checked first, then the column contents.
    assert len(g.ndata) == len(g2.ndata)
    assert len(g.edata) == len(g2.edata)
    for key in g.ndata:
        assert F.allclose(g.ndata[key], g2.ndata[key])
    for key in g.edata:
        assert F.allclose(g.edata[key], g2.edata[key])
def assert_is_identical_hetero(g, g2, ignore_internal_data=False):
    """Assert that two heterogeneous graphs agree in schema, structure and features.

    Parameters
    ----------
    g, g2 : heterogeneous graphs
        The graphs to compare.
    ignore_internal_data : bool, optional
        If True, feature columns for which ``is_internal_column`` returns True
        are left out of the comparison.  The graphs themselves are not
        modified; the columns are only skipped.

    Raises
    ------
    AssertionError
        If the ``is_readonly`` flags, node types, canonical edge types,
        metagraph edges, per-type node counts, per-type edge endpoints
        (in edge-ID order), or the compared feature columns differ.
    """
    def _compared_keys(frame):
        # Names of the feature columns that take part in the comparison.
        # Filtering here (rather than deleting columns) keeps this assertion
        # helper free of side effects on the graphs passed in.
        if ignore_internal_data:
            return [k for k in frame.keys() if not is_internal_column(k)]
        return list(frame.keys())

    assert g.is_readonly == g2.is_readonly
    assert g.ntypes == g2.ntypes
    assert g.canonical_etypes == g2.canonical_etypes

    # check if two metagraphs are identical
    for edges, features in g.metagraph().edges(keys=True).items():
        assert g2.metagraph().edges(keys=True)[edges] == features

    # check if node ID spaces and feature spaces are equal
    for ntype in g.ntypes:
        assert g.number_of_nodes(ntype) == g2.number_of_nodes(ntype)
        node_keys = _compared_keys(g.nodes[ntype].data)
        node_keys2 = _compared_keys(g2.nodes[ntype].data)
        # Comparing the name sets reports a mismatch as an assertion failure
        # instead of a KeyError during the column lookup below.
        assert set(node_keys) == set(node_keys2)
        for k in node_keys:
            assert F.allclose(g.nodes[ntype].data[k], g2.nodes[ntype].data[k])

    # check if edge ID spaces and feature spaces are equal
    for etype in g.canonical_etypes:
        src, dst = g.all_edges(etype=etype, order='eid')
        src2, dst2 = g2.all_edges(etype=etype, order='eid')
        assert F.array_equal(src, src2)
        assert F.array_equal(dst, dst2)
        edge_keys = _compared_keys(g.edges[etype].data)
        edge_keys2 = _compared_keys(g2.edges[etype].data)
        assert set(edge_keys) == set(edge_keys2)
        for k in edge_keys:
            assert F.allclose(g.edges[etype].data[k], g2.edges[etype].data[k])
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment