[Bug fix] [Feature] added option for batching empty data (#2527)

* added option for batching empty data, fixes #2526
* added option for batching empty data, fixes #2526
* decreased line lengths
* removed trailing whitespace
* fixed wrong feature name
* now default behavior when all graphs are empty

Co-authored-by: Minjie Wang <wmjlyjemaine@gmail.com>

[Bug fix] [Feature] added option for batching empty data (#2527)
* added option for batching empty data, fixes #2526
* added option for batching empty data, fixes #2526
* decreased line lengths
* removed trailing whitespace
* fixed wrong feature name
* now default behavior when all graphs are empty

Co-authored-by: Minjie Wang <wmjlyjemaine@gmail.com>
07787664 · Andrew · GitHub · 4d89b54e · 07787664 · 07787664
Unverified commit 07787664, authored Jan 14, 2021 by Andrew; committed by GitHub on Jan 14, 2021.
Showing 3 changed files with 30 additions and 3 deletions

python/dgl/batch.py +6 -3

tests/compute/test_batched_graph.py +12 -0

tests/compute/test_batched_heterograph.py +12 -0

No files found.
--- a/python/dgl/batch.py
+++ b/python/dgl/batch.py
@@ -11,7 +11,8 @@ from . import utils
 __all__ = ['batch', 'unbatch', 'batch_hetero', 'unbatch_hetero']
-def batch(graphs, ndata=ALL, edata=ALL, *, node_attrs=None, edge_attrs=None):
+def batch(graphs, ndata=ALL, edata=ALL, *,
+          node_attrs=None, edge_attrs=None):
    r"""Batch a collection of :class:`DGLGraph` s into one graph for more efficient
    graph computation.
@@ -191,9 +192,10 @@ def batch(graphs, ndata=ALL, edata=ALL, *, node_attrs=None, edge_attrs=None):
    # Batch node feature
    if ndata is not None:
        for ntype_id, ntype in zip(ntype_ids, ntypes):
+            all_empty = all(g._graph.number_of_nodes(ntype_id) == 0 for g in graphs)
            frames = [
                g._node_frames[ntype_id] for g in graphs
-                if g._graph.number_of_nodes(ntype_id) > 0]
+                if g._graph.number_of_nodes(ntype_id) > 0 or all_empty]
            # TODO: do we require graphs with no nodes/edges to have the same schema?  Currently
            # we allow empty graphs to have no features during batching.
            ret_feat = _batch_feat_dicts(frames, ndata, 'nodes["{}"].data'.format(ntype))
@@ -202,9 +204,10 @@ def batch(graphs, ndata=ALL, edata=ALL, *, node_attrs=None, edge_attrs=None):
    # Batch edge feature
    if edata is not None:
        for etype_id, etype in zip(relation_ids, relations):
+            all_empty = all(g._graph.number_of_edges(etype_id) == 0 for g in graphs)
            frames = [
                g._edge_frames[etype_id] for g in graphs
-                if g._graph.number_of_edges(etype_id) > 0]
+                if g._graph.number_of_edges(etype_id) > 0 or all_empty]
            # TODO: do we require graphs with no nodes/edges to have the same schema?  Currently
            # we allow empty graphs to have no features during batching.
            ret_feat = _batch_feat_dicts(frames, edata, 'edges[{}].data'.format(etype))

--- a/tests/compute/test_batched_graph.py
+++ b/tests/compute/test_batched_graph.py
@@ -207,6 +207,18 @@ def test_batch_no_edge(idtype):
    g3.add_nodes(1)  # no edges
    g = dgl.batch([g1, g3, g2]) # should not throw an error
+@parametrize_dtype
+def test_batch_keeps_empty_data(idtype):
+    g1 = dgl.graph(([], [])).astype(idtype).to(F.ctx())
+    g1.ndata["nh"] = F.tensor([])
+    g1.edata["eh"] = F.tensor([]) 
+    g2 = dgl.graph(([], [])).astype(idtype).to(F.ctx())
+    g2.ndata["nh"] = F.tensor([])
+    g2.edata["eh"] = F.tensor([]) 
+    g = dgl.batch([g1, g2])
+    assert "nh" in g.ndata
+    assert "eh" in g.edata    
 def _get_subgraph_batch_info(keys, induced_indices_arr, batch_num_objs):
    """Internal function to compute batch information for subgraphs.
    Parameters

--- a/tests/compute/test_batched_heterograph.py
+++ b/tests/compute/test_batched_heterograph.py
@@ -321,6 +321,18 @@ def test_unbatch2(idtype):
    check_graph_equal(g2, gg2)
    check_graph_equal(g3, gg3)
+@parametrize_dtype
+def test_batch_keeps_empty_data(idtype):
+    g1 = dgl.heterograph({("a", "to", "a"): ([], [])}).astype(idtype).to(F.ctx())
+    g1.nodes["a"].data["nh"] = F.tensor([])
+    g1.edges[("a", "to", "a")].data["eh"] = F.tensor([]) 
+    g2 = dgl.heterograph({("a", "to", "a"): ([], [])}).astype(idtype).to(F.ctx())
+    g2.nodes["a"].data["nh"] = F.tensor([])
+    g2.edges[("a", "to", "a")].data["eh"] = F.tensor([]) 
+    g = dgl.batch([g1, g2])
+    assert "nh" in g.nodes["a"].data
+    assert "eh" in g.edges[("a", "to", "a")].data
 if __name__ == '__main__':
    #test_topology('int32')
    #test_batching_batched('int32')