[GraphBolt] Fix DGL_MiniBatch layout. (#6604)

Co-authored-by: Ubuntu <ubuntu@ip-172-31-0-133.us-west-2.compute.internal>

[GraphBolt] Fix DGL_MiniBatch layout. (#6604)
Co-authored-by: Ubuntu <ubuntu@ip-172-31-0-133.us-west-2.compute.internal>
e242de9c · yxy235 · GitHub · a8e672e7 · e242de9c · e242de9c
Unverified commit e242de9c, authored Nov 24, 2023 by yxy235 and committed by GitHub on Nov 24, 2023.
3 changed files
--- a/python/dgl/graphbolt/minibatch.py
+++ b/python/dgl/graphbolt/minibatch.py
@@ -642,25 +642,11 @@ def _dgl_minibatch_str(dglminibatch: DGLMiniBatch) -> str:
        # indentation on top of the original if the original data output has
        # line feeds.
        if isinstance(val, list):
-            if len(val) == 0:
-                val = "[]"
-            # Special handling of blocks data. Each element of list occupies
-            # one row and is further structured.
-            elif name == "blocks":
-                blocks_strs = []
-                for block in val:
-                    block_str = str(block).replace(" ", "\n")
-                    block_str = _add_indent(block_str, len("Block") + 1)
-                    blocks_strs.append(block_str)
-                val = "[" + ",\n".join(blocks_strs) + "]"
-            else:
-                val = [
-                    _add_indent(
-                        str(val_str), len(str(val_str).split("': ")[0]) + 3
-                    )
-                    for val_str in val
-                ]
-                val = "[" + ",\n".join(val) + "]"
+            val = [str(val_str) for val_str in val]
+            val = "[" + ",\n".join(val) + "]"
+        elif isinstance(val, tuple):
+            val = [str(val_str) for val_str in val]
+            val = "(" + ",\n".join(val) + ")"
        else:
            val = str(val)
        final_str = (

--- a/tests/python/pytorch/graphbolt/impl/test_minibatch.py
+++ b/tests/python/pytorch/graphbolt/impl/test_minibatch.py
@@ -229,7 +229,7 @@ def test_minibatch_representation():
    assert result == expect_result, print(expect_result, result)


-def test_dgl_minibatch_representation():
+def test_dgl_minibatch_representation_homo():
    node_pairs = [
        (
            torch.tensor([0, 1, 2, 2, 2, 1]),
@@ -290,24 +290,118 @@ def test_dgl_minibatch_representation():
    )
    dgl_minibatch = minibatch.to_dgl()
    expect_result = str(
-        """DGLMiniBatch(positive_node_pairs=(tensor([0, 1, 2]), tensor([3, 4, 5])),
+        """DGLMiniBatch(positive_node_pairs=(tensor([0, 1, 2]),
+                                  tensor([3, 4, 5])),
             output_nodes=None,
             node_features={'x': tensor([7, 6, 2, 2])},
-             negative_node_pairs=(tensor([0, 1, 2]), tensor([6, 0, 0])),
+             negative_node_pairs=(tensor([0, 1, 2]),
+                                  tensor([6, 0, 0])),
             labels=tensor([0., 1., 2.]),
             input_nodes=None,
             edge_features=[{'x': tensor([[8],
-                                          [1],
-                                          [6]])},
+                                    [1],
+                                    [6]])},
                            {'x': tensor([[2],
-                                          [8],
-                                          [8]])}],
-             blocks=[Block(num_src_nodes=4,
-                           num_dst_nodes=4,
-                           num_edges=6),
-                     Block(num_src_nodes=3,
-                           num_dst_nodes=2,
-                           num_edges=3)],
+                                    [8],
+                                    [8]])}],
+             blocks=[Block(num_src_nodes=4, num_dst_nodes=4, num_edges=6),
+                     Block(num_src_nodes=3, num_dst_nodes=2, num_edges=3)],
+          )"""
+    )
+    result = str(dgl_minibatch)
+    assert result == expect_result, print(result)
+
+
+def test_dgl_minibatch_representation_hetero():
+    node_pairs = [
+        {
+            relation: (torch.tensor([0, 1, 1]), torch.tensor([0, 1, 2])),
+            reverse_relation: (torch.tensor([1, 0]), torch.tensor([2, 3])),
+        },
+        {relation: (torch.tensor([0, 1]), torch.tensor([1, 0]))},
+    ]
+    original_column_node_ids = [
+        {"B": torch.tensor([10, 11, 12]), "A": torch.tensor([5, 7, 9, 11])},
+        {"B": torch.tensor([10, 11])},
+    ]
+    original_row_node_ids = [
+        {
+            "A": torch.tensor([5, 7, 9, 11]),
+            "B": torch.tensor([10, 11, 12]),
+        },
+        {
+            "A": torch.tensor([5, 7]),
+            "B": torch.tensor([10, 11]),
+        },
+    ]
+    original_edge_ids = [
+        {
+            relation: torch.tensor([19, 20, 21]),
+            reverse_relation: torch.tensor([23, 26]),
+        },
+        {relation: torch.tensor([10, 12])},
+    ]
+    node_features = {
+        ("A", "x"): torch.tensor([6, 4, 0, 1]),
+    }
+    edge_features = [
+        {(relation, "x"): torch.tensor([4, 2, 4])},
+        {(relation, "x"): torch.tensor([0, 6])},
+    ]
+    subgraphs = []
+    for i in range(2):
+        subgraphs.append(
+            gb.FusedSampledSubgraphImpl(
+                node_pairs=node_pairs[i],
+                original_column_node_ids=original_column_node_ids[i],
+                original_row_node_ids=original_row_node_ids[i],
+                original_edge_ids=original_edge_ids[i],
+            )
+        )
+    negative_srcs = {"B": torch.tensor([[8], [1], [6]])}
+    negative_dsts = {"B": torch.tensor([[2], [8], [8]])}
+    compacted_node_pairs = {
+        relation: (torch.tensor([0, 1, 2]), torch.tensor([3, 4, 5])),
+        reverse_relation: (torch.tensor([0, 1, 2]), torch.tensor([3, 4, 5])),
+    }
+    compacted_negative_srcs = {relation: torch.tensor([[0], [1], [2]])}
+    compacted_negative_dsts = {relation: torch.tensor([[6], [0], [0]])}
+    # Test dglminibatch with all attributes.
+    minibatch = gb.MiniBatch(
+        seed_nodes={"B": torch.tensor([10, 15])},
+        node_pairs=node_pairs,
+        sampled_subgraphs=subgraphs,
+        node_features=node_features,
+        edge_features=edge_features,
+        labels={"B": torch.tensor([2, 5])},
+        negative_srcs=negative_srcs,
+        negative_dsts=negative_dsts,
+        compacted_node_pairs=compacted_node_pairs,
+        input_nodes={
+            "A": torch.tensor([5, 7, 9, 11]),
+            "B": torch.tensor([10, 11, 12]),
+        },
+        compacted_negative_srcs=compacted_negative_srcs,
+        compacted_negative_dsts=compacted_negative_dsts,
+    )
+    dgl_minibatch = minibatch.to_dgl()
+    expect_result = str(
+        """DGLMiniBatch(positive_node_pairs={'A:r:B': (tensor([0, 1, 2]), tensor([3, 4, 5])), 'B:rr:A': (tensor([0, 1, 2]), tensor([3, 4, 5]))},
+             output_nodes=None,
+             node_features={('A', 'x'): tensor([6, 4, 0, 1])},
+             negative_node_pairs={'A:r:B': (tensor([0, 1, 2]), tensor([6, 0, 0]))},
+             labels={'B': tensor([2, 5])},
+             input_nodes=None,
+             edge_features=[{('A:r:B', 'x'): tensor([4, 2, 4])},
+                            {('A:r:B', 'x'): tensor([0, 6])}],
+             blocks=[Block(num_src_nodes={'A': 4, 'B': 3},
+                           num_dst_nodes={'A': 4, 'B': 3},
+                           num_edges={('A', 'r', 'B'): 3, ('B', 'rr', 'A'): 2},
+                           metagraph=[('A', 'B', 'r'), ('B', 'A', 'rr')]),
+                     Block(num_src_nodes={'A': 2, 'B': 2},
+                           num_dst_nodes={'B': 2},
+                           num_edges={('A', 'r', 'B'): 2},
+                           metagraph=[('A', 'B', 'r')])],
          )"""
    )
    result = str(dgl_minibatch)

--- a/tests/python/pytorch/graphbolt/test_integration.py
+++ b/tests/python/pytorch/graphbolt/test_integration.py
@@ -61,7 +61,8 @@ def test_integration_link_prediction():
    )
    expected = [
        str(
-            """DGLMiniBatch(positive_node_pairs=(tensor([0, 1, 1, 1]), tensor([2, 3, 3, 1])),
+            """DGLMiniBatch(positive_node_pairs=(tensor([0, 1, 1, 1]),
+                                  tensor([2, 3, 3, 1])),
             output_nodes=None,
             node_features={'feat': tensor([[0.5160, 0.2486],
                                    [0.8672, 0.2276],
@@ -69,58 +70,51 @@ def test_integration_link_prediction():
                                    [0.2109, 0.1089],
                                    [0.9634, 0.2294],
                                    [0.5503, 0.8223]])},
-             negative_node_pairs=(tensor([0, 1, 1, 1]), tensor([0, 3, 4, 5])),
+             negative_node_pairs=(tensor([0, 1, 1, 1]),
+                                  tensor([0, 3, 4, 5])),
             labels=None,
             input_nodes=None,
             edge_features=[{},
                            {}],
-             blocks=[Block(num_src_nodes=6,
-                           num_dst_nodes=6,
-                           num_edges=2),
-                     Block(num_src_nodes=6,
-                           num_dst_nodes=6,
-                           num_edges=2)],
+             blocks=[Block(num_src_nodes=6, num_dst_nodes=6, num_edges=2),
+                     Block(num_src_nodes=6, num_dst_nodes=6, num_edges=2)],
          )"""
        ),
        str(
-            """DGLMiniBatch(positive_node_pairs=(tensor([0, 1, 1, 2]), tensor([0, 0, 1, 1])),
+            """DGLMiniBatch(positive_node_pairs=(tensor([0, 1, 1, 2]),
+                                  tensor([0, 0, 1, 1])),
             output_nodes=None,
             node_features={'feat': tensor([[0.8672, 0.2276],
                                    [0.5503, 0.8223],
                                    [0.9634, 0.2294],
                                    [0.5160, 0.2486],
                                    [0.6172, 0.7865]])},
-             negative_node_pairs=(tensor([0, 1, 1, 2]), tensor([1, 3, 4, 1])),
+             negative_node_pairs=(tensor([0, 1, 1, 2]),
+                                  tensor([1, 3, 4, 1])),
             labels=None,
             input_nodes=None,
             edge_features=[{},
                            {}],
-             blocks=[Block(num_src_nodes=5,
-                           num_dst_nodes=5,
-                           num_edges=2),
-                     Block(num_src_nodes=5,
-                           num_dst_nodes=5,
-                           num_edges=2)],
+             blocks=[Block(num_src_nodes=5, num_dst_nodes=5, num_edges=2),
+                     Block(num_src_nodes=5, num_dst_nodes=5, num_edges=2)],
          )"""
        ),
        str(
-            """DGLMiniBatch(positive_node_pairs=(tensor([0, 1]), tensor([0, 0])),
+            """DGLMiniBatch(positive_node_pairs=(tensor([0, 1]),
+                                  tensor([0, 0])),
             output_nodes=None,
             node_features={'feat': tensor([[0.5160, 0.2486],
                                    [0.5503, 0.8223],
                                    [0.8672, 0.2276],
                                    [0.9634, 0.2294]])},
-             negative_node_pairs=(tensor([0, 1]), tensor([1, 2])),
+             negative_node_pairs=(tensor([0, 1]),
+                                  tensor([1, 2])),
             labels=None,
             input_nodes=None,
             edge_features=[{},
                            {}],
-             blocks=[Block(num_src_nodes=4,
-                           num_dst_nodes=4,
-                           num_edges=2),
-                     Block(num_src_nodes=4,
-                           num_dst_nodes=3,
-                           num_edges=2)],
+             blocks=[Block(num_src_nodes=4, num_dst_nodes=4, num_edges=2),
+                     Block(num_src_nodes=4, num_dst_nodes=3, num_edges=2)],
          )"""
        ),
    ]
@@ -183,7 +177,8 @@ def test_integration_node_classification():
    )
    expected = [
        str(
-            """DGLMiniBatch(positive_node_pairs=(tensor([0, 1, 1, 1]), tensor([2, 3, 3, 1])),
+            """DGLMiniBatch(positive_node_pairs=(tensor([0, 1, 1, 1]),
+                                  tensor([2, 3, 3, 1])),
             output_nodes=None,
             node_features={'feat': tensor([[0.5160, 0.2486],
                                    [0.8672, 0.2276],
@@ -196,16 +191,13 @@ def test_integration_node_classification():
             input_nodes=None,
             edge_features=[{},
                            {}],
-             blocks=[Block(num_src_nodes=6,
-                           num_dst_nodes=5,
-                           num_edges=5),
-                     Block(num_src_nodes=5,
-                           num_dst_nodes=4,
-                           num_edges=4)],
+             blocks=[Block(num_src_nodes=6, num_dst_nodes=5, num_edges=5),
+                     Block(num_src_nodes=5, num_dst_nodes=4, num_edges=4)],
          )"""
        ),
        str(
-            """DGLMiniBatch(positive_node_pairs=(tensor([0, 1, 1, 2]), tensor([0, 0, 1, 1])),
+            """DGLMiniBatch(positive_node_pairs=(tensor([0, 1, 1, 2]),
+                                  tensor([0, 0, 1, 1])),
             output_nodes=None,
             node_features={'feat': tensor([[0.8672, 0.2276],
                                    [0.5503, 0.8223],
@@ -215,16 +207,13 @@ def test_integration_node_classification():
             input_nodes=None,
             edge_features=[{},
                            {}],
-             blocks=[Block(num_src_nodes=3,
-                           num_dst_nodes=3,
-                           num_edges=2),
-                     Block(num_src_nodes=3,
-                           num_dst_nodes=3,
-                           num_edges=2)],
+             blocks=[Block(num_src_nodes=3, num_dst_nodes=3, num_edges=2),
+                     Block(num_src_nodes=3, num_dst_nodes=3, num_edges=2)],
          )"""
        ),
        str(
-            """DGLMiniBatch(positive_node_pairs=(tensor([0, 1]), tensor([0, 0])),
+            """DGLMiniBatch(positive_node_pairs=(tensor([0, 1]),
+                                  tensor([0, 0])),
             output_nodes=None,
             node_features={'feat': tensor([[0.5160, 0.2486],
                                    [0.5503, 0.8223],
@@ -234,12 +223,8 @@ def test_integration_node_classification():
             input_nodes=None,
             edge_features=[{},
                            {}],
-             blocks=[Block(num_src_nodes=3,
-                           num_dst_nodes=2,
-                           num_edges=2),
-                     Block(num_src_nodes=2,
-                           num_dst_nodes=2,
-                           num_edges=2)],
+             blocks=[Block(num_src_nodes=3, num_dst_nodes=2, num_edges=2),
+                     Block(num_src_nodes=2, num_dst_nodes=2, num_edges=2)],
          )"""
        ),
    ]