[GraphBolt] Fix Link Prediction example evaluation function bug and enable CUDA execution (#6522)

Co-authored-by: Ubuntu <ubuntu@ip-172-31-42-141.ap-northeast-1.compute.internal>

[GraphBolt] Fix Link Prediction example evaluation function bug and enable CUDA execution (#6522)
Co-authored-by: Ubuntu <ubuntu@ip-172-31-42-141.ap-northeast-1.compute.internal>
a24a38bc · LastWhisper · GitHub · 65efb4f5 · a24a38bc · a24a38bc
Unverified Commit a24a38bc authored Nov 08, 2023 by LastWhisper Committed by GitHub Nov 08, 2023
3 changed files
--- a/examples/sampling/graphbolt/link_prediction.py
+++ b/examples/sampling/graphbolt/link_prediction.py
@@ -103,7 +103,9 @@ def create_dataloader(args, graph, features, itemset, is_train=True):
    # Initialize the ItemSampler to sample mini-batche from the dataset.
    ############################################################################
    datapipe = gb.ItemSampler(
-        itemset, batch_size=args.batch_size, shuffle=is_train
+        itemset,
+        batch_size=args.train_batch_size if is_train else args.eval_batch_size,
+        shuffle=is_train,
    )

    ############################################################################
@@ -205,7 +207,7 @@ def create_dataloader(args, graph, features, itemset, is_train=True):
    return dataloader


-def to_binary_link_dgl_computing_pack(data: gb.MiniBatch):
+def to_binary_link_dgl_computing_pack(data: gb.DGLMiniBatch):
    """Convert the minibatch to a training pair and a label tensor."""
    pos_src, pos_dst = data.positive_node_pairs
    neg_src, neg_dst = data.negative_node_pairs
@@ -249,8 +251,8 @@ def evaluate(args, graph, features, itemset, model):
        )

        # Split the score into positive and negative parts.
-        pos_score = score[: data.compacted_node_pairs[0].shape[0]]
-        neg_score = score[data.compacted_node_pairs[0].shape[0] :]
+        pos_score = score[: data.positive_node_pairs[0].shape[0]]
+        neg_score = score[data.positive_node_pairs[0].shape[0] :]

        # Append the score to the list.
        pos_pred.append(pos_score)
@@ -311,7 +313,11 @@ def parse_args():
    parser.add_argument("--epochs", type=int, default=10)
    parser.add_argument("--lr", type=float, default=0.0005)
    parser.add_argument("--neg-ratio", type=int, default=1)
-    parser.add_argument("--batch-size", type=int, default=512)
+    parser.add_argument("--train-batch-size", type=int, default=512)
+    # TODO [Issue#6534]: Use model.inference instead of dataloader to evaluate.
+    # Since neg_ratio in the valid/test set is 1000, which is too large for
+    # GPU memory, we should use a small batch size to evaluate.
+    parser.add_argument("--eval-batch-size", type=int, default=2)
    parser.add_argument("--num-workers", type=int, default=4)
    parser.add_argument(
        "--early-stop",
@@ -350,7 +356,8 @@ def main(args):

    in_size = features.size("node", None, "feat")[0]
    hidden_channels = 256
-    model = SAGE(in_size, hidden_channels)
+    args.device = torch.device(args.device)
+    model = SAGE(in_size, hidden_channels).to(args.device)

    # Model training.
    print("Training...")

--- a/python/dgl/graphbolt/subgraph_sampler.py
+++ b/python/dgl/graphbolt/subgraph_sampler.py
@@ -116,8 +116,17 @@ class SubgraphSampler(MiniBatchTransformer):
            compacted = compacted[2:]
            if has_neg_src:
                compacted_negative_srcs = compacted.pop(0)
+                # Since we need to calculate the neg_ratio according to the
+                # compacted_negative_srcs shape, we need to reshape it back.
+                compacted_negative_srcs = compacted_negative_srcs.view(
+                    neg_src.shape
+                )
            if has_neg_dst:
                compacted_negative_dsts = compacted.pop(0)
+                # Same as above.
+                compacted_negative_dsts = compacted_negative_dsts.view(
+                    neg_dst.shape
+                )
        return (
            seeds,
            compacted_node_pairs,

--- a/tests/python/pytorch/graphbolt/impl/test_minibatch.py
+++ b/tests/python/pytorch/graphbolt/impl/test_minibatch.py
@@ -153,8 +153,8 @@ def test_minibatch_representation():
    negative_dsts = torch.tensor([[2], [8], [8]])
    input_nodes = torch.tensor([8, 1, 6, 5, 9, 0, 2, 4])
    compacted_node_pairs = (torch.tensor([0, 1, 2]), torch.tensor([3, 4, 5]))
-    compacted_negative_srcs = torch.tensor([0, 1, 2])
-    compacted_negative_dsts = torch.tensor([6, 0, 0])
+    compacted_negative_srcs = torch.tensor([[0], [1], [2]])
+    compacted_negative_dsts = torch.tensor([[6], [0], [0]])
    labels = torch.tensor([0.0, 1.0, 2.0])
    # Test minibatch without data.
    minibatch = gb.MiniBatch()
@@ -217,8 +217,12 @@ def test_minibatch_representation():
                                       [8],
                                       [8]])}],
          compacted_node_pairs=(tensor([0, 1, 2]), tensor([3, 4, 5])),
-          compacted_negative_srcs=tensor([0, 1, 2]),
-          compacted_negative_dsts=tensor([6, 0, 0]),
+          compacted_negative_srcs=tensor([[0],
+                                          [1],
+                                          [2]]),
+          compacted_negative_dsts=tensor([[6],
+                                          [0],
+                                          [0]]),
       )"""
    )
    result = str(minibatch)
@@ -267,8 +271,8 @@ def test_dgl_minibatch_representation():
    negative_dsts = torch.tensor([[2], [8], [8]])
    input_nodes = torch.tensor([8, 1, 6, 5, 9, 0, 2, 4])
    compacted_node_pairs = (torch.tensor([0, 1, 2]), torch.tensor([3, 4, 5]))
-    compacted_negative_srcs = torch.tensor([0, 1, 2])
-    compacted_negative_dsts = torch.tensor([6, 0, 0])
+    compacted_negative_srcs = torch.tensor([[0], [1], [2]])
+    compacted_negative_dsts = torch.tensor([[6], [0], [0]])
    labels = torch.tensor([0.0, 1.0, 2.0])
    # Test dglminibatch with all attributes.
    minibatch = gb.MiniBatch(