[Benchmark] Modify node_classification test to enable `sample_layer_neighbor`. (#7129)

Co-authored-by: Ubuntu <ubuntu@ip-172-31-21-37.ap-northeast-1.compute.internal>

[Benchmark] Modify node_classification test to enable `sample_layer_neighbor`. (#7129)
Co-authored-by: Ubuntu <ubuntu@ip-172-31-21-37.ap-northeast-1.compute.internal>
e60262d3 · caojy1998 · GitHub · bfd7cee1 · e60262d3
Unverified Commit e60262d3 authored Feb 21, 2024 by caojy1998 Committed by GitHub Feb 21, 2024
Hide whitespace changes
Inline Side-by-side

Showing with 27 additions and 2 deletions

examples/sampling/graphbolt/node_classification.py examples/sampling/graphbolt/node_classification.py +27 -2

No files found.
--- a/examples/sampling/graphbolt/node_classification.py
+++ b/examples/sampling/graphbolt/node_classification.py
@@ -117,7 +117,7 @@ def create_dataloader(
    # [Role]:
    # Initialize a neighbor sampler for sampling the neighborhoods of nodes.
    ############################################################################
-    datapipe = datapipe.sample_neighbor(
+    datapipe = getattr(datapipe, args.sample_mode)(
        graph, fanout if job != "infer" else [-1]
    )
@@ -157,7 +157,11 @@ def create_dataloader(
    # [Role]:
    # Initialize a multi-process dataloader to load the data in parallel.
    ############################################################################
-    dataloader = gb.DataLoader(datapipe, num_workers=num_workers)
+    dataloader = gb.DataLoader(
+        datapipe,
+        num_workers=num_workers,
+        overlap_graph_fetch=args.overlap_graph_fetch,
+    )
    # Return the fully-initialized DataLoader object.
    return dataloader
@@ -357,6 +361,13 @@ def parse_args():
        help="Fan-out of neighbor sampling. It is IMPORTANT to keep len(fanout)"
        " identical with the number of layers in your model. Default: 10,10,10",
    )
+    parser.add_argument(
+        "--dataset",
+        type=str,
+        default="ogbn-products",
+        help="The dataset we can use for node classification example. Currently"
+        "dataset ogbn-products, ogbn-arxiv, ogbn-papers100M is supported.",
+    )
    parser.add_argument(
        "--mode",
        default="pinned-cuda",
@@ -364,6 +375,20 @@ def parse_args():
        help="Dataset storage placement and Train device: 'cpu' for CPU and RAM,"
        " 'pinned' for pinned memory in RAM, 'cuda' for GPU and GPU memory.",
    )
+    parser.add_argument(
+        "--sample-mode",
+        default="sample_neighbor",
+        choices=["sample_neighbor", "sample_layer_neighbor"],
+        help="The sampling function when doing layerwise sampling.",
+    )
+    parser.add_argument(
+        "--overlap-graph-fetch",
+        action="store_true",
+        help="An option for enabling overlap_graph_fetch in graphbolt dataloader."
+        "If True, the data loader will overlap the UVA graph fetching operations"
+        "with the rest of operations by using an alternative CUDA stream. Disabled"
+        "by default.",
+    )
    return parser.parse_args()