[Doc] Update NodeDataLoader and EdgeDataLoader for GPU-based neighbor sampling (#3046)

* update docstrings and tidy code
* add docs
* address comments
* Update __init__.py
* address comments

[Doc] Update NodeDataLoader and EdgeDataLoader for GPU-based neighbor sampling (#3046)
* update docstrings and tidy code
* add docs
* address comments
* Update __init__.py
* address comments
427a5a96 · Quan (Andy) Gan · GitHub · acd21a6d · 427a5a96 · 427a5a96
Unverified commit 427a5a96, authored Jun 25, 2021 by Quan (Andy) Gan; committed by GitHub on Jun 25, 2021.
6 changed files
--- a/examples/pytorch/graphsage/train_sampling_unsupervised.py
+++ b/examples/pytorch/graphsage/train_sampling_unsupervised.py
@@ -75,7 +75,7 @@ def run(proc_id, n_gpus, args, devices, data):

    # Create PyTorch DataLoader for constructing blocks
    n_edges = g.num_edges()
-    train_seeds = np.arange(n_edges)
+    train_seeds = th.arange(n_edges)

    # Create sampler
    sampler = dgl.dataloading.MultiLayerNeighborSampler(
@@ -85,13 +85,13 @@ def run(proc_id, n_gpus, args, devices, data):
        # For each edge with ID e in Reddit dataset, the reverse edge is e ± |E|/2.
        reverse_eids=th.cat([
            th.arange(n_edges // 2, n_edges),
-            th.arange(0, n_edges // 2)]),
+            th.arange(0, n_edges // 2)]).to(train_seeds),
        negative_sampler=NegativeSampler(g, args.num_negs, args.neg_share),
+        device=device,
        use_ddp=n_gpus > 1,
        batch_size=args.batch_size,
        shuffle=True,
        drop_last=False,
-        pin_memory=True,
        num_workers=args.num_workers)

    # Define model and optimizer
@@ -174,7 +174,7 @@ def main(args, devices):
    test_mask = g.ndata['test_mask']

    # Create csr/coo/csc formats before launching training processes with multi-gpu.
-    # This avoids creating certain formats in each sub-process, which saves momory and CPU.
+    # This avoids creating certain formats in each sub-process, which saves memory and CPU.
    g.create_formats_()
    # Pack data
    data = train_mask, val_mask, test_mask, n_classes, g

--- a/python/dgl/dataloading/dataloader.py
+++ b/python/dgl/dataloading/dataloader.py
@@ -577,7 +577,7 @@ class EdgeCollator(Collator):
    >>> neg_sampler = dgl.dataloading.negative_sampler.Uniform(5)
    >>> collator = dgl.dataloading.EdgeCollator(
    ...     g, train_eid, sampler, exclude='reverse_id',
-    ...     reverse_eids=reverse_eids, negative_sampler=neg_sampler,
+    ...     reverse_eids=reverse_eids, negative_sampler=neg_sampler)
    >>> dataloader = torch.utils.data.DataLoader(
    ...     collator.dataset, collate_fn=collator.collate,
    ...     batch_size=1024, shuffle=True, drop_last=False, num_workers=4)

--- a/python/dgl/dataloading/pytorch/__init__.py
+++ b/python/dgl/dataloading/pytorch/__init__.py
--- a/python/dgl/dataloading/pytorch/dataloader.py
+++ b/python/dgl/dataloading/pytorch/dataloader.py
--- a/python/dgl/sampling/neighbor.py
+++ b/python/dgl/sampling/neighbor.py
@@ -26,7 +26,7 @@ def sample_neighbors(g, nodes, fanout, edge_dir='in', prob=None, replace=False,
    Parameters
    ----------
    g : DGLGraph
-        The graph.  Must be on CPU.
+        The graph.  Can be either on CPU or GPU.
    nodes : tensor or dict
        Node IDs to sample neighbors from.

@@ -53,6 +53,8 @@ def sample_neighbors(g, nodes, fanout, edge_dir='in', prob=None, replace=False,
        The features must be non-negative floats, and the sum of the features of
        inbound/outbound edges for every node must be positive (though they don't have
        to sum up to one).  Otherwise, the result will be undefined.
+
+        If :attr:`prob` is not None, GPU sampling is not supported.
    replace : bool, optional
        If True, sample with replacement.
    copy_ndata: bool, optional
@@ -75,7 +77,8 @@ def sample_neighbors(g, nodes, fanout, edge_dir='in', prob=None, replace=False,
    Returns
    -------
    DGLGraph
-        A sampled subgraph containing only the sampled neighboring edges.  It is on CPU.
+        A sampled subgraph containing only the sampled neighboring edges, with the
+        same device as the input graph.

    Notes
    -----

--- a/python/dgl/transform.py
+++ b/python/dgl/transform.py
@@ -2071,7 +2071,7 @@ def to_block(g, dst_nodes=None, include_dst_in_src=True):
    Parameters
    ----------
    graph : DGLGraph
-        The graph.
+        The graph.  Can be either on CPU or GPU.
    dst_nodes : Tensor or dict[str, Tensor], optional
        The list of destination nodes.