Unverified commit 427a5a96, authored by Quan (Andy) Gan, committed by GitHub
Browse files

[Doc] Update NodeDataLoader and EdgeDataLoader for GPU-based neighbor sampling (#3046)

* update docstrings and tidy code

* add docs

* address comments

* Update __init__.py

* address comments
parent acd21a6d
......@@ -75,7 +75,7 @@ def run(proc_id, n_gpus, args, devices, data):
# Create PyTorch DataLoader for constructing blocks
n_edges = g.num_edges()
train_seeds = np.arange(n_edges)
train_seeds = th.arange(n_edges)
# Create sampler
sampler = dgl.dataloading.MultiLayerNeighborSampler(
......@@ -85,13 +85,13 @@ def run(proc_id, n_gpus, args, devices, data):
# For each edge with ID e in Reddit dataset, the reverse edge is e ± |E|/2.
reverse_eids=th.cat([
th.arange(n_edges // 2, n_edges),
th.arange(0, n_edges // 2)]),
th.arange(0, n_edges // 2)]).to(train_seeds),
negative_sampler=NegativeSampler(g, args.num_negs, args.neg_share),
device=device,
use_ddp=n_gpus > 1,
batch_size=args.batch_size,
shuffle=True,
drop_last=False,
pin_memory=True,
num_workers=args.num_workers)
# Define model and optimizer
......@@ -174,7 +174,7 @@ def main(args, devices):
test_mask = g.ndata['test_mask']
# Create csr/coo/csc formats before launching training processes with multi-gpu.
# This avoids creating certain formats in each sub-process, which saves momory and CPU.
# This avoids creating certain formats in each sub-process, which saves memory and CPU.
g.create_formats_()
# Pack data
data = train_mask, val_mask, test_mask, n_classes, g
......
......@@ -577,7 +577,7 @@ class EdgeCollator(Collator):
>>> neg_sampler = dgl.dataloading.negative_sampler.Uniform(5)
>>> collator = dgl.dataloading.EdgeCollator(
... g, train_eid, sampler, exclude='reverse_id',
... reverse_eids=reverse_eids, negative_sampler=neg_sampler,
... reverse_eids=reverse_eids, negative_sampler=neg_sampler)
>>> dataloader = torch.utils.data.DataLoader(
... collator.dataset, collate_fn=collator.collate,
... batch_size=1024, shuffle=True, drop_last=False, num_workers=4)
......
This diff is collapsed.
This diff is collapsed.
......@@ -26,7 +26,7 @@ def sample_neighbors(g, nodes, fanout, edge_dir='in', prob=None, replace=False,
Parameters
----------
g : DGLGraph
The graph. Must be on CPU.
The graph. Can be either on CPU or GPU.
nodes : tensor or dict
Node IDs to sample neighbors from.
......@@ -53,6 +53,8 @@ def sample_neighbors(g, nodes, fanout, edge_dir='in', prob=None, replace=False,
The features must be non-negative floats, and the sum of the features of
inbound/outbound edges for every node must be positive (though they don't have
to sum up to one). Otherwise, the result will be undefined.
If :attr:`prob` is not None, GPU sampling is not supported.
replace : bool, optional
If True, sample with replacement.
copy_ndata: bool, optional
......@@ -75,7 +77,8 @@ def sample_neighbors(g, nodes, fanout, edge_dir='in', prob=None, replace=False,
Returns
-------
DGLGraph
A sampled subgraph containing only the sampled neighboring edges. It is on CPU.
A sampled subgraph containing only the sampled neighboring edges, with the
same device as the input graph.
Notes
-----
......
......@@ -2071,7 +2071,7 @@ def to_block(g, dst_nodes=None, include_dst_in_src=True):
Parameters
----------
g : DGLGraph
The graph.
The graph. Can be either on CPU or GPU.
dst_nodes : Tensor or dict[str, Tensor], optional
The list of destination nodes.
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment