Unverified commit 427a5a96, authored by Quan (Andy) Gan and committed by GitHub
Browse files

[Doc] Update NodeDataLoader and EdgeDataLoader for GPU-based neighbor sampling (#3046)

* update docstrings and tidy code

* add docs

* address comments

* Update __init__.py

* address comments
parent acd21a6d
...@@ -75,7 +75,7 @@ def run(proc_id, n_gpus, args, devices, data): ...@@ -75,7 +75,7 @@ def run(proc_id, n_gpus, args, devices, data):
# Create PyTorch DataLoader for constructing blocks # Create PyTorch DataLoader for constructing blocks
n_edges = g.num_edges() n_edges = g.num_edges()
train_seeds = np.arange(n_edges) train_seeds = th.arange(n_edges)
# Create sampler # Create sampler
sampler = dgl.dataloading.MultiLayerNeighborSampler( sampler = dgl.dataloading.MultiLayerNeighborSampler(
...@@ -85,13 +85,13 @@ def run(proc_id, n_gpus, args, devices, data): ...@@ -85,13 +85,13 @@ def run(proc_id, n_gpus, args, devices, data):
# For each edge with ID e in Reddit dataset, the reverse edge is e ± |E|/2. # For each edge with ID e in Reddit dataset, the reverse edge is e ± |E|/2.
reverse_eids=th.cat([ reverse_eids=th.cat([
th.arange(n_edges // 2, n_edges), th.arange(n_edges // 2, n_edges),
th.arange(0, n_edges // 2)]), th.arange(0, n_edges // 2)]).to(train_seeds),
negative_sampler=NegativeSampler(g, args.num_negs, args.neg_share), negative_sampler=NegativeSampler(g, args.num_negs, args.neg_share),
device=device,
use_ddp=n_gpus > 1, use_ddp=n_gpus > 1,
batch_size=args.batch_size, batch_size=args.batch_size,
shuffle=True, shuffle=True,
drop_last=False, drop_last=False,
pin_memory=True,
num_workers=args.num_workers) num_workers=args.num_workers)
# Define model and optimizer # Define model and optimizer
...@@ -174,7 +174,7 @@ def main(args, devices): ...@@ -174,7 +174,7 @@ def main(args, devices):
test_mask = g.ndata['test_mask'] test_mask = g.ndata['test_mask']
# Create csr/coo/csc formats before launching training processes with multi-gpu. # Create csr/coo/csc formats before launching training processes with multi-gpu.
# This avoids creating certain formats in each sub-process, which saves momory and CPU. # This avoids creating certain formats in each sub-process, which saves memory and CPU.
g.create_formats_() g.create_formats_()
# Pack data # Pack data
data = train_mask, val_mask, test_mask, n_classes, g data = train_mask, val_mask, test_mask, n_classes, g
......
...@@ -577,7 +577,7 @@ class EdgeCollator(Collator): ...@@ -577,7 +577,7 @@ class EdgeCollator(Collator):
>>> neg_sampler = dgl.dataloading.negative_sampler.Uniform(5) >>> neg_sampler = dgl.dataloading.negative_sampler.Uniform(5)
>>> collator = dgl.dataloading.EdgeCollator( >>> collator = dgl.dataloading.EdgeCollator(
... g, train_eid, sampler, exclude='reverse_id', ... g, train_eid, sampler, exclude='reverse_id',
... reverse_eids=reverse_eids, negative_sampler=neg_sampler, ... reverse_eids=reverse_eids, negative_sampler=neg_sampler)
>>> dataloader = torch.utils.data.DataLoader( >>> dataloader = torch.utils.data.DataLoader(
... collator.dataset, collate_fn=collator.collate, ... collator.dataset, collate_fn=collator.collate,
... batch_size=1024, shuffle=True, drop_last=False, num_workers=4) ... batch_size=1024, shuffle=True, drop_last=False, num_workers=4)
......
This diff is collapsed.
This diff is collapsed.
...@@ -26,7 +26,7 @@ def sample_neighbors(g, nodes, fanout, edge_dir='in', prob=None, replace=False, ...@@ -26,7 +26,7 @@ def sample_neighbors(g, nodes, fanout, edge_dir='in', prob=None, replace=False,
Parameters Parameters
---------- ----------
g : DGLGraph g : DGLGraph
The graph. Must be on CPU. The graph. Can be either on CPU or GPU.
nodes : tensor or dict nodes : tensor or dict
Node IDs to sample neighbors from. Node IDs to sample neighbors from.
...@@ -53,6 +53,8 @@ def sample_neighbors(g, nodes, fanout, edge_dir='in', prob=None, replace=False, ...@@ -53,6 +53,8 @@ def sample_neighbors(g, nodes, fanout, edge_dir='in', prob=None, replace=False,
The features must be non-negative floats, and the sum of the features of The features must be non-negative floats, and the sum of the features of
inbound/outbound edges for every node must be positive (though they don't have inbound/outbound edges for every node must be positive (though they don't have
to sum up to one). Otherwise, the result will be undefined. to sum up to one). Otherwise, the result will be undefined.
If :attr:`prob` is not None, GPU sampling is not supported.
replace : bool, optional replace : bool, optional
If True, sample with replacement. If True, sample with replacement.
copy_ndata: bool, optional copy_ndata: bool, optional
...@@ -75,7 +77,8 @@ def sample_neighbors(g, nodes, fanout, edge_dir='in', prob=None, replace=False, ...@@ -75,7 +77,8 @@ def sample_neighbors(g, nodes, fanout, edge_dir='in', prob=None, replace=False,
Returns Returns
------- -------
DGLGraph DGLGraph
A sampled subgraph containing only the sampled neighboring edges. It is on CPU. A sampled subgraph containing only the sampled neighboring edges, with the
same device as the input graph.
Notes Notes
----- -----
......
...@@ -2071,7 +2071,7 @@ def to_block(g, dst_nodes=None, include_dst_in_src=True): ...@@ -2071,7 +2071,7 @@ def to_block(g, dst_nodes=None, include_dst_in_src=True):
Parameters Parameters
---------- ----------
graph : DGLGraph graph : DGLGraph
The graph. The graph. Can be either on CPU or GPU.
dst_nodes : Tensor or dict[str, Tensor], optional dst_nodes : Tensor or dict[str, Tensor], optional
The list of destination nodes. The list of destination nodes.
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment