Unverified Commit d41d07d0 authored by Quan (Andy) Gan, committed by GitHub

[Doc and bugfix] Add docs and user guide and update tutorial for sampling pipeline (#3774)



* huuuuge update

* remove

* lint

* lint

* fix

* what happened to nccl

* update multi-gpu unsupervised graphsage example

* replace most of the dgl.mp.process with torch.mp.spawn

* update if condition for use_uva case

* update user guide

* address comments

* incorporating suggestions from @jermainewang

* oops

* fix tutorial to pass CI

* oops

* fix again
Co-authored-by: Xin Yao <xiny@nvidia.com>
parent 3bd5a9b6
......@@ -40,7 +40,45 @@ class _LazyIndex(object):
return flat_index
class LazyFeature(object):
"""Placeholder for prefetching from DataLoader.
"""Placeholder for feature prefetching.
One can assign this object to ``ndata`` or ``edata`` of the graphs returned by various
samplers' :attr:`sample` method. When DGL's dataloader receives the subgraphs
returned by the sampler, it will automatically look up every ``ndata`` and ``edata``
entry whose value is a LazyFeature and replace it with the actual data of the
corresponding nodes/edges from the original graph. In particular, if a subgraph
returned by the sampler has a LazyFeature with name ``k`` in ``subgraph.ndata[key]``:
.. code:: python
subgraph.ndata[key] = LazyFeature(k)
Assuming that ``graph`` is the original graph, DGL's dataloader will perform
.. code:: python
subgraph.ndata[key] = graph.ndata[k][subgraph.ndata[dgl.NID]]
DGL's dataloader performs a similar replacement for ``edata``.
For heterogeneous graphs, the replacement is:
.. code:: python
subgraph.nodes[ntype].data[key] = graph.nodes[ntype].data[k][
subgraph.nodes[ntype].data[dgl.NID]]
For MFGs' ``srcdata`` (and similarly ``dstdata``), the replacement is
.. code:: python
mfg.srcdata[key] = graph.ndata[k][mfg.srcdata[dgl.NID]]
Parameters
----------
name : str
The name of the data in the original graph.
id_ : Tensor, optional
The ID tensor.
"""
__slots__ = ['name', 'id_']
def __init__(self, name=None, id_=None):
......
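For illustration, a minimal runnable sketch of the replacement described above; the graph, the key ``'x'``, and the feature name ``'feat'`` are made-up examples, not part of this diff.

.. code:: python

    import dgl
    import torch

    graph = dgl.rand_graph(100, 500)
    graph.ndata['feat'] = torch.randn(100, 16)
    subgraph = dgl.node_subgraph(graph, torch.arange(10))

    # A sampler would leave a placeholder instead of copying features eagerly:
    #     subgraph.ndata['x'] = LazyFeature('feat')
    # which the dataloader later resolves to the equivalent of:
    subgraph.ndata['x'] = graph.ndata['feat'][subgraph.ndata[dgl.NID]]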
......@@ -910,8 +910,73 @@ def alias_func(func):
_fn.__doc__ = """Alias of :func:`dgl.{}`.""".format(func.__name__)
return _fn
def apply_each(data, fn, *args, **kwargs):
"""Apply a function to every element in a container.
If the input data is a list or any sequence other than a string, returns a list
whose elements are the results of applying the given function to each element.
If the input data is a dict or any mapping, returns a dict with the same keys
whose values are the results of applying the given function to each value.
Each element of the input data is passed to the function as its first argument,
followed by the arguments in :attr:`args` and :attr:`kwargs`.
Parameters
----------
data : any
Any object.
fn : callable
Any function.
args, kwargs :
Additional arguments and keyword-arguments passed to the function.
Examples
--------
Applying a ReLU function to a dictionary of tensors:
>>> h = {k: torch.randn(3) for k in ['A', 'B', 'C']}
>>> h = apply_each(h, torch.nn.functional.relu)
>>> assert all((v >= 0).all() for v in h.values())
"""
if isinstance(data, Mapping):
return {k: fn(v, *args, **kwargs) for k, v in data.items()}
elif isinstance(data, Sequence):
return [fn(v, *args, **kwargs) for v in data]
else:
return fn(data, *args, **kwargs)
def recursive_apply(data, fn, *args, **kwargs):
"""Recursively apply a function to every element in a container.
If the input data is a list or any sequence other than a string, returns a list
whose elements are the results of applying the given function to each element.
If the input data is a dict or any mapping, returns a dict with the same keys
whose values are the results of applying the given function to each value.
If the input data is a nested container, the result keeps the same nested
structure, with every element transformed recursively.
Each element of the input data is passed to the function as its first argument,
followed by the arguments in :attr:`args` and :attr:`kwargs`.
Parameters
----------
data : any
Any object.
fn : callable
Any function.
args, kwargs :
Additional arguments and keyword-arguments passed to the function.
Examples
--------
Applying a ReLU function to a dictionary of tensors:
>>> h = {k: torch.randn(3) for k in ['A', 'B', 'C']}
>>> h = recursive_apply(h, torch.nn.functional.relu)
>>> assert all((v >= 0).all() for v in h.values())
"""
if isinstance(data, str): # str is a Sequence
return fn(data, *args, **kwargs)
......
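As a standalone sketch of the documented behavior (hand-rolled here rather than importing DGL's internal helper), ``recursive_apply`` descends into nested containers, whereas ``apply_each`` only transforms the top level:

.. code:: python

    import torch
    import torch.nn.functional as F

    nested = {'A': [torch.randn(3), torch.randn(3)], 'B': torch.randn(3)}

    # Hand-rolled equivalent of recursive_apply(nested, F.relu):
    def relu_nested(data):
        if isinstance(data, dict):
            return {k: relu_nested(v) for k, v in data.items()}
        if isinstance(data, (list, tuple)):
            return [relu_nested(v) for v in data]
        return F.relu(data)

    out = relu_nested(nested)
    assert all((v >= 0).all() for v in out['A']) and (out['B'] >= 0).all()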
......@@ -79,6 +79,8 @@ HeteroSubgraph SampleNeighbors(
CHECK_EQ(prob.size(), hg->NumEdgeTypes())
<< "Number of probability tensors must match the number of edge types.";
DLContext ctx = aten::GetContextOf(nodes);
std::vector<HeteroGraphPtr> subrels(hg->NumEdgeTypes());
std::vector<IdArray> induced_edges(hg->NumEdgeTypes());
for (dgl_type_t etype = 0; etype < hg->NumEdgeTypes(); ++etype) {
......@@ -93,8 +95,8 @@ HeteroSubgraph SampleNeighbors(
hg->GetRelationGraph(etype)->NumVertexTypes(),
hg->NumVertices(src_vtype),
hg->NumVertices(dst_vtype),
hg->DataType(), hg->Context());
induced_edges[etype] = aten::NullArray(hg->DataType(), hg->Context());
hg->DataType(), ctx);
induced_edges[etype] = aten::NullArray(hg->DataType(), ctx);
} else if (fanouts[etype] == -1) {
const auto &earr = (dir == EdgeDir::kOut) ?
hg->OutEdges(etype, nodes_ntype) :
......
......@@ -85,11 +85,11 @@ test_nids = idx_split['test']
# DGL provides tools to iterate over the dataset in minibatches
# while generating the computation dependencies to compute their outputs
# with the MFGs above. For node classification, you can use
# ``dgl.dataloading.NodeDataLoader`` for iterating over the dataset.
# ``dgl.dataloading.DataLoader`` for iterating over the dataset.
# It accepts a sampler object to control how to generate the computation
# dependencies in the form of MFGs. DGL provides
# implementations of common sampling algorithms such as
# ``dgl.dataloading.MultiLayerNeighborSampler`` which randomly picks
# ``dgl.dataloading.NeighborSampler`` which randomly picks
# a fixed number of neighbors for each node.
#
# .. note::
......@@ -97,7 +97,7 @@ test_nids = idx_split['test']
# To write your own neighbor sampler, please refer to :ref:`this user
# guide section <guide-minibatch-customizing-neighborhood-sampler>`.
#
# The syntax of ``dgl.dataloading.NodeDataLoader`` is mostly similar to a
# The syntax of ``dgl.dataloading.DataLoader`` is mostly similar to a
# PyTorch ``DataLoader``, with the addition that it needs a graph to
# generate computation dependency from, a set of node IDs to iterate on,
# and the neighbor sampler you defined.
......@@ -107,9 +107,9 @@ test_nids = idx_split['test']
# like the following.
#
sampler = dgl.dataloading.MultiLayerNeighborSampler([4, 4])
train_dataloader = dgl.dataloading.NodeDataLoader(
# The following arguments are specific to NodeDataLoader.
sampler = dgl.dataloading.NeighborSampler([4, 4])
train_dataloader = dgl.dataloading.DataLoader(
# The following arguments are specific to DGL's DataLoader.
graph, # The graph
train_nids, # The node IDs to iterate over in minibatches
sampler, # The neighbor sampler
......@@ -141,7 +141,7 @@ print("To compute {} nodes' outputs, we need {} nodes' input features".format(le
######################################################################
# ``NodeDataLoader`` gives us three items per iteration.
# DGL's ``DataLoader`` gives us three items per iteration.
#
# - An ID tensor for the input nodes, i.e., nodes whose input features
# are needed on the first GNN layer for this minibatch.
......@@ -262,7 +262,7 @@ opt = torch.optim.Adam(model.parameters())
# loader.
#
valid_dataloader = dgl.dataloading.NodeDataLoader(
valid_dataloader = dgl.dataloading.DataLoader(
graph, valid_nids, sampler,
batch_size=1024,
shuffle=False,
......
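######################################################################
# For reference (not part of this diff), a minimal sketch of a training step
# that consumes the three items yielded per iteration; ``model`` and ``opt``
# are assumed to be the ones defined elsewhere in this tutorial, and
# ``'feat'``/``'label'`` are the feature/label names used by the dataset.
#

import torch.nn.functional as F

for input_nodes, output_nodes, mfgs in train_dataloader:
    inputs = mfgs[0].srcdata['feat']     # input features for the first layer
    labels = mfgs[-1].dstdata['label']   # labels of the output (seed) nodes
    predictions = model(mfgs, inputs)
    loss = F.cross_entropy(predictions, labels)
    opt.zero_grad()
    loss.backward()
    opt.step()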
......@@ -91,7 +91,7 @@ test_nids = idx_split['test']
# in a similar fashion introduced in the :doc:`large-scale node classification
# tutorial <L1_large_node_classification>`.
#
# DGL provides ``dgl.dataloading.EdgeDataLoader`` to
# DGL provides ``dgl.dataloading.as_edge_prediction_sampler`` to
# iterate over edges for edge classification or link prediction tasks.
#
# To perform link prediction, you need to specify a negative sampler. DGL
......@@ -105,18 +105,19 @@ negative_sampler = dgl.dataloading.negative_sampler.Uniform(5)
######################################################################
# After defining the negative sampler, one can then define the edge data
# loader with neighbor sampling. To create an ``EdgeDataLoader`` for
# loader with neighbor sampling. To create a ``DataLoader`` for
# link prediction, provide a neighbor sampler object as well as the negative
# sampler object created above.
#
sampler = dgl.dataloading.MultiLayerNeighborSampler([4, 4])
train_dataloader = dgl.dataloading.EdgeDataLoader(
# The following arguments are specific to EdgeDataLoader.
sampler = dgl.dataloading.NeighborSampler([4, 4])
sampler = dgl.dataloading.as_edge_prediction_sampler(
sampler, negative_sampler=negative_sampler)
train_dataloader = dgl.dataloading.DataLoader(
# The following arguments are specific to DataLoader.
graph, # The graph
torch.arange(graph.number_of_edges()), # The edges to iterate over
sampler, # The neighbor sampler
negative_sampler=negative_sampler, # The negative sampler
device=device, # Put the MFGs on CPU or GPU
# The following arguments are inherited from PyTorch DataLoader.
batch_size=1024, # Batch size
......@@ -247,8 +248,8 @@ def inference(model, graph, node_features):
with torch.no_grad():
nodes = torch.arange(graph.number_of_nodes())
sampler = dgl.dataloading.MultiLayerNeighborSampler([4, 4])
train_dataloader = dgl.dataloading.NodeDataLoader(
sampler = dgl.dataloading.NeighborSampler([4, 4])
train_dataloader = dgl.dataloading.DataLoader(
graph, torch.arange(graph.number_of_nodes()), sampler,
batch_size=1024,
shuffle=False,
......@@ -390,80 +391,27 @@ test_neg_dst = torch.randint(0, graph.num_nodes(), (n_test_pos,))
######################################################################
# First you need to construct a graph for ``dgl.dataloading.EdgeDataLoader``
# to iterate on, i.e. with the testing node pairs as edges.
# You also need to label the edges, 1 if positive and 0 if negative.
# First you need to compute the node representations for all the nodes
# with the ``inference`` method above:
#
test_src = torch.cat([test_pos_src, test_pos_dst])
test_dst = torch.cat([test_neg_src, test_neg_dst])
test_graph = dgl.graph((test_src, test_dst), num_nodes=graph.num_nodes())
test_ground_truth = torch.cat(
[torch.ones_like(test_pos_src), torch.zeros_like(test_neg_src)])
######################################################################
# You will need to merge the test graph with the original graph. The
# testing edges' ID will be starting from ``graph.num_edges()``.
#
new_graph = dgl.merge([graph, test_graph])
test_edge_ids = torch.arange(graph.num_edges(), new_graph.num_edges())
######################################################################
# Then you could create a new ``EdgeDataLoader`` instance that
# iterates on the new ``test_graph``, but uses the original ``graph``
# for neighbor sampling.
#
# Note that you do not need negative sampling in this dataloader: the
# negative pairs are already in the new test graph.
#
test_dataloader = dgl.dataloading.EdgeDataLoader(
# The following arguments are specific to EdgeDataLoader.
new_graph, # The graph to iterate edges over
test_edge_ids, # The edges to iterate over
sampler, # The neighbor sampler
device=device, # Put the MFGs on CPU or GPU
exclude=test_edge_ids, # Do not sample test edges as neighbors
# The following arguments are inherited from PyTorch DataLoader.
batch_size=1024, # Batch size
shuffle=True, # Whether to shuffle the nodes for every epoch
drop_last=False, # Whether to drop the last incomplete batch
num_workers=0 # Number of sampler processes
)
node_reprs = inference(model, graph, node_features)
######################################################################
# The rest is similar to training except that you no longer compute
# the gradients, and you collect all the scores and ground truth
# labels for final metric calculation.
#
# .. note::
#
# If the graph does not change, you can also precompute all the
# node representations beforehand with ``inference`` function.
# You can then feed the precomputed results directly into the
# predictor without passing the MFGs into the model.
# Since the predictor is a dot product, you can directly compute the
# scores of the positive and negative test pairs and evaluate metrics such
# as AUC:
#
test_preds = []
test_labels = []
with tqdm.tqdm(test_dataloader) as tq, torch.no_grad():
for step, (input_nodes, pair_graph, mfgs) in enumerate(tq):
# feature copy from CPU to GPU takes place here
inputs = mfgs[0].srcdata['feat']
outputs = model(mfgs, inputs)
test_preds.append(predictor(pair_graph, outputs))
test_labels.append(
# Need to map the IDs of test edges in the merged graph back
# to that of test_ground_truth.
test_ground_truth[pair_graph.edata[dgl.EID] - graph.num_edges()])
h_pos_src = node_reprs[test_pos_src]
h_pos_dst = node_reprs[test_pos_dst]
h_neg_src = node_reprs[test_neg_src]
h_neg_dst = node_reprs[test_neg_dst]
score_pos = (h_pos_src * h_pos_dst).sum(1)
score_neg = (h_neg_src * h_neg_dst).sum(1)
test_preds = torch.cat([score_pos, score_neg]).cpu().numpy()
test_labels = torch.cat([torch.ones_like(score_pos), torch.zeros_like(score_neg)]).cpu().numpy()
test_preds = torch.cat(test_preds).cpu().numpy()
test_labels = torch.cat(test_labels).cpu().numpy()
auc = sklearn.metrics.roc_auc_score(test_labels, test_preds)
print('Link Prediction AUC:', auc)
......
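######################################################################
# For reference (not part of this diff), a sketch of a training step with the
# edge-prediction dataloader defined earlier in this tutorial. With a negative
# sampler attached, each iteration yields the input nodes, a positive pair
# graph, a negative pair graph, and the MFGs; ``model``, ``predictor``, and
# ``opt`` are assumed from the surrounding tutorial.
#

import torch
import torch.nn.functional as F

for input_nodes, pos_graph, neg_graph, mfgs in train_dataloader:
    inputs = mfgs[0].srcdata['feat']
    outputs = model(mfgs, inputs)
    pos_score = predictor(pos_graph, outputs)   # scores of observed edges
    neg_score = predictor(neg_graph, outputs)   # scores of sampled non-edges
    score = torch.cat([pos_score, neg_score])
    label = torch.cat([torch.ones_like(pos_score), torch.zeros_like(neg_score)])
    loss = F.binary_cross_entropy_with_logits(score, label)
    opt.zero_grad()
    loss.backward()
    opt.step()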
......@@ -74,7 +74,7 @@ import torch.distributed as dist
def init_process_group(world_size, rank):
dist.init_process_group(
backend='nccl',
backend='gloo', # change to 'nccl' for multiple GPUs
init_method='tcp://127.0.0.1:12345',
world_size=world_size,
rank=rank)
......@@ -144,7 +144,10 @@ from torch.nn.parallel import DistributedDataParallel
def init_model(seed, device):
torch.manual_seed(seed)
model = GIN().to(device)
model = DistributedDataParallel(model, device_ids=[device], output_device=device)
if device.type == 'cpu':
model = DistributedDataParallel(model)
else:
model = DistributedDataParallel(model, device_ids=[device], output_device=device)
return model
......@@ -182,9 +185,11 @@ from torch.optim import Adam
def main(rank, world_size, dataset, seed=0):
init_process_group(world_size, rank)
# Assume the GPU ID to be the same as the process ID
device = torch.device('cuda:{:d}'.format(rank))
torch.cuda.set_device(device)
if torch.cuda.is_available():
device = torch.device('cuda:{:d}'.format(rank))
torch.cuda.set_device(device)
else:
device = torch.device('cpu')
model = init_model(seed, device)
criterion = nn.CrossEntropyLoss()
......@@ -223,28 +228,16 @@ def main(rank, world_size, dataset, seed=0):
###############################################################################
# Finally we load the dataset and launch the processes.
#
# .. note::
#
# You will need to use ``dgl.multiprocessing`` instead of the Python
# ``multiprocessing`` package. ``dgl.multiprocessing`` is identical to
# Python’s built-in ``multiprocessing`` except that it handles the
# subtleties between forking and multithreading in Python.
#
if __name__ == '__main__':
import dgl.multiprocessing as mp
import torch.multiprocessing as mp
from dgl.data import GINDataset
num_gpus = 4
procs = []
dataset = GINDataset(name='IMDBBINARY', self_loop=False)
for rank in range(num_gpus):
p = mp.Process(target=main, args=(rank, num_gpus, dataset))
p.start()
procs.append(p)
for p in procs:
p.join()
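# ``mp.spawn`` launches ``nprocs`` worker processes and calls
# ``main(rank, num_gpus, dataset)`` in each of them, supplying the process
# rank as the first positional argument automatically.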
mp.spawn(main, args=(num_gpus, dataset), nprocs=num_gpus)
# Thumbnail credits: DGL
# sphinx_gallery_thumbnail_path = '_static/blitz_5_graph_classification.png'
......@@ -118,8 +118,8 @@ def run(proc_id, devices):
# Define training and validation dataloader, copied from the previous tutorial
# but with one line of difference: use_ddp to enable distributed data parallel
# data loading.
sampler = dgl.dataloading.MultiLayerNeighborSampler([4, 4])
train_dataloader = dgl.dataloading.NodeDataLoader(
sampler = dgl.dataloading.NeighborSampler([4, 4])
train_dataloader = dgl.dataloading.DataLoader(
# The following arguments are specific to DGL's DataLoader.
graph, # The graph
train_nids, # The node IDs to iterate over in minibatches
......@@ -133,7 +133,7 @@ def run(proc_id, devices):
drop_last=False, # Whether to drop the last incomplete batch
num_workers=0 # Number of sampler processes
)
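# Note: with ``use_ddp=True`` (the one-line difference mentioned above), each
# process iterates over a disjoint shard of ``train_nids``, roughly analogous
# to PyTorch's ``DistributedSampler``; the validation loader below keeps
# ``use_ddp=False`` so every process evaluates the full validation set.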
valid_dataloader = dgl.dataloading.NodeDataLoader(
valid_dataloader = dgl.dataloading.DataLoader(
graph, valid_nids, sampler,
device=device,
use_ddp=False,
......@@ -247,16 +247,10 @@ graph.create_formats_()
#
# Say you have four GPUs.
num_gpus = 4
import dgl.multiprocessing as mp
devices = list(range(num_gpus))
procs = []
for proc_id in range(num_gpus):
p = mp.Process(target=run, args=(proc_id, devices))
p.start()
procs.append(p)
for p in procs:
p.join()
if __name__ == '__main__':
num_gpus = 4
import torch.multiprocessing as mp
mp.spawn(run, args=(list(range(num_gpus)),), nprocs=num_gpus)
# Thumbnail credits: Stanford CS224W Notes
# sphinx_gallery_thumbnail_path = '_static/blitz_1_introduction.png'