"examples/sampling/graphbolt/vscode:/vscode.git/clone" did not exist on "35b50b61a5c6fdb34bf6ea0df547ec669bfb17de"
Unverified commit d41d07d0, authored by Quan (Andy) Gan, committed by GitHub

[Doc and bugfix] Add docs and user guide and update tutorial for sampling pipeline (#3774)



* huuuuge update

* remove

* lint

* lint

* fix

* what happened to nccl

* update multi-gpu unsupervised graphsage example

* replace most of the dgl.mp.process with torch.mp.spawn

* update if condition for use_uva case

* update user guide

* address comments

* incorporating suggestions from @jermainewang

* oops

* fix tutorial to pass CI

* oops

* fix again
Co-authored-by: Xin Yao <xiny@nvidia.com>
parent 3bd5a9b6
@@ -40,7 +40,45 @@ class _LazyIndex(object):
         return flat_index


 class LazyFeature(object):
-    """Placeholder for prefetching from DataLoader.
+    """Placeholder for feature prefetching.
+
+    One can assign this object to the ``ndata`` or ``edata`` of the graphs returned by
+    various samplers' :attr:`sample` method.  When DGL's dataloader receives the
+    subgraphs returned by the sampler, it automatically looks up every ``ndata`` and
+    ``edata`` entry whose value is a LazyFeature and replaces it with the actual data
+    of the corresponding nodes/edges from the original graph.  In particular, if a
+    subgraph returned by the sampler has a LazyFeature with name ``k`` in
+    ``subgraph.ndata[key]``:
+
+    .. code:: python
+
+       subgraph.ndata[key] = LazyFeature(k)
+
+    then, assuming that ``graph`` is the original graph, DGL's dataloader will perform
+
+    .. code:: python
+
+       subgraph.ndata[key] = graph.ndata[k][subgraph.ndata[dgl.NID]]
+
+    DGL's dataloader performs a similar replacement for ``edata``.
+
+    For heterogeneous graphs, the replacement is:
+
+    .. code:: python
+
+       subgraph.nodes[ntype].data[key] = graph.nodes[ntype].data[k][
+           subgraph.nodes[ntype].data[dgl.NID]]
+
+    For MFGs' ``srcdata`` (and similarly ``dstdata``), the replacement is
+
+    .. code:: python
+
+       mfg.srcdata[key] = graph.ndata[k][mfg.srcdata[dgl.NID]]
+
+    Parameters
+    ----------
+    name : str
+        The name of the data in the original graph.
+    id_ : Tensor, optional
+        The ID tensor.
     """
     __slots__ = ['name', 'id_']

     def __init__(self, name=None, id_=None):
...
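
For orientation, here is a minimal sketch of how a custom sampler might hand feature fetching off to the dataloader with LazyFeature. The subclass, the feature names, and the exact import path are illustrative assumptions, not part of this commit:

    import dgl
    from dgl.dataloading import LazyFeature, NeighborSampler  # import path assumed

    class PrefetchingSampler(NeighborSampler):
        """Ask the dataloader to attach 'feat' to the first MFG lazily."""
        def sample(self, g, seed_nodes):
            input_nodes, output_nodes, blocks = super().sample(g, seed_nodes)
            # The dataloader later replaces this placeholder with
            # g.ndata['feat'][blocks[0].srcdata[dgl.NID]] when producing the batch.
            blocks[0].srcdata['x'] = LazyFeature('feat')
            return input_nodes, output_nodes, blocks

The built-in samplers also grow prefetch arguments (e.g. ``prefetch_node_feats``) that set these placeholders for you; the subclass above only illustrates the mechanism.
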
@@ -910,8 +910,73 @@ def alias_func(func):
     _fn.__doc__ = """Alias of :func:`dgl.{}`.""".format(func.__name__)
     return _fn

+
+def apply_each(data, fn, *args, **kwargs):
+    """Apply a function to every element in a container.
+
+    If the input data is a list or any sequence other than a string, this returns a
+    list whose elements are the results of applying the function to each element.
+    If the input data is a dict or any mapping, this returns a dict with the same
+    keys, whose values are the results of applying the function to the corresponding
+    values.
+
+    The function receives each individual element of the input data as its first
+    argument, followed by the arguments in :attr:`args` and :attr:`kwargs`.
+
+    Parameters
+    ----------
+    data : any
+        Any object.
+    fn : callable
+        Any function.
+    args, kwargs :
+        Additional arguments and keyword arguments passed to the function.
+
+    Examples
+    --------
+    Applying a ReLU function to a dictionary of tensors:
+
+    >>> h = {k: torch.randn(3) for k in ['A', 'B', 'C']}
+    >>> h = apply_each(h, torch.nn.functional.relu)
+    >>> assert all((v >= 0).all() for v in h.values())
+    """
+    if isinstance(data, Mapping):
+        return {k: fn(v, *args, **kwargs) for k, v in data.items()}
+    elif isinstance(data, Sequence):
+        return [fn(v, *args, **kwargs) for v in data]
+    else:
+        return fn(data, *args, **kwargs)
+
+
 def recursive_apply(data, fn, *args, **kwargs):
     """Recursively apply a function to every element in a container.
+
+    If the input data is a list or any sequence other than a string, this returns a
+    list whose elements are the results of applying the function to each element.
+    If the input data is a dict or any mapping, this returns a dict with the same
+    keys, whose values are the results of applying the function to the corresponding
+    values.
+    If the input data is a nested container, the result has the same nested
+    structure, with each element transformed recursively.
+
+    The function receives each individual element of the input data as its first
+    argument, followed by the arguments in :attr:`args` and :attr:`kwargs`.
+
+    Parameters
+    ----------
+    data : any
+        Any object.
+    fn : callable
+        Any function.
+    args, kwargs :
+        Additional arguments and keyword arguments passed to the function.
+
+    Examples
+    --------
+    Applying a ReLU function to a dictionary of tensors:
+
+    >>> h = {k: torch.randn(3) for k in ['A', 'B', 'C']}
+    >>> h = recursive_apply(h, torch.nn.functional.relu)
+    >>> assert all((v >= 0).all() for v in h.values())
+    """
     if isinstance(data, str):  # str is a Sequence
         return fn(data, *args, **kwargs)
...
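
A small usage sketch of the two helpers on per-node-type feature dicts. The tensors and type names are made up; ``apply_each`` is exposed at the package level, while ``recursive_apply`` is an internal utility whose exact import path is an assumption here:

    import torch
    import dgl
    from dgl.utils import recursive_apply  # internal helper; path assumed

    # Per-node-type hidden states, e.g. the output of a heterogeneous GNN layer.
    h = {'user': torch.randn(4, 16), 'item': torch.randn(6, 16)}

    # Extra positional/keyword arguments are forwarded after each element.
    h = dgl.apply_each(h, torch.nn.functional.leaky_relu, negative_slope=0.1)

    # recursive_apply also descends into nested containers, e.g. a list of dicts.
    stack = [{'user': torch.randn(4, 16)}, {'user': torch.randn(4, 16)}]
    stack = recursive_apply(stack, lambda x: x.relu())
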
@@ -79,6 +79,8 @@ HeteroSubgraph SampleNeighbors(
   CHECK_EQ(prob.size(), hg->NumEdgeTypes())
     << "Number of probability tensors must match the number of edge types.";

+  DLContext ctx = aten::GetContextOf(nodes);
+
   std::vector<HeteroGraphPtr> subrels(hg->NumEdgeTypes());
   std::vector<IdArray> induced_edges(hg->NumEdgeTypes());
   for (dgl_type_t etype = 0; etype < hg->NumEdgeTypes(); ++etype) {
@@ -93,8 +95,8 @@ HeteroSubgraph SampleNeighbors(
         hg->GetRelationGraph(etype)->NumVertexTypes(),
         hg->NumVertices(src_vtype),
         hg->NumVertices(dst_vtype),
-        hg->DataType(), hg->Context());
-      induced_edges[etype] = aten::NullArray(hg->DataType(), hg->Context());
+        hg->DataType(), ctx);
+      induced_edges[etype] = aten::NullArray(hg->DataType(), ctx);
     } else if (fanouts[etype] == -1) {
       const auto &earr = (dir == EdgeDir::kOut) ?
         hg->OutEdges(etype, nodes_ntype) :
...
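
This C++ change makes the per-edge-type placeholder graphs follow the device of the seed nodes rather than the graph's own context, which appears to matter for GPU/UVA sampling where the graph stays in pinned host memory while the seeds live on the GPU (the commit message mentions the ``use_uva`` case). A hedged sketch of the Python-side pipeline this supports; flag names follow the 0.8 dataloading API but should be treated as assumptions:

    import torch
    import dgl

    # `graph` and `train_nids` are assumed to exist; requires a CUDA device.
    device = torch.device('cuda:0')
    sampler = dgl.dataloading.NeighborSampler([10, 10])
    dataloader = dgl.dataloading.DataLoader(
        graph,                  # graph kept in (pinned) host memory
        train_nids.to(device),  # seed nodes already on the GPU
        sampler,
        device=device,          # MFGs are produced on the GPU
        use_uva=True,           # sample through unified virtual addressing
        batch_size=1024,
        shuffle=True)
    for input_nodes, output_nodes, blocks in dataloader:
        pass  # training step goes here
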
@@ -85,11 +85,11 @@ test_nids = idx_split['test']
 # DGL provides tools to iterate over the dataset in minibatches
 # while generating the computation dependencies to compute their outputs
 # with the MFGs above. For node classification, you can use
-# ``dgl.dataloading.NodeDataLoader`` for iterating over the dataset.
+# ``dgl.dataloading.DataLoader`` for iterating over the dataset.
 # It accepts a sampler object to control how to generate the computation
 # dependencies in the form of MFGs. DGL provides
 # implementations of common sampling algorithms such as
-# ``dgl.dataloading.MultiLayerNeighborSampler`` which randomly picks
+# ``dgl.dataloading.NeighborSampler`` which randomly picks
 # a fixed number of neighbors for each node.
 #
 # .. note::
@@ -97,7 +97,7 @@ test_nids = idx_split['test']
 #    To write your own neighbor sampler, please refer to :ref:`this user
 #    guide section <guide-minibatch-customizing-neighborhood-sampler>`.
 #
-# The syntax of ``dgl.dataloading.NodeDataLoader`` is mostly similar to a
+# The syntax of ``dgl.dataloading.DataLoader`` is mostly similar to a
 # PyTorch ``DataLoader``, with the addition that it needs a graph to
 # generate computation dependency from, a set of node IDs to iterate on,
 # and the neighbor sampler you defined.
@@ -107,9 +107,9 @@ test_nids = idx_split['test']
 # like the following.
 #
-sampler = dgl.dataloading.MultiLayerNeighborSampler([4, 4])
-train_dataloader = dgl.dataloading.NodeDataLoader(
-    # The following arguments are specific to NodeDataLoader.
+sampler = dgl.dataloading.NeighborSampler([4, 4])
+train_dataloader = dgl.dataloading.DataLoader(
+    # The following arguments are specific to DGL's DataLoader.
     graph,              # The graph
     train_nids,         # The node IDs to iterate over in minibatches
     sampler,            # The neighbor sampler
@@ -141,7 +141,7 @@ print("To compute {} nodes' outputs, we need {} nodes' input features".format(le
 ######################################################################
-# ``NodeDataLoader`` gives us three items per iteration.
+# DGL's ``DataLoader`` gives us three items per iteration.
 #
 # - An ID tensor for the input nodes, i.e., nodes whose input features
 #   are needed on the first GNN layer for this minibatch.
@@ -262,7 +262,7 @@ opt = torch.optim.Adam(model.parameters())
 # loader.
 #
-valid_dataloader = dgl.dataloading.NodeDataLoader(
+valid_dataloader = dgl.dataloading.DataLoader(
     graph, valid_nids, sampler,
     batch_size=1024,
     shuffle=False,
...
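
The tutorial's training loop itself is outside this diff. For orientation, a minimal sketch of consuming the three items the loader yields; the feature/label keys and the ``model``/``opt`` objects are placeholders from the surrounding tutorial:

    import torch.nn.functional as F

    for input_nodes, output_nodes, mfgs in train_dataloader:
        x = mfgs[0].srcdata['feat']    # inputs needed by the first layer
        y = mfgs[-1].dstdata['label']  # labels of the seed (output) nodes
        y_hat = model(mfgs, x)
        loss = F.cross_entropy(y_hat, y)
        opt.zero_grad()
        loss.backward()
        opt.step()
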
@@ -91,7 +91,7 @@ test_nids = idx_split['test']
 # in a similar fashion introduced in the :doc:`large-scale node classification
 # tutorial <L1_large_node_classification>`.
 #
-# DGL provides ``dgl.dataloading.EdgeDataLoader`` to
+# DGL provides ``dgl.dataloading.as_edge_prediction_sampler`` to
 # iterate over edges for edge classification or link prediction tasks.
 #
 # To perform link prediction, you need to specify a negative sampler. DGL
@@ -105,18 +105,19 @@ negative_sampler = dgl.dataloading.negative_sampler.Uniform(5)
 ######################################################################
 # After defining the negative sampler, one can then define the edge data
-# loader with neighbor sampling. To create an ``EdgeDataLoader`` for
+# loader with neighbor sampling. To create a ``DataLoader`` for
 # link prediction, provide a neighbor sampler object as well as the negative
 # sampler object created above.
 #
-sampler = dgl.dataloading.MultiLayerNeighborSampler([4, 4])
-train_dataloader = dgl.dataloading.EdgeDataLoader(
-    # The following arguments are specific to EdgeDataLoader.
+sampler = dgl.dataloading.NeighborSampler([4, 4])
+sampler = dgl.dataloading.as_edge_prediction_sampler(
+    sampler, negative_sampler=negative_sampler)
+train_dataloader = dgl.dataloading.DataLoader(
+    # The following arguments are specific to DataLoader.
     graph,                                  # The graph
     torch.arange(graph.number_of_edges()),  # The edges to iterate over
     sampler,                                # The neighbor sampler
-    negative_sampler=negative_sampler,      # The negative sampler
     device=device,                          # Put the MFGs on CPU or GPU
     # The following arguments are inherited from PyTorch DataLoader.
     batch_size=1024,    # Batch size
@@ -247,8 +248,8 @@ def inference(model, graph, node_features):
     with torch.no_grad():
         nodes = torch.arange(graph.number_of_nodes())

-        sampler = dgl.dataloading.MultiLayerNeighborSampler([4, 4])
-        train_dataloader = dgl.dataloading.NodeDataLoader(
+        sampler = dgl.dataloading.NeighborSampler([4, 4])
+        train_dataloader = dgl.dataloading.DataLoader(
            graph, torch.arange(graph.number_of_nodes()), sampler,
            batch_size=1024,
            shuffle=False,
@@ -390,80 +391,27 @@ test_neg_dst = torch.randint(0, graph.num_nodes(), (n_test_pos,))
 ######################################################################
-# First you need to construct a graph for ``dgl.dataloading.EdgeDataLoader``
-# to iterate on, i.e. with the testing node pairs as edges.
-# You also need to label the edges, 1 if positive and 0 if negative.
+# First you need to compute the node representations for all the nodes
+# with the ``inference`` method above:
 #
-test_src = torch.cat([test_pos_src, test_pos_dst])
-test_dst = torch.cat([test_neg_src, test_neg_dst])
-test_graph = dgl.graph((test_src, test_dst), num_nodes=graph.num_nodes())
-test_ground_truth = torch.cat(
-    [torch.ones_like(test_pos_src), torch.zeros_like(test_neg_src)])
-
-######################################################################
-# You will need to merge the test graph with the original graph. The
-# testing edges' ID will be starting from ``graph.num_edges()``.
-#
-new_graph = dgl.merge([graph, test_graph])
-test_edge_ids = torch.arange(graph.num_edges(), new_graph.num_edges())
-
-######################################################################
-# Then you could create a new ``EdgeDataLoader`` instance that
-# iterates on the new ``test_graph``, but uses the original ``graph``
-# for neighbor sampling.
-#
-# Note that you do not need negative sampling in this dataloader: the
-# negative pairs are already in the new test graph.
-#
-test_dataloader = dgl.dataloading.EdgeDataLoader(
-    # The following arguments are specific to EdgeDataLoader.
-    new_graph,              # The graph to iterate edges over
-    test_edge_ids,          # The edges to iterate over
-    sampler,                # The neighbor sampler
-    device=device,          # Put the MFGs on CPU or GPU
-    exclude=test_edge_ids,  # Do not sample test edges as neighbors
-    # The following arguments are inherited from PyTorch DataLoader.
-    batch_size=1024,    # Batch size
-    shuffle=True,       # Whether to shuffle the nodes for every epoch
-    drop_last=False,    # Whether to drop the last incomplete batch
-    num_workers=0       # Number of sampler processes
-)
+node_reprs = inference(model, graph, node_features)

 ######################################################################
-# The rest is similar to training except that you no longer compute
-# the gradients, and you collect all the scores and ground truth
-# labels for final metric calculation.
-#
-# .. note::
-#
-#    If the graph does not change, you can also precompute all the
-#    node representations beforehand with ``inference`` function.
-#    You can then feed the precomputed results directly into the
-#    predictor without passing the MFGs into the model.
+# Since the predictor is a dot product, you can now easily compute the
+# scores of the positive and negative test pairs and compute metrics such
+# as AUC:
 #
-test_preds = []
-test_labels = []
-with tqdm.tqdm(test_dataloader) as tq, torch.no_grad():
-    for step, (input_nodes, pair_graph, mfgs) in enumerate(tq):
-        # feature copy from CPU to GPU takes place here
-        inputs = mfgs[0].srcdata['feat']
-        outputs = model(mfgs, inputs)
-        test_preds.append(predictor(pair_graph, outputs))
-        test_labels.append(
-            # Need to map the IDs of test edges in the merged graph back
-            # to that of test_ground_truth.
-            test_ground_truth[pair_graph.edata[dgl.EID] - graph.num_edges()])
-
-test_preds = torch.cat(test_preds).cpu().numpy()
-test_labels = torch.cat(test_labels).cpu().numpy()
+h_pos_src = node_reprs[test_pos_src]
+h_pos_dst = node_reprs[test_pos_dst]
+h_neg_src = node_reprs[test_neg_src]
+h_neg_dst = node_reprs[test_neg_dst]
+score_pos = (h_pos_src * h_pos_dst).sum(1)
+score_neg = (h_neg_src * h_neg_dst).sum(1)
+
+test_preds = torch.cat([score_pos, score_neg]).cpu().numpy()
+test_labels = torch.cat(
+    [torch.ones_like(score_pos), torch.zeros_like(score_neg)]).cpu().numpy()

 auc = sklearn.metrics.roc_auc_score(test_labels, test_preds)
 print('Link Prediction AUC:', auc)
...
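
The evaluation above relies on the predictor being a dot product. For reference, such a predictor can be written roughly as the following sketch (the tutorial's own ``predictor`` may differ in detail):

    import torch.nn as nn
    import dgl.function as fn

    class DotPredictor(nn.Module):
        """Score each edge in a pair graph by the dot product of its endpoint embeddings."""
        def forward(self, pair_graph, h):
            with pair_graph.local_scope():
                pair_graph.ndata['h'] = h
                # u_dot_v computes <h_u, h_v> for every edge (u, v).
                pair_graph.apply_edges(fn.u_dot_v('h', 'h', 'score'))
                return pair_graph.edata['score'][:, 0]
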
@@ -74,7 +74,7 @@ import torch.distributed as dist

 def init_process_group(world_size, rank):
     dist.init_process_group(
-        backend='nccl',
+        backend='gloo',     # change to 'nccl' for multiple GPUs
         init_method='tcp://127.0.0.1:12345',
         world_size=world_size,
         rank=rank)
@@ -144,7 +144,10 @@ from torch.nn.parallel import DistributedDataParallel

 def init_model(seed, device):
     torch.manual_seed(seed)
     model = GIN().to(device)
-    model = DistributedDataParallel(model, device_ids=[device], output_device=device)
+    if device.type == 'cpu':
+        model = DistributedDataParallel(model)
+    else:
+        model = DistributedDataParallel(model, device_ids=[device], output_device=device)
     return model
@@ -182,9 +185,11 @@ from torch.optim import Adam

 def main(rank, world_size, dataset, seed=0):
     init_process_group(world_size, rank)
-    # Assume the GPU ID to be the same as the process ID
-    device = torch.device('cuda:{:d}'.format(rank))
-    torch.cuda.set_device(device)
+    if torch.cuda.is_available():
+        device = torch.device('cuda:{:d}'.format(rank))
+        torch.cuda.set_device(device)
+    else:
+        device = torch.device('cpu')

     model = init_model(seed, device)
     criterion = nn.CrossEntropyLoss()
@@ -223,28 +228,16 @@ def main(rank, world_size, dataset, seed=0):
 ###############################################################################
 # Finally we load the dataset and launch the processes.
 #
-# .. note::
-#
-#    You will need to use ``dgl.multiprocessing`` instead of the Python
-#    ``multiprocessing`` package. ``dgl.multiprocessing`` is identical to
-#    Python’s built-in ``multiprocessing`` except that it handles the
-#    subtleties between forking and multithreading in Python.
-#
 if __name__ == '__main__':
-    import dgl.multiprocessing as mp
+    import torch.multiprocessing as mp
     from dgl.data import GINDataset

     num_gpus = 4
     procs = []
     dataset = GINDataset(name='IMDBBINARY', self_loop=False)
-    for rank in range(num_gpus):
-        p = mp.Process(target=main, args=(rank, num_gpus, dataset))
-        p.start()
-        procs.append(p)
-    for p in procs:
-        p.join()
+    mp.spawn(main, args=(num_gpus, dataset), nprocs=num_gpus)

 # Thumbnail credits: DGL
 # sphinx_gallery_thumbnail_path = '_static/blitz_5_graph_classification.png'
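
The move from manually managed processes to ``torch.multiprocessing.spawn`` works because ``spawn`` prepends the process rank to the target's arguments, so ``main`` keeps its ``(rank, world_size, dataset)`` signature. A standalone sketch of the pattern; the address, port, and worker body are illustrative only:

    import torch.distributed as dist
    import torch.multiprocessing as mp

    def worker(rank, world_size, payload):
        # `rank` is injected by mp.spawn; everything else comes from `args` below.
        dist.init_process_group(
            backend='gloo', init_method='tcp://127.0.0.1:23456',
            world_size=world_size, rank=rank)
        print(f'rank {rank}/{world_size} got payload of size {len(payload)}')
        dist.destroy_process_group()

    if __name__ == '__main__':
        world_size = 2
        # Forks `nprocs` processes, calls worker(rank, *args) in each, and joins them.
        mp.spawn(worker, args=(world_size, list(range(10))), nprocs=world_size)
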
@@ -118,8 +118,8 @@ def run(proc_id, devices):
     # Define training and validation dataloader, copied from the previous tutorial
     # but with one line of difference: use_ddp to enable distributed data parallel
     # data loading.
-    sampler = dgl.dataloading.MultiLayerNeighborSampler([4, 4])
-    train_dataloader = dgl.dataloading.NodeDataLoader(
+    sampler = dgl.dataloading.NeighborSampler([4, 4])
+    train_dataloader = dgl.dataloading.DataLoader(
         # The following arguments are specific to NodeDataLoader.
         graph,              # The graph
         train_nids,         # The node IDs to iterate over in minibatches
@@ -133,7 +133,7 @@ def run(proc_id, devices):
         drop_last=False,    # Whether to drop the last incomplete batch
         num_workers=0       # Number of sampler processes
     )
-    valid_dataloader = dgl.dataloading.NodeDataLoader(
+    valid_dataloader = dgl.dataloading.DataLoader(
         graph, valid_nids, sampler,
         device=device,
         use_ddp=False,
@@ -247,16 +247,10 @@ graph.create_formats_()
 #
 # Say you have four GPUs.

-num_gpus = 4
-import dgl.multiprocessing as mp
-devices = list(range(num_gpus))
-procs = []
-for proc_id in range(num_gpus):
-    p = mp.Process(target=run, args=(proc_id, devices))
-    p.start()
-    procs.append(p)
-for p in procs:
-    p.join()
+if __name__ == '__main__':
+    num_gpus = 4
+    import torch.multiprocessing as mp
+    mp.spawn(run, args=(list(range(num_gpus)),), nprocs=num_gpus)

 # Thumbnail credits: Stanford CS224W Notes
 # sphinx_gallery_thumbnail_path = '_static/blitz_1_introduction.png'
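
As the comment in the first hunk notes, the only substantive difference from the single-GPU loader is enabling ``use_ddp`` on the training loader: each rank then iterates over its own shard of ``train_nids``, while the validation loader keeps ``use_ddp=False`` so every rank evaluates the full validation set. Conceptually, the sharding behaves roughly like the sketch below; DGL's actual partitioning may differ:

    import torch

    def shard(nids, rank, world_size):
        # Round-robin shard of the seed nodes; illustrative only.
        return nids[rank::world_size]

    train_nids = torch.arange(1000)
    for rank in range(4):
        print(rank, shard(train_nids, rank, 4).shape)  # roughly 250 seeds per rank
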