Unverified Commit 3f6f6941 authored by Mufei Li, committed by GitHub

[Bug fix] Various fix from bug bash (#3133)



* Update

* Update

* Update dependencies

* Update

* Update

* Fix ogbn-products gat

* Update

* Update

* Reformat

* Fix typo in node2vec_random_walk

* Specify file encoding

* Working for 6.7

* Update

* Fix subgraph

* Fix doc for sample_neighbors_biased

* Fix hyperlink

* Add example for udf cross reducer

* Fix

* Add example for slice_batch

* Replace dgl.bipartite

* Fix GATConv

* Fix math rendering

* Fix doc
Co-authored-by: Ubuntu <ubuntu@ip-172-31-28-17.us-west-2.compute.internal>
Co-authored-by: Jinjing Zhou <VoVAllen@users.noreply.github.com>
Co-authored-by: Ubuntu <ubuntu@ip-172-31-22-156.us-west-2.compute.internal>
parent 5f2639e2
@@ -63,13 +63,13 @@ def node2vec_random_walk(g, nodes, p, q, walk_length, prob=None, return_eids=Fal
 Examples
 --------
 >>> g1 = dgl.graph(([0, 1, 1, 2, 3], [1, 2, 3, 0, 0]))
->>> dgl.sampling.node2vec_random_walk(g1, [0, 1, 2, 0], 1, 1, length=4)
+>>> dgl.sampling.node2vec_random_walk(g1, [0, 1, 2, 0], 1, 1, walk_length=4)
 tensor([[0, 1, 3, 0, 1],
         [1, 2, 0, 1, 3],
         [2, 0, 1, 3, 0],
         [0, 1, 2, 0, 1]])
->>> dgl.sampling.node2vec_random_walk(g1, [0, 1, 2, 0], 1, 1, length=4, return_eids=True)
+>>> dgl.sampling.node2vec_random_walk(g1, [0, 1, 2, 0], 1, 1, walk_length=4, return_eids=True)
 (tensor([[0, 1, 3, 0, 1],
          [1, 2, 0, 1, 2],
          [2, 0, 1, 2, 0],
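For anyone trying the corrected call, here is a minimal sketch. It is not part of the patch and only uses what the doctest already shows; the walks are random, so the traces will not match the doctest output exactly.

import dgl
import torch

# Same toy graph as the doctest above.
g1 = dgl.graph((torch.tensor([0, 1, 1, 2, 3]), torch.tensor([1, 2, 3, 0, 0])))

# The fix is the keyword name: walk_length, not length.
traces = dgl.sampling.node2vec_random_walk(g1, [0, 1, 2, 0], 1, 1, walk_length=4)
print(traces.shape)  # torch.Size([4, 5]): one row per seed node, walk_length + 1 nodes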
@@ -119,9 +119,9 @@ def node_subgraph(graph, nodes, *, relabel_nodes=True, store_ids=True):
 >>> })
 >>> sub_g = dgl.node_subgraph(g, {'user': [1, 2]})
 >>> sub_g
-Graph(num_nodes={'user': 2, 'game': 0},
-      num_edges={('user', 'plays', 'game'): 0, ('user', 'follows', 'user'): 2},
-      metagraph=[('user', 'game'), ('user', 'user')])
+Graph(num_nodes={'game': 0, 'user': 2},
+      num_edges={('user', 'follows', 'user'): 2, ('user', 'plays', 'game'): 0},
+      metagraph=[('user', 'user', 'follows'), ('user', 'game', 'plays')])
 
 See Also
 --------
@@ -266,9 +266,9 @@ def edge_subgraph(graph, edges, *, relabel_nodes=True, store_ids=True, **depreca
 >>> sub_g = dgl.edge_subgraph(g, {('user', 'follows', 'user'): [1, 2],
 ... ('user', 'plays', 'game'): [2]})
 >>> print(sub_g)
-Graph(num_nodes={'user': 2, 'game': 1},
-      num_edges={('user', 'plays', 'game'): 1, ('user', 'follows', 'user'): 2},
-      metagraph=[('user', 'game'), ('user', 'user')])
+Graph(num_nodes={'game': 1, 'user': 2},
+      num_edges={('user', 'follows', 'user'): 2, ('user', 'plays', 'game'): 1},
+      metagraph=[('user', 'user', 'follows'), ('user', 'game', 'plays')])
 
 See Also
 --------
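The heterograph ``g`` used by both doctests is constructed above the excerpted lines and is not visible here, so the sketch below builds a stand-in graph with the same node and edge types. It is only meant to show the two calls being documented; its printed statistics will not match the doctest output.

import dgl
import torch

g = dgl.heterograph({
    ('user', 'follows', 'user'): (torch.tensor([0, 1, 2]), torch.tensor([1, 2, 3])),
    ('user', 'plays', 'game'): (torch.tensor([0, 3]), torch.tensor([0, 1])),
})

# Node-induced subgraph: node types missing from the dict keep zero nodes.
print(dgl.node_subgraph(g, {'user': [1, 2]}))

# Edge-induced subgraph: edge IDs are given per canonical edge type.
print(dgl.edge_subgraph(g, {('user', 'follows', 'user'): [1, 2],
                            ('user', 'plays', 'game'): [1]}))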
@@ -2536,8 +2536,6 @@ def adj_product_graph(A, B, weight_name, etype='_E'):
 >>> B = dgl.heterograph({
 ... ('B', 'BA', 'A'): ([0, 3, 2, 1, 3, 3], [1, 2, 0, 2, 1, 0])},
 ... num_nodes_dict={'A': 3, 'B': 4})
->>> A.edata['w'] = torch.randn(6).requires_grad_()
->>> B.edata['w'] = torch.randn(6).requires_grad_()
 
 If your graph is a multigraph, you will need to call :func:`dgl.to_simple`
 to convert it into a simple graph first.
@@ -2545,6 +2543,13 @@ def adj_product_graph(A, B, weight_name, etype='_E'):
 >>> A = dgl.to_simple(A)
 >>> B = dgl.to_simple(B)
 
+Initialize learnable edge weights.
+
+>>> A.edata['w'] = torch.randn(6).requires_grad_()
+>>> B.edata['w'] = torch.randn(6).requires_grad_()
+
+Take the product.
+
 >>> C = dgl.adj_product_graph(A, B, 'w')
 >>> C.edges()
 (tensor([0, 0, 1, 2, 2, 2]), tensor([0, 1, 0, 0, 2, 1]))
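A runnable sketch of the reordered example. The construction of ``A`` is not shown in this hunk, so the one below is made up with compatible node types; the point is only the ordering the patch adopts (simplify first, then attach weights) and that the product's weights stay differentiable.

import dgl
import torch

# B matches the doc example; A is an arbitrary stand-in.
A = dgl.heterograph({('A', 'AB', 'B'): ([0, 1, 2, 2], [1, 3, 0, 2])},
                    num_nodes_dict={'A': 3, 'B': 4})
B = dgl.heterograph({('B', 'BA', 'A'): ([0, 3, 2, 1, 3, 3], [1, 2, 0, 2, 1, 0])},
                    num_nodes_dict={'A': 3, 'B': 4})

# Simplify first (a no-op for these edge lists), then attach learnable weights.
A, B = dgl.to_simple(A), dgl.to_simple(B)
A.edata['w'] = torch.randn(A.num_edges()).requires_grad_()
B.edata['w'] = torch.randn(B.num_edges()).requires_grad_()

# C's adjacency is the sparse product A @ B; its weights live in C.edata['w']
# and gradients flow back to A.edata['w'] and B.edata['w'].
C = dgl.adj_product_graph(A, B, 'w')
C.edata['w'].sum().backward()
print(A.edata['w'].grad is not None, B.edata['w'].grad is not None)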
@@ -2660,12 +2665,19 @@ def adj_sum_graph(graphs, weight_name):
 >>> A.edata['w'] = torch.randn(6).requires_grad_()
 >>> B.edata['w'] = torch.randn(6).requires_grad_()
 
-If your graph is a multigraph, you will need to call :func:`dgl.to_simple`
+If your graph is a multigraph, call :func:`dgl.to_simple`
 to convert it into a simple graph first.
 
 >>> A = dgl.to_simple(A)
 >>> B = dgl.to_simple(B)
 
+Initialize learnable edge weights.
+
+>>> A.edata['w'] = torch.randn(6).requires_grad_()
+>>> B.edata['w'] = torch.randn(6).requires_grad_()
+
+Take the sum.
+
 >>> C = dgl.adj_sum_graph([A, B], 'w')
 >>> C.edges()
 (tensor([0, 0, 0, 1, 1, 1, 2, 2, 2, 2]),
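Likewise for the sum. The sketch below uses plain homogeneous graphs rather than the doc's heterographs, which should be equivalent for this purpose; re-initializing the weights after ``dgl.to_simple`` matters because ``to_simple`` does not copy edge features to the simplified graph by default.

import dgl
import torch

A = dgl.graph(([0, 1, 2, 2], [1, 2, 0, 1]), num_nodes=3)
B = dgl.graph(([0, 0, 1, 2], [1, 2, 2, 0]), num_nodes=3)

# Simplify first, then attach learnable weights, as in the fixed example.
A, B = dgl.to_simple(A), dgl.to_simple(B)
A.edata['w'] = torch.rand(A.num_edges(), requires_grad=True)
B.edata['w'] = torch.rand(B.num_edges(), requires_grad=True)

# C's adjacency is A + B: edges appearing in both inputs are merged and their
# weights summed, so gradients reach both A.edata['w'] and B.edata['w'].
C = dgl.adj_sum_graph([A, B], 'w')
C.edata['w'].sum().backward()
print(A.edata['w'].grad, B.edata['w'].grad)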
@@ -2930,7 +2942,7 @@ def reorder_graph(g, node_permute_algo='rcmk', edge_permute_algo='src',
 generated/scipy.sparse.csgraph.reverse_cuthill_mckee.html#
 scipy-sparse-csgraph-reverse-cuthill-mckee>`__ from ``scipy`` to generate nodes
 permutation.
-* ``metis``: Use the :func:`~dgl.partition.metis_partition_assignment` function
+* ``metis``: Use the :func:`~dgl.metis_partition_assignment` function
 to partition the input graph, which gives a cluster assignment of each node.
 DGL then sorts the assignment array so the new node order will put nodes of
 the same cluster together.
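A short sketch of the two node_permute_algo modes the docstring describes. The 'rcmk' call only needs scipy; the 'metis' call and its permute_config={'k': ...} argument (number of METIS parts) are written from memory of the 0.7 API and should be treated as an assumption to verify against the docs.

import dgl
import torch

# A small bidirected graph (METIS expects a symmetric adjacency).
src = torch.tensor([0, 1, 2, 3, 4, 1, 2, 3, 4, 0])
dst = torch.tensor([1, 2, 3, 4, 0, 0, 1, 2, 3, 4])
g = dgl.graph((src, dst))

# Reverse Cuthill-McKee ordering (the default algorithm).
rg = dgl.reorder_graph(g, node_permute_algo='rcmk')
print(rg.ndata[dgl.NID])  # original IDs of the reordered nodes (store_ids=True)

# METIS-based ordering: partitions with dgl.metis_partition_assignment under
# the hood, then groups nodes of the same cluster together. 'k' is assumed.
rg_metis = dgl.reorder_graph(g, node_permute_algo='metis', permute_config={'k': 2})
print(rg_metis.ndata[dgl.NID])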
@@ -11,66 +11,64 @@ knowledge in GNNs for graph classification and we recommend you to check
 To use a single GPU in training a GNN, we need to put the model, graph(s), and other
 tensors (e.g. labels) on the same GPU:
-"""
-
-"""
-import torch
-
-# Use the first GPU
-device = torch.device("cuda:0")
-model = model.to(device)
-graph = graph.to(device)
-labels = labels.to(device)
-"""
-
-###############################################################################
-# The node and edge features in the graphs, if any, will also be on the GPU.
-# After that, the forward computation, backward computation and parameter
-# update will take place on the GPU. For graph classification, this repeats
-# for each minibatch gradient descent.
-#
-# Using multiple GPUs allows performing more computation per unit of time. It
-# is like having a team work together, where each GPU is a team member. We need
-# to distribute the computation workload across GPUs and let them synchronize
-# the efforts regularly. PyTorch provides convenient APIs for this task with
-# multiple processes, one per GPU, and we can use them in conjunction with DGL.
-#
-# Intuitively, we can distribute the workload along the dimension of data. This
-# allows multiple GPUs to perform the forward and backward computation of
-# multiple gradient descents in parallel. To distribute a dataset across
-# multiple GPUs, we need to partition it into multiple mutually exclusive
-# subsets of a similar size, one per GPU. We need to repeat the random
-# partition every epoch to guarantee randomness. We can use
-# :func:`~dgl.dataloading.pytorch.GraphDataLoader`, which wraps some PyTorch
-# APIs and does the job for graph classification in data loading.
-#
-# Once all GPUs have finished the backward computation for its minibatch,
-# we need to synchronize the model parameter update across them. Specifically,
-# this involves collecting gradients from all GPUs, averaging them and updating
-# the model parameters on each GPU. We can wrap a PyTorch model with
-# :func:`~torch.nn.parallel.DistributedDataParallel` so that the model
-# parameter update will invoke gradient synchronization first under the hood.
-#
-# .. image:: https://data.dgl.ai/tutorial/mgpu_gc.png
-#    :width: 450px
-#    :align: center
-#
-# That’s the core behind this tutorial. We will explore it more in detail with
-# a complete example below.
-#
-# .. note::
-#
-#    See `this tutorial <https://pytorch.org/tutorials/intermediate/ddp_tutorial.html>`__
-#    from PyTorch for general multi-GPU training with ``DistributedDataParallel``.
-#
-# Distributed Process Group Initialization
-# ----------------------------------------
-#
-# For communication between multiple processes in multi-gpu training, we need
-# to start the distributed backend at the beginning of each process. We use
-# `world_size` to refer to the number of processes and `rank` to refer to the
-# process ID, which should be an integer from `0` to `world_size - 1`.
-#
+
+.. code:: python
+
+    import torch
+
+    # Use the first GPU
+    device = torch.device("cuda:0")
+    model = model.to(device)
+    graph = graph.to(device)
+    labels = labels.to(device)
+
+The node and edge features in the graphs, if any, will also be on the GPU.
+After that, the forward computation, backward computation and parameter
+update will take place on the GPU. For graph classification, this repeats
+for each minibatch gradient descent.
+
+Using multiple GPUs allows performing more computation per unit of time. It
+is like having a team work together, where each GPU is a team member. We need
+to distribute the computation workload across GPUs and let them synchronize
+the efforts regularly. PyTorch provides convenient APIs for this task with
+multiple processes, one per GPU, and we can use them in conjunction with DGL.
+
+Intuitively, we can distribute the workload along the dimension of data. This
+allows multiple GPUs to perform the forward and backward computation of
+multiple gradient descents in parallel. To distribute a dataset across
+multiple GPUs, we need to partition it into multiple mutually exclusive
+subsets of a similar size, one per GPU. We need to repeat the random
+partition every epoch to guarantee randomness. We can use
+:func:`~dgl.dataloading.pytorch.GraphDataLoader`, which wraps some PyTorch
+APIs and does the job for graph classification in data loading.
+
+Once all GPUs have finished the backward computation for its minibatch,
+we need to synchronize the model parameter update across them. Specifically,
+this involves collecting gradients from all GPUs, averaging them and updating
+the model parameters on each GPU. We can wrap a PyTorch model with
+:func:`~torch.nn.parallel.DistributedDataParallel` so that the model
+parameter update will invoke gradient synchronization first under the hood.
+
+.. image:: https://data.dgl.ai/tutorial/mgpu_gc.png
+   :width: 450px
+   :align: center
+
+That’s the core behind this tutorial. We will explore it more in detail with
+a complete example below.
+
+.. note::
+
+   See `this tutorial <https://pytorch.org/tutorials/intermediate/ddp_tutorial.html>`__
+   from PyTorch for general multi-GPU training with ``DistributedDataParallel``.
+
+Distributed Process Group Initialization
+----------------------------------------
+
+For communication between multiple processes in multi-gpu training, we need
+to start the distributed backend at the beginning of each process. We use
+`world_size` to refer to the number of processes and `rank` to refer to the
+process ID, which should be an integer from `0` to `world_size - 1`.
+"""
 
 import torch.distributed as dist
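A minimal sketch of the process group setup that this paragraph describes. The helper name, the NCCL backend, and the TCP address/port are placeholders rather than part of the patch; swap in 'gloo' for CPU-only runs.

import torch.distributed as dist

def init_process_group(world_size, rank):
    # One process per GPU; each spawned worker calls this with its own rank
    # before any collective communication happens.
    dist.init_process_group(
        backend='nccl',
        init_method='tcp://127.0.0.1:12345',
        world_size=world_size,
        rank=rank,
    )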
@@ -193,9 +191,7 @@ def main(rank, world_size, dataset, seed=0):
     optimizer = Adam(model.parameters(), lr=0.01)
 
     train_loader, val_loader, test_loader = get_dataloaders(dataset,
-                                                            seed,
-                                                            world_size,
-                                                            rank)
+                                                            seed)
     for epoch in range(5):
         model.train()
         # The line below ensures all processes use a different
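For context on the simplified get_dataloaders(dataset, seed) call: once the process group is initialized, DGL's GraphDataLoader can shard the dataset itself, so world_size and rank no longer need to be threaded through. The helper below is hypothetical, and the use_ddp/ddp_seed/set_epoch names are recalled from the 0.7 API, so treat them as assumptions to check against the docs.

from dgl.dataloading import GraphDataLoader

def get_train_loader(train_set, seed, batch_size=32):
    # use_ddp=True wraps the dataset in a DistributedSampler, so each process
    # only sees its own shard; rank and world size come from the process group.
    return GraphDataLoader(train_set, use_ddp=True, ddp_seed=seed,
                           batch_size=batch_size, shuffle=True)

# Inside the training loop, reshuffle the shards every epoch, e.g.
#   train_loader.set_epoch(epoch)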