Unverified commit 172949d4 authored by Xin Yao, committed by GitHub

[Example] Fix multi-GPU RGCN example (#3871)



* fix multi-gpu rgcn example

* remove dgl.multiprocessing in tutorials

* add a comment
Co-authored-by: Jinjing Zhou <VoVAllen@users.noreply.github.com>
parent 61edb798
@@ -31,12 +31,13 @@ def collect_eval(n_gpus, queue, labels):
 def run(proc_id, n_gpus, n_cpus, args, devices, dataset, queue=None):
     dev_id = devices[proc_id]
     th.cuda.set_device(dev_id)
     g, num_rels, num_classes, labels, train_idx, test_idx,\
         target_idx, inv_target = dataset
     dist_init_method = 'tcp://{master_ip}:{master_port}'.format(
         master_ip='127.0.0.1', master_port='12345')
-    backend = 'gloo'
+    backend = 'nccl'
     if proc_id == 0:
         print("backend using {}".format(backend))
     th.distributed.init_process_group(backend=backend,
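
For context, the per-process setup this hunk converges on looks roughly like the sketch below. The helper name init_distributed is illustrative, and the world_size/rank keyword arguments are assumptions, since the init_process_group call is truncated in the hunk.

    import torch as th
    import torch.distributed as dist

    def init_distributed(proc_id, n_gpus, dev_id):
        # Bind this trainer process to its own GPU before any CUDA work.
        th.cuda.set_device(dev_id)
        # The diff switches the example to NCCL, the standard backend when
        # each process drives its own GPU; gloo also runs but is CPU-oriented.
        dist.init_process_group(backend='nccl',
                                init_method='tcp://127.0.0.1:12345',
                                world_size=n_gpus,  # assumed: one rank per GPU
                                rank=proc_id)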
@@ -101,6 +102,8 @@ def main(args, devices):
     g.create_formats_()
     n_gpus = len(devices)
+    # required for mp.Queue() to work with mp.spawn()
+    mp.set_start_method('spawn')
     n_cpus = mp.cpu_count()
     queue = mp.Queue(n_gpus)
     mp.spawn(run, args=(n_gpus, n_cpus // n_gpus, args, devices, data, queue),
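The two added lines in main() address a start-method mismatch: torch's mp.spawn() launches its children with the 'spawn' start method, while mp.Queue() uses the default multiprocessing context (fork on Linux) unless the global start method is changed first. Below is a minimal, self-contained sketch of the pattern, assuming mp is torch.multiprocessing; the example's import is not visible in this hunk, and worker is a hypothetical stand-in for run().

    import torch.multiprocessing as mp

    def worker(rank, queue):
        # Each spawned process reports its rank back through the shared queue.
        queue.put(rank)

    if __name__ == '__main__':
        # mp.spawn() starts children with the 'spawn' start method, so the
        # queue must come from a matching context; sharing a fork-context
        # queue with spawned children can fail.
        mp.set_start_method('spawn')
        n_procs = 2
        queue = mp.Queue(n_procs)
        mp.spawn(worker, args=(queue,), nprocs=n_procs, join=True)
        print(sorted(queue.get() for _ in range(n_procs)))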
@@ -206,19 +206,6 @@ def run(proc_id, devices):
 #
 # A typical scenario for multi-GPU training with DDP is to replicate the
 # model once per GPU, and spawn one trainer process per GPU.
-#
-# PyTorch tutorials recommend using ``multiprocessing.spawn`` to spawn
-# multiple processes. This however is undesirable for training node
-# classification or link prediction models on a single large graph,
-# especially on Linux. The reason is that a single large graph itself may
-# take a lot of memory, and ``mp.spawn`` will duplicate all objects in the
-# program, including the large graph. Consequently, the large graph will
-# be duplicated as many times as the number of GPUs.
-#
-# To alleviate the problem we recommend using ``multiprocessing.Process``,
-# which *forks* from the main process and allows sharing the same graph
-# object to trainer processes via *copy-on-write*. This can greatly reduce
-# the memory consumption.
 #
 # Normally, DGL maintains only one sparse matrix representation (usually COO)
 # for each graph, and will create new formats when some APIs are called for
@@ -238,12 +225,6 @@ graph.create_formats_()
 ######################################################################
 # Then you can spawn the subprocesses to train with multiple GPUs.
 #
-# .. note::
-#
-#    You will need to use ``dgl.multiprocessing`` instead of the Python
-#    ``multiprocessing`` package. ``dgl.multiprocessing`` is identical to
-#    Python’s built-in ``multiprocessing`` except that it handles the
-#    subtleties between forking and multithreading in Python.
 #
 # .. code:: python
 #
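Putting the two tutorial hunks together, the remaining guidance is to materialize the graph's sparse formats once in the main process and then spawn one trainer per GPU. The sketch below is illustrative only: the random graph, the train() signature, and the spawn call are assumptions, since the tutorial's actual training code lies outside this diff.

    import dgl
    import torch.multiprocessing as mp

    def train(proc_id, devices, graph):
        # Hypothetical per-GPU trainer: the tutorial's real version would set
        # the CUDA device, call torch.distributed.init_process_group, and wrap
        # the model in DistributedDataParallel here.
        print("process", proc_id, "would use GPU", devices[proc_id],
              "- formats already created:", graph.formats()['created'])

    if __name__ == '__main__':
        devices = [0, 1]
        graph = dgl.rand_graph(1000, 5000)  # stand-in for the tutorial's dataset
        # Create all sparse formats (coo/csr/csc) up front so each trainer
        # process does not rebuild them on its own copy of the graph.
        graph.create_formats_()
        mp.spawn(train, args=(devices, graph), nprocs=len(devices))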