"src/vscode:/vscode.git/clone" did not exist on "567c5acf4e14ca69b040225d50c8bba29a773e4a"
Unverified commit b377e1b9 authored by Rhett Ying, committed by GitHub

[Dist][Examples] refactor dist graphsage examples (#4269)

* [Dist][Examples] refactor dist graphsage examples

* refine train_dist.py

* update train_dist_unsupervised.py

* fix debug info

* update train_dist_transductive

* update unsupervised_transductive

* remove distgnn

* fix join() in standalone mode

* change batch_labels to long() for ogbn-papers100M

* free unnecessary mem

* lint

* fix lint

* refine

* fix lint

* fix incorrect args

* refine
parent ff090f69
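The refactor summarized in the commit message above mostly swaps the hand-rolled `NeighborSampler` classes and `DistDataLoader` calls for DGL's built-in `dgl.dataloading.NeighborSampler` plus `DistNodeDataLoader`/`DistEdgeDataLoader`. A minimal sketch of the resulting node-classification loop, assuming `g` (a `DistGraph`), `train_nid`, `model`, `loss_fcn`, `optimizer`, and `device` are already set up as in `train_dist.py`, and using placeholder fanouts:

```python
import dgl


def train_one_epoch(g, train_nid, model, loss_fcn, optimizer, device,
                    fanouts=(25, 10), batch_size=1000):
    # Built-in multi-layer sampler (one fanout per GNN layer) replaces the
    # old hand-written NeighborSampler class and its custom collate_fn.
    sampler = dgl.dataloading.NeighborSampler(list(fanouts))
    # DistNodeDataLoader yields (input_nodes, seeds, blocks) directly.
    dataloader = dgl.dataloading.DistNodeDataLoader(
        g, train_nid, sampler,
        batch_size=batch_size, shuffle=True, drop_last=False,
    )
    for input_nodes, seeds, blocks in dataloader:
        batch_inputs = g.ndata["features"][input_nodes].to(device)
        # .long() keeps the ogbn-papers100M labels usable with cross entropy.
        batch_labels = g.ndata["labels"][seeds].long().to(device)
        blocks = [block.to(device) for block in blocks]
        batch_pred = model(blocks, batch_inputs)
        loss = loss_fcn(batch_pred, batch_labels)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
```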
...@@ -137,7 +137,6 @@ python3 ~/workspace/dgl/tools/launch.py \ ...@@ -137,7 +137,6 @@ python3 ~/workspace/dgl/tools/launch.py \
--num_servers 1 \ --num_servers 1 \
--part_config data/ogb-product.json \ --part_config data/ogb-product.json \
--ip_config ip_config.txt \ --ip_config ip_config.txt \
--graph_format csc,coo \
"python3 train_dist_unsupervised.py --graph_name ogb-product --ip_config ip_config.txt --num_epochs 3 --batch_size 1000" "python3 train_dist_unsupervised.py --graph_name ogb-product --ip_config ip_config.txt --num_epochs 3 --batch_size 1000"
``` ```
...@@ -158,24 +157,22 @@ To run supervised with transductive setting (nodes are initialized with node emb ...@@ -158,24 +157,22 @@ To run supervised with transductive setting (nodes are initialized with node emb
```bash ```bash
python3 ~/workspace/dgl/tools/launch.py --workspace ~/workspace/dgl/examples/pytorch/graphsage/dist/ \ python3 ~/workspace/dgl/tools/launch.py --workspace ~/workspace/dgl/examples/pytorch/graphsage/dist/ \
--num_trainers 4 \ --num_trainers 4 \
--num_samplers 4 \
--num_servers 1 \ --num_servers 1 \
--num_samplers 0 \ --num_samplers 0 \
--part_config data/ogb-product.json \ --part_config data/ogb-product.json \
--ip_config ip_config.txt \ --ip_config ip_config.txt \
"python3 train_dist_transductive.py --graph_name ogb-product --ip_config ip_config.txt --batch_size 1000 --num_gpu 4 --eval_every 5" "python3 train_dist_transductive.py --graph_name ogb-product --ip_config ip_config.txt --batch_size 1000 --num_gpus 4 --eval_every 5"
``` ```
To run supervised with transductive setting using dgl distributed DistEmbedding To run supervised with transductive setting using dgl distributed DistEmbedding
```bash ```bash
python3 ~/workspace/dgl/tools/launch.py --workspace ~/workspace/dgl/examples/pytorch/graphsage/dist/ \ python3 ~/workspace/dgl/tools/launch.py --workspace ~/workspace/dgl/examples/pytorch/graphsage/dist/ \
--num_trainers 4 \ --num_trainers 4 \
--num_samplers 4 \
--num_servers 1 \ --num_servers 1 \
--num_samplers 0 \ --num_samplers 0 \
--part_config data/ogb-product.json \ --part_config data/ogb-product.json \
--ip_config ip_config.txt \ --ip_config ip_config.txt \
"python3 train_dist_transductive.py --graph_name ogb-product --ip_config ip_config.txt --batch_size 1000 --num_gpu 4 --eval_every 5 --dgl_sparse" "python3 train_dist_transductive.py --graph_name ogb-product --ip_config ip_config.txt --batch_size 1000 --num_gpus 4 --eval_every 5 --dgl_sparse"
``` ```
To run unsupervised with transductive setting (nodes are initialized with node embedding) To run unsupervised with transductive setting (nodes are initialized with node embedding)
...@@ -186,7 +183,6 @@ python3 ~/workspace/dgl/tools/launch.py --workspace ~/workspace/dgl/examples/pyt ...@@ -186,7 +183,6 @@ python3 ~/workspace/dgl/tools/launch.py --workspace ~/workspace/dgl/examples/pyt
--num_servers 1 \ --num_servers 1 \
--part_config data/ogb-product.json \ --part_config data/ogb-product.json \
--ip_config ip_config.txt \ --ip_config ip_config.txt \
--graph_format csc,coo \
"python3 train_dist_unsupervised_transductive.py --graph_name ogb-product --ip_config ip_config.txt --num_epochs 3 --batch_size 1000 --num_gpus 4" "python3 train_dist_unsupervised_transductive.py --graph_name ogb-product --ip_config ip_config.txt --num_epochs 3 --batch_size 1000 --num_gpus 4"
``` ```
...@@ -198,7 +194,6 @@ python3 ~/workspace/dgl/tools/launch.py --workspace ~/workspace/dgl/examples/pyt ...@@ -198,7 +194,6 @@ python3 ~/workspace/dgl/tools/launch.py --workspace ~/workspace/dgl/examples/pyt
--num_servers 1 \ --num_servers 1 \
--part_config data/ogb-product.json \ --part_config data/ogb-product.json \
--ip_config ip_config.txt \ --ip_config ip_config.txt \
--graph_format csc,coo \
"python3 train_dist_unsupervised_transductive.py --graph_name ogb-product --ip_config ip_config.txt --num_epochs 3 --batch_size 1000 --num_gpus 4 --dgl_sparse" "python3 train_dist_unsupervised_transductive.py --graph_name ogb-product --ip_config ip_config.txt --num_epochs 3 --batch_size 1000 --num_gpus 4 --dgl_sparse"
``` ```
......
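One bullet in the commit message, "fix join() in standalone mode", shows up in each training script below as a small no-op context manager on the model. Under `DistributedDataParallel` the training loop can rely on DDP's real `join()` to tolerate uneven numbers of batches per rank; in `--standalone` mode the model stays a bare `nn.Module`, so the scripts add a dummy `join()` to keep `with model.join():` working. A self-contained sketch with a hypothetical model name:

```python
from contextlib import contextmanager

import torch as th
import torch.nn as nn


class TinyModel(nn.Module):
    """Stand-in for DistSAGE; only the join() machinery is shown."""

    def __init__(self):
        super().__init__()
        self.fc = nn.Linear(4, 2)

    def forward(self, x):
        return self.fc(x)

    @contextmanager
    def join(self):
        # Dummy join for standalone mode: DistributedDataParallel provides
        # a real join() for uneven inputs, a plain nn.Module does not.
        yield


model = TinyModel()
with model.join():  # standalone: uses the no-op context manager above
    out = model(th.randn(3, 4))
# In the distributed path the scripts wrap the model first, e.g.
#   model = th.nn.parallel.DistributedDataParallel(model)
# and DDP's own join() takes over.
```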
import os
os.environ["DGLBACKEND"] = "pytorch"
import argparse import argparse
import math
import socket import socket
import time import time
from functools import wraps from contextlib import contextmanager
from multiprocessing import Process
import numpy as np import numpy as np
import torch as th import torch as th
import torch.multiprocessing as mp
import torch.nn as nn import torch.nn as nn
import torch.nn.functional as F import torch.nn.functional as F
import torch.optim as optim import torch.optim as optim
import tqdm import tqdm
from torch.utils.data import DataLoader
import dgl import dgl
import dgl.function as fn
import dgl.nn.pytorch as dglnn import dgl.nn.pytorch as dglnn
from dgl import DGLGraph
from dgl.data import load_data, register_data_args
from dgl.data.utils import load_graphs
from dgl.distributed import DistDataLoader
def load_subtensor(g, seeds, input_nodes, device, load_feat=True): def load_subtensor(g, seeds, input_nodes, device, load_feat=True):
""" """
...@@ -37,40 +23,6 @@ def load_subtensor(g, seeds, input_nodes, device, load_feat=True): ...@@ -37,40 +23,6 @@ def load_subtensor(g, seeds, input_nodes, device, load_feat=True):
return batch_inputs, batch_labels return batch_inputs, batch_labels
class NeighborSampler(object):
def __init__(self, g, fanouts, sample_neighbors, device, load_feat=True):
self.g = g
self.fanouts = fanouts
self.sample_neighbors = sample_neighbors
self.device = device
self.load_feat = load_feat
def sample_blocks(self, seeds):
seeds = th.LongTensor(np.asarray(seeds))
blocks = []
for fanout in self.fanouts:
# For each seed node, sample ``fanout`` neighbors.
frontier = self.sample_neighbors(
self.g, seeds, fanout, replace=True
)
# Then we compact the frontier into a bipartite graph for message passing.
block = dgl.to_block(frontier, seeds)
# Obtain the seed nodes for next layer.
seeds = block.srcdata[dgl.NID]
blocks.insert(0, block)
input_nodes = blocks[0].srcdata[dgl.NID]
seeds = blocks[-1].dstdata[dgl.NID]
batch_inputs, batch_labels = load_subtensor(
self.g, seeds, input_nodes, "cpu", self.load_feat
)
if self.load_feat:
blocks[0].srcdata["features"] = batch_inputs
blocks[-1].dstdata["labels"] = batch_labels
return blocks
class DistSAGE(nn.Module): class DistSAGE(nn.Module):
def __init__( def __init__(
self, in_feats, n_hidden, n_classes, n_layers, activation, dropout self, in_feats, n_hidden, n_classes, n_layers, activation, dropout
...@@ -89,72 +41,68 @@ class DistSAGE(nn.Module): ...@@ -89,72 +41,68 @@ class DistSAGE(nn.Module):
def forward(self, blocks, x): def forward(self, blocks, x):
h = x h = x
for l, (layer, block) in enumerate(zip(self.layers, blocks)): for i, (layer, block) in enumerate(zip(self.layers, blocks)):
h = layer(block, h) h = layer(block, h)
if l != len(self.layers) - 1: if i != len(self.layers) - 1:
h = self.activation(h) h = self.activation(h)
h = self.dropout(h) h = self.dropout(h)
return h return h
def inference(self, g, x, batch_size, device): def inference(self, g, x, batch_size, device):
""" """
Inference with the GraphSAGE model on full neighbors (i.e. without neighbor sampling). Inference with the GraphSAGE model on full neighbors (i.e. without
neighbor sampling).
g : the entire graph. g : the entire graph.
x : the input of entire node set. x : the input of entire node set.
The inference code is written in a fashion that it could handle any number of nodes and Distributed layer-wise inference.
layers.
""" """
# During inference with sampling, multi-layer blocks are very inefficient because # During inference with sampling, multi-layer blocks are very
# lots of computations in the first few layers are repeated. # inefficient because lots of computations in the first few layers
# Therefore, we compute the representation of all nodes layer by layer. The nodes # are repeated. Therefore, we compute the representation of all nodes
# on each layer are of course splitted in batches. # layer by layer. The nodes on each layer are of course splitted in
# batches.
# TODO: can we standardize this? # TODO: can we standardize this?
nodes = dgl.distributed.node_split( nodes = dgl.distributed.node_split(
np.arange(g.number_of_nodes()), np.arange(g.num_nodes()),
g.get_partition_book(), g.get_partition_book(),
force_even=True, force_even=True,
) )
y = dgl.distributed.DistTensor( y = dgl.distributed.DistTensor(
(g.number_of_nodes(), self.n_hidden), (g.num_nodes(), self.n_hidden),
th.float32, th.float32,
"h", "h",
persistent=True, persistent=True,
) )
for l, layer in enumerate(self.layers): for i, layer in enumerate(self.layers):
if l == len(self.layers) - 1: if i == len(self.layers) - 1:
y = dgl.distributed.DistTensor( y = dgl.distributed.DistTensor(
(g.number_of_nodes(), self.n_classes), (g.num_nodes(), self.n_classes),
th.float32, th.float32,
"h_last", "h_last",
persistent=True, persistent=True,
) )
sampler = NeighborSampler(
g, [-1], dgl.distributed.sample_neighbors, device
)
print( print(
"|V|={}, eval batch size: {}".format( f"|V|={g.num_nodes()}, eval batch size: {batch_size}"
g.number_of_nodes(), batch_size
)
) )
# Create PyTorch DataLoader for constructing blocks
dataloader = DistDataLoader( sampler = dgl.dataloading.NeighborSampler([-1])
dataset=nodes, dataloader = dgl.dataloading.DistNodeDataLoader(
g,
nodes,
sampler,
batch_size=batch_size, batch_size=batch_size,
collate_fn=sampler.sample_blocks,
shuffle=False, shuffle=False,
drop_last=False, drop_last=False,
) )
for blocks in tqdm.tqdm(dataloader): for input_nodes, output_nodes, blocks in tqdm.tqdm(dataloader):
block = blocks[0].to(device) block = blocks[0].to(device)
input_nodes = block.srcdata[dgl.NID]
output_nodes = block.dstdata[dgl.NID]
h = x[input_nodes].to(device) h = x[input_nodes].to(device)
h_dst = h[: block.number_of_dst_nodes()] h_dst = h[: block.number_of_dst_nodes()]
h = layer(block, (h, h_dst)) h = layer(block, (h, h_dst))
if l != len(self.layers) - 1: if i != len(self.layers) - 1:
h = self.activation(h) h = self.activation(h)
h = self.dropout(h) h = self.dropout(h)
...@@ -164,6 +112,11 @@ class DistSAGE(nn.Module): ...@@ -164,6 +112,11 @@ class DistSAGE(nn.Module):
g.barrier() g.barrier()
return y return y
@contextmanager
def join(self):
"""dummy join for standalone"""
yield
def compute_acc(pred, labels): def compute_acc(pred, labels):
""" """
...@@ -196,23 +149,18 @@ def run(args, device, data): ...@@ -196,23 +149,18 @@ def run(args, device, data):
# Unpack data # Unpack data
train_nid, val_nid, test_nid, in_feats, n_classes, g = data train_nid, val_nid, test_nid, in_feats, n_classes, g = data
shuffle = True shuffle = True
# Create sampler # prefetch_node_feats/prefetch_labels are not supported for DistGraph yet.
sampler = NeighborSampler( sampler = dgl.dataloading.NeighborSampler(
g, [int(fanout) for fanout in args.fan_out.split(",")]
[int(fanout) for fanout in args.fan_out.split(",")],
dgl.distributed.sample_neighbors,
device,
) )
dataloader = dgl.dataloading.DistNodeDataLoader(
# Create DataLoader for constructing blocks g,
dataloader = DistDataLoader( train_nid,
dataset=train_nid.numpy(), sampler,
batch_size=args.batch_size, batch_size=args.batch_size,
collate_fn=sampler.sample_blocks,
shuffle=shuffle, shuffle=shuffle,
drop_last=False, drop_last=False,
) )
# Define model and optimizer # Define model and optimizer
model = DistSAGE( model = DistSAGE(
in_feats, in_feats,
...@@ -247,28 +195,27 @@ def run(args, device, data): ...@@ -247,28 +195,27 @@ def run(args, device, data):
num_seeds = 0 num_seeds = 0
num_inputs = 0 num_inputs = 0
start = time.time() start = time.time()
# Loop over the dataloader to sample the computation dependency graph as a list of # Loop over the dataloader to sample the computation dependency graph
# blocks. # as a list of blocks.
step_time = [] step_time = []
with model.join(): with model.join():
for step, blocks in enumerate(dataloader): for step, (input_nodes, seeds, blocks) in enumerate(dataloader):
tic_step = time.time() tic_step = time.time()
sample_time += tic_step - start sample_time += tic_step - start
# fetch features/labels
# The nodes for input lies at the LHS side of the first block. batch_inputs, batch_labels = load_subtensor(
# The nodes for output lies at the RHS side of the last block. g, seeds, input_nodes, "cpu"
batch_inputs = blocks[0].srcdata["features"] )
batch_labels = blocks[-1].dstdata["labels"]
batch_labels = batch_labels.long() batch_labels = batch_labels.long()
num_seeds += len(blocks[-1].dstdata[dgl.NID]) num_seeds += len(blocks[-1].dstdata[dgl.NID])
num_inputs += len(blocks[0].srcdata[dgl.NID]) num_inputs += len(blocks[0].srcdata[dgl.NID])
# move to target device
blocks = [block.to(device) for block in blocks] blocks = [block.to(device) for block in blocks]
batch_inputs = batch_inputs.to(device)
batch_labels = batch_labels.to(device) batch_labels = batch_labels.to(device)
# Compute loss and prediction # Compute loss and prediction
start = time.time() start = time.time()
# print(g.rank(), blocks[0].device, model.module.layers[0].fc_neigh.weight.device, dev_id)
batch_pred = model(blocks, batch_inputs) batch_pred = model(blocks, batch_inputs)
loss = loss_fcn(batch_pred, batch_labels) loss = loss_fcn(batch_pred, batch_labels)
forward_end = time.time() forward_end = time.time()
...@@ -292,7 +239,9 @@ def run(args, device, data): ...@@ -292,7 +239,9 @@ def run(args, device, data):
else 0 else 0
) )
print( print(
"Part {} | Epoch {:05d} | Step {:05d} | Loss {:.4f} | Train Acc {:.4f} | Speed (samples/sec) {:.4f} | GPU {:.1f} MB | time {:.3f} s".format( "Part {} | Epoch {:05d} | Step {:05d} | Loss {:.4f} | "
"Train Acc {:.4f} | Speed (samples/sec) {:.4f} | GPU "
"{:.1f} MB | time {:.3f} s".format(
g.rank(), g.rank(),
epoch, epoch,
step, step,
...@@ -300,14 +249,16 @@ def run(args, device, data): ...@@ -300,14 +249,16 @@ def run(args, device, data):
acc.item(), acc.item(),
np.mean(iter_tput[3:]), np.mean(iter_tput[3:]),
gpu_mem_alloc, gpu_mem_alloc,
np.sum(step_time[-args.log_every :]), np.sum(step_time[-args.log_every:]),
) )
) )
start = time.time() start = time.time()
toc = time.time() toc = time.time()
print( print(
"Part {}, Epoch Time(s): {:.4f}, sample+data_copy: {:.4f}, forward: {:.4f}, backward: {:.4f}, update: {:.4f}, #seeds: {}, #inputs: {}".format( "Part {}, Epoch Time(s): {:.4f}, sample+data_copy: {:.4f}, "
"forward: {:.4f}, backward: {:.4f}, update: {:.4f}, #seeds: {}, "
"#inputs: {}".format(
g.rank(), g.rank(),
toc - tic, toc - tic,
sample_time, sample_time,
...@@ -323,7 +274,7 @@ def run(args, device, data): ...@@ -323,7 +274,7 @@ def run(args, device, data):
if epoch % args.eval_every == 0 and epoch != 0: if epoch % args.eval_every == 0 and epoch != 0:
start = time.time() start = time.time()
val_acc, test_acc = evaluate( val_acc, test_acc = evaluate(
model.module, model if args.standalone else model.module,
g, g,
g.ndata["features"], g.ndata["features"],
g.ndata["labels"], g.ndata["labels"],
...@@ -333,7 +284,8 @@ def run(args, device, data): ...@@ -333,7 +284,8 @@ def run(args, device, data):
device, device,
) )
print( print(
"Part {}, Val Acc {:.4f}, Test Acc {:.4f}, time: {:.4f}".format( "Part {}, Val Acc {:.4f}, Test Acc {:.4f}, time: {:.4f}".format
(
g.rank(), val_acc, test_acc, time.time() - start g.rank(), val_acc, test_acc, time.time() - start
) )
) )
...@@ -346,7 +298,10 @@ def main(args): ...@@ -346,7 +298,10 @@ def main(args):
print(socket.gethostname(), "Initializing DGL process group") print(socket.gethostname(), "Initializing DGL process group")
th.distributed.init_process_group(backend=args.backend) th.distributed.init_process_group(backend=args.backend)
print(socket.gethostname(), "Initializing DistGraph") print(socket.gethostname(), "Initializing DistGraph")
g = dgl.distributed.DistGraph(args.graph_name, part_config=args.part_config) g = dgl.distributed.DistGraph(
args.graph_name,
part_config=args.part_config
)
print(socket.gethostname(), "rank:", g.rank()) print(socket.gethostname(), "rank:", g.rank())
pb = g.get_partition_book() pb = g.get_partition_book()
...@@ -381,7 +336,8 @@ def main(args): ...@@ -381,7 +336,8 @@ def main(args):
) )
local_nid = pb.partid2nids(pb.partid).detach().numpy() local_nid = pb.partid2nids(pb.partid).detach().numpy()
print( print(
"part {}, train: {} (local: {}), val: {} (local: {}), test: {} (local: {})".format( "part {}, train: {} (local: {}), val: {} (local: {}), test: {} "
"(local: {})".format(
g.rank(), g.rank(),
len(train_nid), len(train_nid),
len(np.intersect1d(train_nid.numpy(), local_nid)), len(np.intersect1d(train_nid.numpy(), local_nid)),
...@@ -398,8 +354,8 @@ def main(args): ...@@ -398,8 +354,8 @@ def main(args):
dev_id = g.rank() % args.num_gpus dev_id = g.rank() % args.num_gpus
device = th.device("cuda:" + str(dev_id)) device = th.device("cuda:" + str(dev_id))
n_classes = args.n_classes n_classes = args.n_classes
if n_classes == -1: if n_classes == 0:
labels = g.ndata["labels"][np.arange(g.number_of_nodes())] labels = g.ndata["labels"][np.arange(g.num_nodes())]
n_classes = len(th.unique(labels[th.logical_not(th.isnan(labels))])) n_classes = len(th.unique(labels[th.logical_not(th.isnan(labels))]))
del labels del labels
print("#labels:", n_classes) print("#labels:", n_classes)
...@@ -413,7 +369,6 @@ def main(args): ...@@ -413,7 +369,6 @@ def main(args):
if __name__ == "__main__": if __name__ == "__main__":
parser = argparse.ArgumentParser(description="GCN") parser = argparse.ArgumentParser(description="GCN")
register_data_args(parser)
parser.add_argument("--graph_name", type=str, help="graph name") parser.add_argument("--graph_name", type=str, help="graph name")
parser.add_argument("--id", type=int, help="the partition id") parser.add_argument("--id", type=int, help="the partition id")
parser.add_argument( parser.add_argument(
...@@ -422,14 +377,8 @@ if __name__ == "__main__": ...@@ -422,14 +377,8 @@ if __name__ == "__main__":
parser.add_argument( parser.add_argument(
"--part_config", type=str, help="The path to the partition config file" "--part_config", type=str, help="The path to the partition config file"
) )
parser.add_argument("--num_clients", type=int, help="The number of clients")
parser.add_argument( parser.add_argument(
"--n_classes", "--n_classes", type=int, default=0, help="the number of classes"
type=int,
default=-1,
help="The number of classes. If not specified, this"
" value will be calculated via scaning all the labels"
" in the dataset which probably causes memory burst.",
) )
parser.add_argument( parser.add_argument(
"--backend", "--backend",
...@@ -463,7 +412,8 @@ if __name__ == "__main__": ...@@ -463,7 +412,8 @@ if __name__ == "__main__":
"--pad-data", "--pad-data",
default=False, default=False,
action="store_true", action="store_true",
help="Pad train nid to the same length across machine, to ensure num of batches to be the same.", help="Pad train nid to the same length across machine, to ensure num "
"of batches to be the same.",
) )
parser.add_argument( parser.add_argument(
"--net_type", "--net_type",
......
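The transductive script that follows (train_dist_transductive.py, judging by the README commands and the imports) does not read node features from the graph; it learns them through a DistEmb layer backed either by a plain `torch.nn.Embedding(sparse=True)` or, with `--dgl_sparse`, by `dgl.distributed.DistEmbedding`. A condensed sketch of that layer; the uniform initializer and the embedding name "sage" are placeholders, and constructing a DistEmbedding presumes `dgl.distributed.initialize()` has already run:

```python
import dgl
import torch as th
import torch.nn as nn


def initializer(shape, dtype):
    # Placeholder init; the real script defines its own initializer.
    emb = th.zeros(shape, dtype=dtype)
    nn.init.uniform_(emb, -1.0, 1.0)
    return emb


class DistEmb(nn.Module):
    """Learnable input embeddings for the transductive setting (sketch)."""

    def __init__(self, num_nodes, emb_size, dgl_sparse_emb=False, dev_id="cpu"):
        super().__init__()
        self.dev_id = dev_id
        self.dgl_sparse_emb = dgl_sparse_emb
        if dgl_sparse_emb:
            # Stored in DGL's distributed KVStore; trained with
            # dgl.distributed.optim.SparseAdam in the scripts.
            self.sparse_emb = dgl.distributed.DistEmbedding(
                num_nodes, emb_size, name="sage", init_func=initializer
            )
        else:
            # Plain PyTorch sparse embedding, trained with th.optim.SparseAdam.
            self.sparse_emb = nn.Embedding(num_nodes, emb_size, sparse=True)

    def forward(self, idx):
        # The real layer also moves the result onto the trainer's GPU;
        # kept on CPU here to stay minimal.
        return self.sparse_emb(idx)
```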
import os
os.environ["DGLBACKEND"] = "pytorch"
import argparse import argparse
import math
import time import time
from functools import wraps
from multiprocessing import Process
import numpy as np import numpy as np
import torch as th import torch as th
import torch.multiprocessing as mp
import torch.nn as nn import torch.nn as nn
import torch.nn.functional as F import torch.nn.functional as F
import torch.optim as optim import torch.optim as optim
import tqdm
from torch.utils.data import DataLoader
from train_dist import DistSAGE, NeighborSampler, compute_acc
import dgl import dgl
import dgl.function as fn from dgl.distributed import DistEmbedding
import dgl.nn.pytorch as dglnn from train_dist import DistSAGE, compute_acc
from dgl import DGLGraph
from dgl.data import load_data, register_data_args
from dgl.data.utils import load_graphs
from dgl.distributed import DistDataLoader, DistEmbedding
class TransDistSAGE(DistSAGE):
def __init__(
self, in_feats, n_hidden, n_classes, n_layers, activation, dropout
):
super(TransDistSAGE, self).__init__(
in_feats, n_hidden, n_classes, n_layers, activation, dropout
)
def inference(self, standalone, g, x, batch_size, device):
"""
Inference with the GraphSAGE model on full neighbors (i.e. without neighbor sampling).
g : the entire graph.
x : the input of entire node set.
The inference code is written in a fashion that it could handle any number of nodes and
layers.
"""
# During inference with sampling, multi-layer blocks are very inefficient because
# lots of computations in the first few layers are repeated.
# Therefore, we compute the representation of all nodes layer by layer. The nodes
# on each layer are of course splitted in batches.
# TODO: can we standardize this?
nodes = dgl.distributed.node_split(
np.arange(g.number_of_nodes()),
g.get_partition_book(),
force_even=True,
)
y = dgl.distributed.DistTensor(
(g.number_of_nodes(), self.n_hidden),
th.float32,
"h",
persistent=True,
)
for l, layer in enumerate(self.layers):
if l == len(self.layers) - 1:
y = dgl.distributed.DistTensor(
(g.number_of_nodes(), self.n_classes),
th.float32,
"h_last",
persistent=True,
)
sampler = NeighborSampler(
g,
[-1],
dgl.distributed.sample_neighbors,
device,
load_feat=False,
)
print(
"|V|={}, eval batch size: {}".format(
g.number_of_nodes(), batch_size
)
)
# Create PyTorch DataLoader for constructing blocks
dataloader = DistDataLoader(
dataset=nodes,
batch_size=batch_size,
collate_fn=sampler.sample_blocks,
shuffle=False,
drop_last=False,
)
for blocks in tqdm.tqdm(dataloader):
block = blocks[0].to(device)
input_nodes = block.srcdata[dgl.NID]
output_nodes = block.dstdata[dgl.NID]
h = x[input_nodes].to(device)
h_dst = h[: block.number_of_dst_nodes()]
h = layer(block, (h, h_dst))
if l != len(self.layers) - 1:
h = self.activation(h)
h = self.dropout(h)
y[output_nodes] = h.cpu()
x = y
g.barrier()
return y
def initializer(shape, dtype): def initializer(shape, dtype):
...@@ -114,7 +18,9 @@ def initializer(shape, dtype): ...@@ -114,7 +18,9 @@ def initializer(shape, dtype):
class DistEmb(nn.Module): class DistEmb(nn.Module):
def __init__(self, num_nodes, emb_size, dgl_sparse_emb=False, dev_id="cpu"): def __init__(
self, num_nodes, emb_size, dgl_sparse_emb=False, dev_id="cpu"
):
super().__init__() super().__init__()
self.dev_id = dev_id self.dev_id = dev_id
self.emb_size = emb_size self.emb_size = emb_size
...@@ -138,11 +44,11 @@ class DistEmb(nn.Module): ...@@ -138,11 +44,11 @@ class DistEmb(nn.Module):
def load_embs(standalone, emb_layer, g): def load_embs(standalone, emb_layer, g):
nodes = dgl.distributed.node_split( nodes = dgl.distributed.node_split(
np.arange(g.number_of_nodes()), g.get_partition_book(), force_even=True np.arange(g.num_nodes()), g.get_partition_book(), force_even=True
) )
x = dgl.distributed.DistTensor( x = dgl.distributed.DistTensor(
( (
g.number_of_nodes(), g.num_nodes(),
emb_layer.module.emb_size emb_layer.module.emb_size
if isinstance(emb_layer, th.nn.parallel.DistributedDataParallel) if isinstance(emb_layer, th.nn.parallel.DistributedDataParallel)
else emb_layer.emb_size, else emb_layer.emb_size,
...@@ -154,7 +60,7 @@ def load_embs(standalone, emb_layer, g): ...@@ -154,7 +60,7 @@ def load_embs(standalone, emb_layer, g):
num_nodes = nodes.shape[0] num_nodes = nodes.shape[0]
for i in range((num_nodes + 1023) // 1024): for i in range((num_nodes + 1023) // 1024):
idx = nodes[ idx = nodes[
i * 1024 : (i + 1) * 1024 i * 1024: (i + 1) * 1024
if (i + 1) * 1024 < num_nodes if (i + 1) * 1024 < num_nodes
else num_nodes else num_nodes
] ]
...@@ -187,11 +93,13 @@ def evaluate( ...@@ -187,11 +93,13 @@ def evaluate(
batch_size : Number of nodes to compute at the same time. batch_size : Number of nodes to compute at the same time.
device : The GPU device to evaluate on. device : The GPU device to evaluate on.
""" """
if not standalone:
model = model.module
model.eval() model.eval()
emb_layer.eval() emb_layer.eval()
with th.no_grad(): with th.no_grad():
inputs = load_embs(standalone, emb_layer, g) inputs = load_embs(standalone, emb_layer, g)
pred = model.inference(standalone, g, inputs, batch_size, device) pred = model.inference(g, inputs, batch_size, device)
model.train() model.train()
emb_layer.train() emb_layer.train()
return compute_acc(pred[val_nid], labels[val_nid]), compute_acc( return compute_acc(pred[val_nid], labels[val_nid]), compute_acc(
...@@ -202,24 +110,17 @@ def evaluate( ...@@ -202,24 +110,17 @@ def evaluate(
def run(args, device, data): def run(args, device, data):
# Unpack data # Unpack data
train_nid, val_nid, test_nid, n_classes, g = data train_nid, val_nid, test_nid, n_classes, g = data
# Create sampler sampler = dgl.dataloading.NeighborSampler(
sampler = NeighborSampler( [int(fanout) for fanout in args.fan_out.split(",")]
g,
[int(fanout) for fanout in args.fan_out.split(",")],
dgl.distributed.sample_neighbors,
device,
load_feat=False,
) )
dataloader = dgl.dataloading.DistNodeDataLoader(
# Create DataLoader for constructing blocks g,
dataloader = DistDataLoader( train_nid,
dataset=train_nid.numpy(), sampler,
batch_size=args.batch_size, batch_size=args.batch_size,
collate_fn=sampler.sample_blocks,
shuffle=True, shuffle=True,
drop_last=False, drop_last=False,
) )
# Define model and optimizer # Define model and optimizer
emb_layer = DistEmb( emb_layer = DistEmb(
g.num_nodes(), g.num_nodes(),
...@@ -227,7 +128,7 @@ def run(args, device, data): ...@@ -227,7 +128,7 @@ def run(args, device, data):
dgl_sparse_emb=args.dgl_sparse, dgl_sparse_emb=args.dgl_sparse,
dev_id=device, dev_id=device,
) )
model = TransDistSAGE( model = DistSAGE(
args.num_hidden, args.num_hidden,
args.num_hidden, args.num_hidden,
n_classes, n_classes,
...@@ -263,9 +164,10 @@ def run(args, device, data): ...@@ -263,9 +164,10 @@ def run(args, device, data):
emb_optimizer = th.optim.SparseAdam( emb_optimizer = th.optim.SparseAdam(
list(emb_layer.module.sparse_emb.parameters()), lr=args.sparse_lr list(emb_layer.module.sparse_emb.parameters()), lr=args.sparse_lr
) )
print("optimize Pytorch sparse embedding:", emb_layer.module.sparse_emb) print(
"optimize Pytorch sparse embedding:",
train_size = th.sum(g.ndata["train_mask"][0 : g.number_of_nodes()]) emb_layer.module.sparse_emb
)
# Training loop # Training loop
iter_tput = [] iter_tput = []
...@@ -280,26 +182,20 @@ def run(args, device, data): ...@@ -280,26 +182,20 @@ def run(args, device, data):
num_seeds = 0 num_seeds = 0
num_inputs = 0 num_inputs = 0
start = time.time() start = time.time()
# Loop over the dataloader to sample the computation dependency graph as a list of with model.join():
# blocks. # Loop over the dataloader to sample the computation dependency
# graph as a list of blocks.
step_time = [] step_time = []
for step, blocks in enumerate(dataloader): for step, (input_nodes, seeds, blocks) in enumerate(dataloader):
tic_step = time.time() tic_step = time.time()
sample_time += tic_step - start sample_time += tic_step - start
# The nodes for input lies at the LHS side of the first block.
# The nodes for output lies at the RHS side of the last block.
batch_inputs = blocks[0].srcdata[dgl.NID]
batch_labels = blocks[-1].dstdata["labels"]
batch_labels = batch_labels.long()
num_seeds += len(blocks[-1].dstdata[dgl.NID]) num_seeds += len(blocks[-1].dstdata[dgl.NID])
num_inputs += len(blocks[0].srcdata[dgl.NID]) num_inputs += len(blocks[0].srcdata[dgl.NID])
blocks = [block.to(device) for block in blocks] blocks = [block.to(device) for block in blocks]
batch_labels = batch_labels.to(device) batch_labels = g.ndata["labels"][seeds].long().to(device)
# Compute loss and prediction # Compute loss and prediction
start = time.time() start = time.time()
batch_inputs = emb_layer(batch_inputs) batch_inputs = emb_layer(input_nodes)
batch_pred = model(blocks, batch_inputs) batch_pred = model(blocks, batch_inputs)
loss = loss_fcn(batch_pred, batch_labels) loss = loss_fcn(batch_pred, batch_labels)
forward_end = time.time() forward_end = time.time()
...@@ -325,7 +221,9 @@ def run(args, device, data): ...@@ -325,7 +221,9 @@ def run(args, device, data):
else 0 else 0
) )
print( print(
"Part {} | Epoch {:05d} | Step {:05d} | Loss {:.4f} | Train Acc {:.4f} | Speed (samples/sec) {:.4f} | GPU {:.1f} MB | time {:.3f} s".format( "Part {} | Epoch {:05d} | Step {:05d} | Loss {:.4f} | "
"Train Acc {:.4f} | Speed (samples/sec) {:.4f} | GPU "
"{:.1f} MB | time {:.3f} s".format(
g.rank(), g.rank(),
epoch, epoch,
step, step,
...@@ -333,14 +231,16 @@ def run(args, device, data): ...@@ -333,14 +231,16 @@ def run(args, device, data):
acc.item(), acc.item(),
np.mean(iter_tput[3:]), np.mean(iter_tput[3:]),
gpu_mem_alloc, gpu_mem_alloc,
np.sum(step_time[-args.log_every :]), np.sum(step_time[-args.log_every:]),
) )
) )
start = time.time() start = time.time()
toc = time.time() toc = time.time()
print( print(
"Part {}, Epoch Time(s): {:.4f}, sample+data_copy: {:.4f}, forward: {:.4f}, backward: {:.4f}, update: {:.4f}, #seeds: {}, #inputs: {}".format( "Part {}, Epoch Time(s): {:.4f}, sample+data_copy: {:.4f}, forward"
": {:.4f}, backward: {:.4f}, update: {:.4f}, #seeds: {}, #inputs"
": {}".format(
g.rank(), g.rank(),
toc - tic, toc - tic,
sample_time, sample_time,
...@@ -357,7 +257,7 @@ def run(args, device, data): ...@@ -357,7 +257,7 @@ def run(args, device, data):
start = time.time() start = time.time()
val_acc, test_acc = evaluate( val_acc, test_acc = evaluate(
args.standalone, args.standalone,
model.module, model,
emb_layer, emb_layer,
g, g,
g.ndata["labels"], g.ndata["labels"],
...@@ -367,7 +267,8 @@ def run(args, device, data): ...@@ -367,7 +267,8 @@ def run(args, device, data):
device, device,
) )
print( print(
"Part {}, Val Acc {:.4f}, Test Acc {:.4f}, time: {:.4f}".format( "Part {}, Val Acc {:.4f}, Test Acc {:.4f}, time: {:.4f}".format
(
g.rank(), val_acc, test_acc, time.time() - start g.rank(), val_acc, test_acc, time.time() - start
) )
) )
...@@ -377,7 +278,10 @@ def main(args): ...@@ -377,7 +278,10 @@ def main(args):
dgl.distributed.initialize(args.ip_config) dgl.distributed.initialize(args.ip_config)
if not args.standalone: if not args.standalone:
th.distributed.init_process_group(backend="gloo") th.distributed.init_process_group(backend="gloo")
g = dgl.distributed.DistGraph(args.graph_name, part_config=args.part_config) g = dgl.distributed.DistGraph(
args.graph_name,
part_config=args.part_config
)
print("rank:", g.rank()) print("rank:", g.rank())
pb = g.get_partition_book() pb = g.get_partition_book()
...@@ -392,7 +296,8 @@ def main(args): ...@@ -392,7 +296,8 @@ def main(args):
) )
local_nid = pb.partid2nids(pb.partid).detach().numpy() local_nid = pb.partid2nids(pb.partid).detach().numpy()
print( print(
"part {}, train: {} (local: {}), val: {} (local: {}), test: {} (local: {})".format( "part {}, train: {} (local: {}), val: {} (local: {}), test: {} "
"(local: {})".format(
g.rank(), g.rank(),
len(train_nid), len(train_nid),
len(np.intersect1d(train_nid.numpy(), local_nid)), len(np.intersect1d(train_nid.numpy(), local_nid)),
...@@ -405,8 +310,9 @@ def main(args): ...@@ -405,8 +310,9 @@ def main(args):
if args.num_gpus == -1: if args.num_gpus == -1:
device = th.device("cpu") device = th.device("cpu")
else: else:
device = th.device("cuda:" + str(args.local_rank)) dev_id = g.rank() % args.num_gpus
labels = g.ndata["labels"][np.arange(g.number_of_nodes())] device = th.device("cuda:" + str(dev_id))
labels = g.ndata["labels"][np.arange(g.num_nodes())]
n_classes = len(th.unique(labels[th.logical_not(th.isnan(labels))])) n_classes = len(th.unique(labels[th.logical_not(th.isnan(labels))]))
print("#labels:", n_classes) print("#labels:", n_classes)
...@@ -418,7 +324,6 @@ def main(args): ...@@ -418,7 +324,6 @@ def main(args):
if __name__ == "__main__": if __name__ == "__main__":
parser = argparse.ArgumentParser(description="GCN") parser = argparse.ArgumentParser(description="GCN")
register_data_args(parser)
parser.add_argument("--graph_name", type=str, help="graph name") parser.add_argument("--graph_name", type=str, help="graph name")
parser.add_argument("--id", type=int, help="the partition id") parser.add_argument("--id", type=int, help="the partition id")
parser.add_argument( parser.add_argument(
...@@ -427,7 +332,6 @@ if __name__ == "__main__": ...@@ -427,7 +332,6 @@ if __name__ == "__main__":
parser.add_argument( parser.add_argument(
"--part_config", type=str, help="The path to the partition config file" "--part_config", type=str, help="The path to the partition config file"
) )
parser.add_argument("--num_clients", type=int, help="The number of clients")
parser.add_argument("--n_classes", type=int, help="the number of classes") parser.add_argument("--n_classes", type=int, help="the number of classes")
parser.add_argument( parser.add_argument(
"--num_gpus", "--num_gpus",
......
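The unsupervised script that follows trains GraphSAGE as a link predictor: `DistEdgeDataLoader` yields mini-batches of positive edges together with negatives drawn by `dgl.dataloading.negative_sampler.Uniform`, and the loss contrasts the two. A sketch of a loss in the shape these scripts expect; the real `CrossEntropyLoss` lives in train_dist_unsupervised.py, and the dot-product edge score used here is the conventional choice rather than a detail confirmed by this diff:

```python
import dgl.function as fn
import torch as th
import torch.nn as nn
import torch.nn.functional as F


class PairwiseCrossEntropyLoss(nn.Module):
    """Score positive/negative node pairs and push them towards 1/0."""

    def forward(self, block_outputs, pos_graph, neg_graph):
        # pos_graph and neg_graph are compacted over the same seed nodes,
        # so block_outputs lines up with their node IDs.
        with pos_graph.local_scope():
            pos_graph.ndata["h"] = block_outputs
            pos_graph.apply_edges(fn.u_dot_v("h", "h", "score"))
            pos_score = pos_graph.edata["score"]
        with neg_graph.local_scope():
            neg_graph.ndata["h"] = block_outputs
            neg_graph.apply_edges(fn.u_dot_v("h", "h", "score"))
            neg_score = neg_graph.edata["score"]

        score = th.cat([pos_score, neg_score])
        label = th.cat([th.ones_like(pos_score), th.zeros_like(neg_score)])
        return F.binary_cross_entropy_with_logits(score, label)
```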
import os
os.environ["DGLBACKEND"] = "pytorch"
import argparse import argparse
import math
import time import time
from functools import wraps from contextlib import contextmanager
from multiprocessing import Process
import numpy as np import numpy as np
import sklearn.linear_model as lm import sklearn.linear_model as lm
import sklearn.metrics as skm import sklearn.metrics as skm
import torch as th import torch as th
import torch.multiprocessing as mp
import torch.nn as nn import torch.nn as nn
import torch.nn.functional as F import torch.nn.functional as F
import torch.optim as optim import torch.optim as optim
import tqdm import tqdm
import dgl import dgl
import dgl.function as fn import dgl.function as fn
import dgl.nn.pytorch as dglnn import dgl.nn.pytorch as dglnn
from dgl import DGLGraph
from dgl.data import load_data, register_data_args
from dgl.data.utils import load_graphs
from dgl.distributed import DistDataLoader
class SAGE(nn.Module): class DistSAGE(nn.Module):
def __init__( def __init__(
self, in_feats, n_hidden, n_classes, n_layers, activation, dropout self, in_feats, n_hidden, n_classes, n_layers, activation, dropout
): ):
...@@ -44,224 +32,66 @@ class SAGE(nn.Module): ...@@ -44,224 +32,66 @@ class SAGE(nn.Module):
def forward(self, blocks, x): def forward(self, blocks, x):
h = x h = x
for l, (layer, block) in enumerate(zip(self.layers, blocks)): for i, (layer, block) in enumerate(zip(self.layers, blocks)):
h = layer(block, h) h = layer(block, h)
if l != len(self.layers) - 1: if i != len(self.layers) - 1:
h = self.activation(h) h = self.activation(h)
h = self.dropout(h) h = self.dropout(h)
return h return h
def inference(self, g, x, batch_size, device): def inference(self, g, x, batch_size, device):
""" """
Inference with the GraphSAGE model on full neighbors (i.e. without neighbor sampling). Inference with the GraphSAGE model on full neighbors (i.e. without
g : the entire graph. neighbor sampling).
x : the input of entire node set.
The inference code is written in a fashion that it could handle any number of nodes and
layers.
"""
# During inference with sampling, multi-layer blocks are very inefficient because
# lots of computations in the first few layers are repeated.
# Therefore, we compute the representation of all nodes layer by layer. The nodes
# on each layer are of course splitted in batches.
# TODO: can we standardize this?
for l, layer in enumerate(self.layers):
y = th.zeros(
g.number_of_nodes(),
self.n_hidden if l != len(self.layers) - 1 else self.n_classes,
)
sampler = dgl.dataloading.MultiLayerNeighborSampler([None])
dataloader = dgl.dataloading.DistNodeDataLoader(
g,
th.arange(g.number_of_nodes()),
sampler,
batch_size=batch_size,
shuffle=True,
drop_last=False,
num_workers=0,
)
for input_nodes, output_nodes, blocks in tqdm.tqdm(dataloader):
block = blocks[0]
block = block.int().to(device)
h = x[input_nodes].to(device)
h = layer(block, h)
if l != len(self.layers) - 1:
h = self.activation(h)
h = self.dropout(h)
y[output_nodes] = h.cpu()
x = y
return y
class NegativeSampler(object):
def __init__(self, g, neg_nseeds):
self.neg_nseeds = neg_nseeds
def __call__(self, num_samples):
# select local neg nodes as seeds
return self.neg_nseeds[
th.randint(self.neg_nseeds.shape[0], (num_samples,))
]
class NeighborSampler(object):
def __init__(
self, g, fanouts, neg_nseeds, sample_neighbors, num_negs, remove_edge
):
self.g = g
self.fanouts = fanouts
self.sample_neighbors = sample_neighbors
self.neg_sampler = NegativeSampler(g, neg_nseeds)
self.num_negs = num_negs
self.remove_edge = remove_edge
def sample_blocks(self, seed_edges):
n_edges = len(seed_edges)
seed_edges = th.LongTensor(np.asarray(seed_edges))
heads, tails = self.g.find_edges(seed_edges)
neg_tails = self.neg_sampler(self.num_negs * n_edges)
neg_heads = heads.view(-1, 1).expand(n_edges, self.num_negs).flatten()
# Maintain the correspondence between heads, tails and negative tails as two
# graphs.
# pos_graph contains the correspondence between each head and its positive tail.
# neg_graph contains the correspondence between each head and its negative tails.
# Both pos_graph and neg_graph are first constructed with the same node space as
# the original graph. Then they are compacted together with dgl.compact_graphs.
pos_graph = dgl.graph(
(heads, tails), num_nodes=self.g.number_of_nodes()
)
neg_graph = dgl.graph(
(neg_heads, neg_tails), num_nodes=self.g.number_of_nodes()
)
pos_graph, neg_graph = dgl.compact_graphs([pos_graph, neg_graph])
seeds = pos_graph.ndata[dgl.NID]
blocks = []
for fanout in self.fanouts:
# For each seed node, sample ``fanout`` neighbors.
frontier = self.sample_neighbors(
self.g, seeds, fanout, replace=True
)
if self.remove_edge:
# Remove all edges between heads and tails, as well as heads and neg_tails.
_, _, edge_ids = frontier.edge_ids(
th.cat([heads, tails, neg_heads, neg_tails]),
th.cat([tails, heads, neg_tails, neg_heads]),
return_uv=True,
)
frontier = dgl.remove_edges(frontier, edge_ids)
# Then we compact the frontier into a bipartite graph for message passing.
block = dgl.to_block(frontier, seeds)
# Obtain the seed nodes for next layer.
seeds = block.srcdata[dgl.NID]
blocks.insert(0, block)
input_nodes = blocks[0].srcdata[dgl.NID]
blocks[0].srcdata["features"] = load_subtensor(
self.g, input_nodes, "cpu"
)
# Pre-generate CSR format that it can be used in training directly
return pos_graph, neg_graph, blocks
class PosNeighborSampler(object):
def __init__(self, g, fanouts, sample_neighbors):
self.g = g
self.fanouts = fanouts
self.sample_neighbors = sample_neighbors
def sample_blocks(self, seeds):
seeds = th.LongTensor(np.asarray(seeds))
blocks = []
for fanout in self.fanouts:
# For each seed node, sample ``fanout`` neighbors.
frontier = self.sample_neighbors(
self.g, seeds, fanout, replace=True
)
# Then we compact the frontier into a bipartite graph for message passing.
block = dgl.to_block(frontier, seeds)
# Obtain the seed nodes for next layer.
seeds = block.srcdata[dgl.NID]
blocks.insert(0, block)
return blocks
class DistSAGE(SAGE):
def __init__(
self, in_feats, n_hidden, n_classes, n_layers, activation, dropout
):
super(DistSAGE, self).__init__(
in_feats, n_hidden, n_classes, n_layers, activation, dropout
)
def inference(self, g, x, batch_size, device):
"""
Inference with the GraphSAGE model on full neighbors (i.e. without neighbor sampling).
g : the entire graph. g : the entire graph.
x : the input of entire node set. x : the input of entire node set.
The inference code is written in a fashion that it could handle any number of nodes and The inference code is written in a fashion that it could handle any
layers. number of nodes and layers.
""" """
# During inference with sampling, multi-layer blocks are very inefficient because # During inference with sampling, multi-layer blocks are very
# lots of computations in the first few layers are repeated. # inefficient because lots of computations in the first few layers are
# Therefore, we compute the representation of all nodes layer by layer. The nodes # repeated. Therefore, we compute the representation of all nodes layer
# on each layer are of course splitted in batches. # by layer. The nodes on each layer are of course splitted in batches.
# TODO: can we standardize this? # TODO: can we standardize this?
nodes = dgl.distributed.node_split( nodes = dgl.distributed.node_split(
np.arange(g.number_of_nodes()), np.arange(g.num_nodes()),
g.get_partition_book(), g.get_partition_book(),
force_even=True, force_even=True,
) )
y = dgl.distributed.DistTensor( y = dgl.distributed.DistTensor(
(g.number_of_nodes(), self.n_hidden), (g.num_nodes(), self.n_hidden),
th.float32, th.float32,
"h", "h",
persistent=True, persistent=True,
) )
for l, layer in enumerate(self.layers): for i, layer in enumerate(self.layers):
if l == len(self.layers) - 1: if i == len(self.layers) - 1:
y = dgl.distributed.DistTensor( y = dgl.distributed.DistTensor(
(g.number_of_nodes(), self.n_classes), (g.num_nodes(), self.n_classes),
th.float32, th.float32,
"h_last", "h_last",
persistent=True, persistent=True,
) )
# Create sampler
sampler = PosNeighborSampler( sampler = dgl.dataloading.NeighborSampler([-1])
g, [-1], dgl.distributed.sample_neighbors # Create dataloader
) dataloader = dgl.dataloading.DistNodeDataLoader(
print( g,
"|V|={}, eval batch size: {}".format( nodes,
g.number_of_nodes(), batch_size sampler,
)
)
# Create PyTorch DataLoader for constructing blocks
dataloader = DistDataLoader(
dataset=nodes,
batch_size=batch_size, batch_size=batch_size,
collate_fn=sampler.sample_blocks,
shuffle=False, shuffle=False,
drop_last=False, drop_last=False,
) )
for blocks in tqdm.tqdm(dataloader): for input_nodes, output_nodes, blocks in tqdm.tqdm(dataloader):
block = blocks[0].to(device) block = blocks[0].to(device)
input_nodes = block.srcdata[dgl.NID]
output_nodes = block.dstdata[dgl.NID]
h = x[input_nodes].to(device) h = x[input_nodes].to(device)
h_dst = h[: block.number_of_dst_nodes()] h_dst = h[: block.number_of_dst_nodes()]
h = layer(block, (h, h_dst)) h = layer(block, (h, h_dst))
if l != len(self.layers) - 1: if i != len(self.layers) - 1:
h = self.activation(h) h = self.activation(h)
h = self.dropout(h) h = self.dropout(h)
...@@ -271,6 +101,11 @@ class DistSAGE(SAGE): ...@@ -271,6 +101,11 @@ class DistSAGE(SAGE):
g.barrier() g.barrier()
return y return y
@contextmanager
def join(self):
"""dummy join for standalone"""
yield
def load_subtensor(g, input_nodes, device): def load_subtensor(g, input_nodes, device):
""" """
...@@ -359,24 +194,24 @@ def run(args, device, data): ...@@ -359,24 +194,24 @@ def run(args, device, data):
labels, labels,
) = data ) = data
# Create sampler # Create sampler
sampler = NeighborSampler( neg_sampler = dgl.dataloading.negative_sampler.Uniform(args.num_negs)
sampler = dgl.dataloading.NeighborSampler(
[int(fanout) for fanout in args.fan_out.split(",")]
)
# Create dataloader
exclude = "reverse_id" if args.remove_edge else None
reverse_eids = th.arange(g.num_edges()) if args.remove_edge else None
dataloader = dgl.dataloading.DistEdgeDataLoader(
g, g,
[int(fanout) for fanout in args.fan_out.split(",")], train_eids,
train_nids, sampler,
dgl.distributed.sample_neighbors, negative_sampler=neg_sampler,
args.num_negs, exclude=exclude,
args.remove_edge, reverse_eids=reverse_eids,
)
# Create PyTorch DataLoader for constructing blocks
dataloader = dgl.distributed.DistDataLoader(
dataset=train_eids.numpy(),
batch_size=args.batch_size, batch_size=args.batch_size,
collate_fn=sampler.sample_blocks,
shuffle=True, shuffle=True,
drop_last=False, drop_last=False,
) )
# Define model and optimizer # Define model and optimizer
model = DistSAGE( model = DistSAGE(
in_feats, in_feats,
...@@ -402,16 +237,10 @@ def run(args, device, data): ...@@ -402,16 +237,10 @@ def run(args, device, data):
# Training loop # Training loop
epoch = 0 epoch = 0
for epoch in range(args.num_epochs): for epoch in range(args.num_epochs):
sample_time = 0
copy_time = 0
forward_time = 0
backward_time = 0
update_time = 0
num_seeds = 0 num_seeds = 0
num_inputs = 0 num_inputs = 0
step_time = [] step_time = []
iter_t = []
sample_t = [] sample_t = []
feat_copy_t = [] feat_copy_t = []
forward_t = [] forward_t = []
...@@ -420,22 +249,22 @@ def run(args, device, data): ...@@ -420,22 +249,22 @@ def run(args, device, data):
iter_tput = [] iter_tput = []
start = time.time() start = time.time()
# Loop over the dataloader to sample the computation dependency graph as a list of with model.join():
# blocks. # Loop over the dataloader to sample the computation dependency
for step, (pos_graph, neg_graph, blocks) in enumerate(dataloader): # graph as a list of blocks.
for step, (input_nodes, pos_graph, neg_graph, blocks) in enumerate(
dataloader
):
tic_step = time.time() tic_step = time.time()
sample_t.append(tic_step - start) sample_t.append(tic_step - start)
copy_t = time.time()
pos_graph = pos_graph.to(device) pos_graph = pos_graph.to(device)
neg_graph = neg_graph.to(device) neg_graph = neg_graph.to(device)
blocks = [block.to(device) for block in blocks] blocks = [block.to(device) for block in blocks]
# The nodes for input lies at the LHS side of the first block. batch_inputs = load_subtensor(g, input_nodes, device)
# The nodes for output lies at the RHS side of the last block.
# Load the input features as well as output labels
batch_inputs = blocks[0].srcdata["features"]
copy_time = time.time() copy_time = time.time()
feat_copy_t.append(copy_time - tic_step) feat_copy_t.append(copy_time - copy_t)
# Compute loss and prediction # Compute loss and prediction
batch_pred = model(blocks, batch_inputs) batch_pred = model(blocks, batch_inputs)
...@@ -451,8 +280,7 @@ def run(args, device, data): ...@@ -451,8 +280,7 @@ def run(args, device, data):
optimizer.step() optimizer.step()
update_t.append(time.time() - compute_end) update_t.append(time.time() - compute_end)
pos_edges = pos_graph.number_of_edges() pos_edges = pos_graph.num_edges()
neg_edges = neg_graph.number_of_edges()
step_t = time.time() - start step_t = time.time() - start
step_time.append(step_t) step_time.append(step_t)
...@@ -460,25 +288,29 @@ def run(args, device, data): ...@@ -460,25 +288,29 @@ def run(args, device, data):
num_seeds += pos_edges num_seeds += pos_edges
if step % args.log_every == 0: if step % args.log_every == 0:
print( print(
"[{}] Epoch {:05d} | Step {:05d} | Loss {:.4f} | Speed (samples/sec) {:.4f} | time {:.3f} s" "[{}] Epoch {:05d} | Step {:05d} | Loss {:.4f} | Speed "
"| sample {:.3f} | copy {:.3f} | forward {:.3f} | backward {:.3f} | update {:.3f}".format( "(samples/sec) {:.4f} | time {:.3f}s | sample {:.3f} | "
"copy {:.3f} | forward {:.3f} | backward {:.3f} | "
"update {:.3f}".format(
g.rank(), g.rank(),
epoch, epoch,
step, step,
loss.item(), loss.item(),
np.mean(iter_tput[3:]), np.mean(iter_tput[3:]),
np.sum(step_time[-args.log_every :]), np.sum(step_time[-args.log_every:]),
np.sum(sample_t[-args.log_every :]), np.sum(sample_t[-args.log_every:]),
np.sum(feat_copy_t[-args.log_every :]), np.sum(feat_copy_t[-args.log_every:]),
np.sum(forward_t[-args.log_every :]), np.sum(forward_t[-args.log_every:]),
np.sum(backward_t[-args.log_every :]), np.sum(backward_t[-args.log_every:]),
np.sum(update_t[-args.log_every :]), np.sum(update_t[-args.log_every:]),
) )
) )
start = time.time() start = time.time()
print( print(
"[{}]Epoch Time(s): {:.4f}, sample: {:.4f}, data copy: {:.4f}, forward: {:.4f}, backward: {:.4f}, update: {:.4f}, #seeds: {}, #inputs: {}".format( "[{}]Epoch Time(s): {:.4f}, sample: {:.4f}, data copy: {:.4f}, "
"forward: {:.4f}, backward: {:.4f}, update: {:.4f}, #seeds: {}, "
"#inputs: {}".format(
g.rank(), g.rank(),
np.sum(step_time), np.sum(step_time),
np.sum(sample_t), np.sum(sample_t),
...@@ -493,13 +325,12 @@ def run(args, device, data): ...@@ -493,13 +325,12 @@ def run(args, device, data):
epoch += 1 epoch += 1
# evaluate the embedding using LogisticRegression # evaluate the embedding using LogisticRegression
if args.standalone:
pred = generate_emb( pred = generate_emb(
model, g, g.ndata["features"], args.batch_size_eval, device model if args.standalone else model.module,
) g,
else: g.ndata["features"],
pred = generate_emb( args.batch_size_eval,
model.module, g, g.ndata["features"], args.batch_size_eval, device device,
) )
if g.rank() == 0: if g.rank() == 0:
eval_acc, test_acc = compute_acc( eval_acc, test_acc = compute_acc(
...@@ -518,7 +349,6 @@ def run(args, device, data): ...@@ -518,7 +349,6 @@ def run(args, device, data):
if g.rank() == 0: if g.rank() == 0:
th.save(pred, "emb.pt") th.save(pred, "emb.pt")
else: else:
feat = g.ndata["features"]
th.save(pred, "emb.pt") th.save(pred, "emb.pt")
...@@ -526,32 +356,35 @@ def main(args): ...@@ -526,32 +356,35 @@ def main(args):
dgl.distributed.initialize(args.ip_config) dgl.distributed.initialize(args.ip_config)
if not args.standalone: if not args.standalone:
th.distributed.init_process_group(backend="gloo") th.distributed.init_process_group(backend="gloo")
g = dgl.distributed.DistGraph(args.graph_name, part_config=args.part_config) g = dgl.distributed.DistGraph(
args.graph_name, part_config=args.part_config
)
print("rank:", g.rank()) print("rank:", g.rank())
print("number of edges", g.number_of_edges()) print("number of edges", g.num_edges())
train_eids = dgl.distributed.edge_split( train_eids = dgl.distributed.edge_split(
th.ones((g.number_of_edges(),), dtype=th.bool), th.ones((g.num_edges(),), dtype=th.bool),
g.get_partition_book(), g.get_partition_book(),
force_even=True, force_even=True,
) )
train_nids = dgl.distributed.node_split( train_nids = dgl.distributed.node_split(
th.ones((g.number_of_nodes(),), dtype=th.bool), g.get_partition_book() th.ones((g.num_nodes(),), dtype=th.bool), g.get_partition_book()
) )
global_train_nid = th.LongTensor( global_train_nid = th.LongTensor(
np.nonzero(g.ndata["train_mask"][np.arange(g.number_of_nodes())]) np.nonzero(g.ndata["train_mask"][np.arange(g.num_nodes())])
) )
global_valid_nid = th.LongTensor( global_valid_nid = th.LongTensor(
np.nonzero(g.ndata["val_mask"][np.arange(g.number_of_nodes())]) np.nonzero(g.ndata["val_mask"][np.arange(g.num_nodes())])
) )
global_test_nid = th.LongTensor( global_test_nid = th.LongTensor(
np.nonzero(g.ndata["test_mask"][np.arange(g.number_of_nodes())]) np.nonzero(g.ndata["test_mask"][np.arange(g.num_nodes())])
) )
labels = g.ndata["labels"][np.arange(g.number_of_nodes())] labels = g.ndata["labels"][np.arange(g.num_nodes())]
if args.num_gpus == -1: if args.num_gpus == -1:
device = th.device("cpu") device = th.device("cpu")
else: else:
device = th.device("cuda:" + str(args.local_rank)) dev_id = g.rank() % args.num_gpus
device = th.device("cuda:" + str(dev_id))
# Pack data # Pack data
in_feats = g.ndata["features"].shape[1] in_feats = g.ndata["features"].shape[1]
...@@ -577,7 +410,6 @@ def main(args): ...@@ -577,7 +410,6 @@ def main(args):
if __name__ == "__main__": if __name__ == "__main__":
parser = argparse.ArgumentParser(description="GCN") parser = argparse.ArgumentParser(description="GCN")
register_data_args(parser)
parser.add_argument("--graph_name", type=str, help="graph name") parser.add_argument("--graph_name", type=str, help="graph name")
parser.add_argument("--id", type=int, help="the partition id") parser.add_argument("--id", type=int, help="the partition id")
parser.add_argument( parser.add_argument(
...@@ -610,12 +442,6 @@ if __name__ == "__main__": ...@@ -610,12 +442,6 @@ if __name__ == "__main__":
"--standalone", action="store_true", help="run in the standalone mode" "--standalone", action="store_true", help="run in the standalone mode"
) )
parser.add_argument("--num_negs", type=int, default=1) parser.add_argument("--num_negs", type=int, default=1)
parser.add_argument(
"--neg_share",
default=False,
action="store_true",
help="sharing neg nodes for positive nodes",
)
parser.add_argument( parser.add_argument(
"--remove_edge", "--remove_edge",
default=False, default=False,
......
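The last script (the unsupervised transductive variant) ends, like train_dist_unsupervised.py, by freezing the learned embeddings and probing them with scikit-learn's LogisticRegression, per the in-code comment "evaluate the embedding using LogisticRegression". A simplified stand-in for that compute_acc-style evaluation, with hypothetical argument names:

```python
import sklearn.linear_model as lm
import sklearn.metrics as skm


def probe_embeddings(emb, labels, train_nids, val_nids, test_nids):
    """Fit a logistic-regression probe on frozen node embeddings and report
    validation/test accuracy (sketch of the real compute_acc)."""
    emb = emb.cpu().numpy()
    labels = labels.cpu().numpy()
    train_nids, val_nids, test_nids = (
        ids.cpu().numpy() for ids in (train_nids, val_nids, test_nids)
    )
    clf = lm.LogisticRegression(multi_class="multinomial", max_iter=10000)
    clf.fit(emb[train_nids], labels[train_nids])
    val_acc = skm.accuracy_score(labels[val_nids], clf.predict(emb[val_nids]))
    test_acc = skm.accuracy_score(labels[test_nids], clf.predict(emb[test_nids]))
    return val_acc, test_acc
```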
import os
os.environ["DGLBACKEND"] = "pytorch"
import argparse import argparse
import math
import time import time
from functools import wraps
from multiprocessing import Process
import numpy as np import numpy as np
import sklearn.linear_model as lm
import sklearn.metrics as skm
import torch as th import torch as th
import torch.multiprocessing as mp
import torch.nn as nn
import torch.nn.functional as F import torch.nn.functional as F
import torch.optim as optim import torch.optim as optim
import tqdm
from train_dist_transductive import DistEmb, load_embs
from train_dist_unsupervised import (
SAGE,
CrossEntropyLoss,
NeighborSampler,
PosNeighborSampler,
compute_acc,
)
import dgl import dgl
import dgl.function as fn from train_dist_transductive import DistEmb, load_embs
import dgl.nn.pytorch as dglnn from train_dist_unsupervised import CrossEntropyLoss, DistSAGE, compute_acc
from dgl import DGLGraph
from dgl.data import load_data, register_data_args
from dgl.data.utils import load_graphs
from dgl.distributed import DistDataLoader
def generate_emb(standalone, model, emb_layer, g, batch_size, device): def generate_emb(standalone, model, emb_layer, g, batch_size, device):
...@@ -43,6 +19,8 @@ def generate_emb(standalone, model, emb_layer, g, batch_size, device): ...@@ -43,6 +19,8 @@ def generate_emb(standalone, model, emb_layer, g, batch_size, device):
batch_size : Number of nodes to compute at the same time. batch_size : Number of nodes to compute at the same time.
device : The GPU device to evaluate on. device : The GPU device to evaluate on.
""" """
if not standalone:
model = model.module
model.eval() model.eval()
emb_layer.eval() emb_layer.eval()
with th.no_grad(): with th.no_grad():
...@@ -64,24 +42,24 @@ def run(args, device, data): ...@@ -64,24 +42,24 @@ def run(args, device, data):
labels, labels,
) = data ) = data
# Create sampler # Create sampler
sampler = NeighborSampler( neg_sampler = dgl.dataloading.negative_sampler.Uniform(args.num_negs)
g, sampler = dgl.dataloading.NeighborSampler(
[int(fanout) for fanout in args.fan_out.split(",")], [int(fanout) for fanout in args.fan_out.split(",")]
train_nids,
dgl.distributed.sample_neighbors,
args.num_negs,
args.remove_edge,
) )
# Create dataloader
# Create PyTorch DataLoader for constructing blocks exclude = "reverse_id" if args.remove_edge else None
dataloader = dgl.distributed.DistDataLoader( reverse_eids = th.arange(g.num_edges()) if args.remove_edge else None
dataset=train_eids.numpy(), dataloader = dgl.dataloading.DistEdgeDataLoader(
g,
train_eids,
sampler,
negative_sampler=neg_sampler,
exclude=exclude,
reverse_eids=reverse_eids,
batch_size=args.batch_size, batch_size=args.batch_size,
collate_fn=sampler.sample_blocks,
shuffle=True, shuffle=True,
drop_last=False, drop_last=False,
) )
# Define model and optimizer # Define model and optimizer
emb_layer = DistEmb( emb_layer = DistEmb(
g.num_nodes(), g.num_nodes(),
...@@ -89,7 +67,7 @@ def run(args, device, data): ...@@ -89,7 +67,7 @@ def run(args, device, data):
dgl_sparse_emb=args.dgl_sparse, dgl_sparse_emb=args.dgl_sparse,
dev_id=device, dev_id=device,
) )
model = SAGE( model = DistSAGE(
args.num_hidden, args.num_hidden,
args.num_hidden, args.num_hidden,
args.num_hidden, args.num_hidden,
...@@ -126,21 +104,16 @@ def run(args, device, data): ...@@ -126,21 +104,16 @@ def run(args, device, data):
emb_optimizer = th.optim.SparseAdam( emb_optimizer = th.optim.SparseAdam(
list(emb_layer.module.sparse_emb.parameters()), lr=args.sparse_lr list(emb_layer.module.sparse_emb.parameters()), lr=args.sparse_lr
) )
print("optimize Pytorch sparse embedding:", emb_layer.module.sparse_emb) print(
"optimize Pytorch sparse embedding:", emb_layer.module.sparse_emb
)
    # Training loop
    epoch = 0
    for epoch in range(args.num_epochs):
        num_seeds = 0
        num_inputs = 0
        step_time = []
        sample_t = []
        feat_copy_t = []
        forward_t = []
...@@ -149,25 +122,24 @@ def run(args, device, data):
        iter_tput = []
        start = time.time()
        with model.join():
            # Loop over the dataloader to sample the computation dependency
            # graph as a list of blocks.
            for step, (input_nodes, pos_graph, neg_graph, blocks) in enumerate(
                dataloader
            ):
                tic_step = time.time()
                sample_t.append(tic_step - start)
                copy_t = time.time()
                pos_graph = pos_graph.to(device)
                neg_graph = neg_graph.to(device)
                blocks = [block.to(device) for block in blocks]
                feat_copy_t.append(copy_t - tic_step)
                copy_time = time.time()
                # Compute loss and prediction
                batch_inputs = emb_layer(input_nodes)
                batch_pred = model(blocks, batch_inputs)
                loss = loss_fcn(batch_pred, pos_graph, neg_graph)
                forward_end = time.time()
...@@ -183,8 +155,7 @@ def run(args, device, data):
                optimizer.step()
                update_t.append(time.time() - compute_end)
                pos_edges = pos_graph.num_edges()
                step_t = time.time() - start
                step_time.append(step_t)
...@@ -192,26 +163,30 @@ def run(args, device, data):
                num_seeds += pos_edges
                if step % args.log_every == 0:
                    print(
                        "[{}] Epoch {:05d} | Step {:05d} | Loss {:.4f} | Speed "
                        "(samples/sec) {:.4f} | time {:.3f}s | sample {:.3f} | "
                        "copy {:.3f} | forward {:.3f} | backward {:.3f} | "
                        "update {:.3f}".format(
                            g.rank(),
                            epoch,
                            step,
                            loss.item(),
                            np.mean(iter_tput[3:]),
                            np.sum(step_time[-args.log_every:]),
                            np.sum(sample_t[-args.log_every:]),
                            np.sum(feat_copy_t[-args.log_every:]),
                            np.sum(forward_t[-args.log_every:]),
                            np.sum(backward_t[-args.log_every:]),
                            np.sum(update_t[-args.log_every:]),
                        )
                    )
                start = time.time()
        print(
            "[{}]Epoch Time(s): {:.4f}, sample: {:.4f}, data copy: {:.4f}, "
            "forward: {:.4f}, backward: {:.4f}, update: {:.4f}, #seeds: {}, "
            "#inputs: {}".format(
                g.rank(),
                np.sum(step_time),
                np.sum(sample_t),
...@@ -226,13 +201,8 @@ def run(args, device, data):
        epoch += 1
    # evaluate the embedding using LogisticRegression
    pred = generate_emb(
        args.standalone, model, emb_layer, g, args.batch_size_eval, device
    )
    if g.rank() == 0:
        eval_acc, test_acc = compute_acc(
...@@ -251,7 +221,6 @@ def run(args, device, data):
        if g.rank() == 0:
            th.save(pred, "emb.pt")
    else:
        th.save(pred, "emb.pt")
...@@ -259,32 +228,35 @@ def main(args):
    dgl.distributed.initialize(args.ip_config)
    if not args.standalone:
        th.distributed.init_process_group(backend="gloo")
    g = dgl.distributed.DistGraph(
        args.graph_name, part_config=args.part_config
    )
    print("rank:", g.rank())
    print("number of edges", g.num_edges())
    train_eids = dgl.distributed.edge_split(
        th.ones((g.num_edges(),), dtype=th.bool),
        g.get_partition_book(),
        force_even=True,
    )
    train_nids = dgl.distributed.node_split(
        th.ones((g.num_nodes(),), dtype=th.bool), g.get_partition_book()
    )
    global_train_nid = th.LongTensor(
        np.nonzero(g.ndata["train_mask"][np.arange(g.num_nodes())])
    )
    global_valid_nid = th.LongTensor(
        np.nonzero(g.ndata["val_mask"][np.arange(g.num_nodes())])
    )
    global_test_nid = th.LongTensor(
        np.nonzero(g.ndata["test_mask"][np.arange(g.num_nodes())])
    )
    labels = g.ndata["labels"][np.arange(g.num_nodes())]
    if args.num_gpus == -1:
        device = th.device("cpu")
    else:
        dev_id = g.rank() % args.num_gpus
        device = th.device("cuda:" + str(dev_id))
    # Pack data
    global_train_nid = global_train_nid.squeeze()
...@@ -308,7 +280,6 @@ def main(args):
if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="GCN")
    parser.add_argument("--graph_name", type=str, help="graph name")
    parser.add_argument("--id", type=int, help="the partition id")
    parser.add_argument(
...
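For reference, the data-loading pattern introduced by this refactor reads in one piece as the sketch below. It only re-assembles the hunks above and is not a drop-in script: it assumes `g` is an initialized `dgl.distributed.DistGraph`, `train_eids` comes from `dgl.distributed.edge_split`, and `args` carries the same fields (`num_negs`, `fan_out`, `remove_edge`, `batch_size`) as the training script.

```python
import dgl
import torch as th

# Uniform negative sampler plus a multi-layer neighbor sampler,
# as in the refactored script above.
neg_sampler = dgl.dataloading.negative_sampler.Uniform(args.num_negs)
sampler = dgl.dataloading.NeighborSampler(
    [int(fanout) for fanout in args.fan_out.split(",")]
)

# Optionally exclude the reverse of each sampled positive edge.
exclude = "reverse_id" if args.remove_edge else None
reverse_eids = th.arange(g.num_edges()) if args.remove_edge else None

dataloader = dgl.dataloading.DistEdgeDataLoader(
    g,
    train_eids,
    sampler,
    negative_sampler=neg_sampler,
    exclude=exclude,
    reverse_eids=reverse_eids,
    batch_size=args.batch_size,
    shuffle=True,
    drop_last=False,
)

# Each minibatch yields the sampled input nodes, a positive-edge graph,
# a negative-edge graph, and the list of message-flow-graph blocks.
for input_nodes, pos_graph, neg_graph, blocks in dataloader:
    pass  # forward/backward as in the training loop above
```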
## DistGNN vertex-cut based graph partitioning (using Libra)
### How to run graph partitioning
```bash
python partition_graph.py --dataset <dataset> --num-parts <num_parts> --out-dir <output_location>
```
Example: the following command line creates 4 partitions of the pubmed graph
```bash
python partition_graph.py --dataset pubmed --num-parts 4 --out-dir ./
```
The output partitions are created in the Libra_result_\<dataset\>/ folder under the current directory.
The *upcoming DistGNN* application can directly use these partitions for distributed training.
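The command is a thin wrapper around the partitioning API. As a rough Python-level equivalent (a sketch only, mirroring `partition_graph.py` shown further below, and limited to the built-in datasets that `dgl.data.load_data` understands):

```python
import os
from types import SimpleNamespace

from dgl.data import load_data
from dgl.distgnn.partition import partition_graph

args = SimpleNamespace(dataset="pubmed")   # load_data() reads args.dataset
num_parts = 4
# The "Libra_result_" prefix is required by the downstream DistGNN tooling.
resultdir = os.path.join("./", "Libra_result_" + args.dataset)

G = load_data(args)[0]                     # DGLGraph for pubmed
partition_graph(num_parts, G, resultdir)   # writes 4 vertex-cut partitions
```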
### How Libra partitioning works
Libra is a vertex-cut based graph partitioning method. It applies greedy heuristics to distribute the input graph edges uniquely among the partitions, producing each partition as a list of edges. The script ```libra_partition.py``` generates the Libra partitions and then converts the Libra output to the DGL/DistGNN input format. A toy sketch of the greedy assignment idea is shown after the note below.
Note: the current Libra implementation is sequential; additional overhead comes from converting the partitioned graph to the DGL/DistGNN format.
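The following is a toy illustration of the greedy vertex-cut idea only, not the actual Libra algorithm or its scoring rule; the function name and tie-breaking below are illustrative. Each edge is placed on a partition that already hosts one of its endpoints (to limit vertex replication), breaking ties by the current edge load:

```python
from collections import defaultdict


def greedy_vertex_cut(edges, num_parts):
    """Toy greedy edge assignment: vertices may be replicated, edges are not."""
    part_of = defaultdict(set)            # vertex -> partitions holding a copy of it
    load = [0] * num_parts                # number of edges assigned to each partition
    parts = [[] for _ in range(num_parts)]
    for u, v in edges:
        # Prefer partitions that already contain u or v; fall back to all partitions.
        candidates = (part_of[u] | part_of[v]) or set(range(num_parts))
        p = min(candidates, key=lambda k: load[k])
        parts[p].append((u, v))
        part_of[u].add(p)
        part_of[v].add(p)
        load[p] += 1
    return parts


# Example: split a tiny edge list into 2 partitions.
print(greedy_vertex_cut([(0, 1), (1, 2), (2, 3), (3, 0), (0, 2)], 2))
```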
### Expected partitioning timings
- Cora, Pubmed, Citeseer: < 10 sec (< 10 GB)
- Reddit: ~150 sec (~25 GB)
- OGBN-Products: ~200 sec (~30 GB)
- Proteins: ~1800 sec (format conversion from the public data takes time) (~100 GB)
- OGBN-Papers100M: ~2500 sec (~200 GB)
### Settings
Tested with:
- CentOS 7.6
- gcc v8.3.0
- PyTorch 1.7.1
- Python 3.7.10
r"""
Copyright (c) 2021 Intel Corporation
\file Graph partitioning
\brief Calls Libra - Vertex-cut based graph partitioner for distributed training
\author Vasimuddin Md <vasimuddin.md@intel.com>,
Guixiang Ma <guixiang.ma@intel.com>
Sanchit Misra <sanchit.misra@intel.com>,
Ramanarayan Mohanty <ramanarayan.mohanty@intel.com>,
Sasikanth Avancha <sasikanth.avancha@intel.com>
Nesreen K. Ahmed <nesreen.k.ahmed@intel.com>
"""
import argparse
import csv
import os
import random
import sys
import time
from statistics import mean
import numpy as np
sys.path.append(os.path.join(os.path.dirname(__file__), ".."))
from load_graph import load_ogb
import dgl
from dgl.base import DGLError
from dgl.data import load_data
from dgl.distgnn.partition import partition_graph
from dgl.distgnn.tools import load_proteins
if __name__ == "__main__":
    argparser = argparse.ArgumentParser()
    argparser.add_argument("--dataset", type=str, default="cora")
    argparser.add_argument("--num-parts", type=int, default=2)
    argparser.add_argument("--out-dir", type=str, default="./")
    args = argparser.parse_args()

    dataset = args.dataset
    num_community = args.num_parts
    # The "Libra_result_" prefix is mandatory.
    out_dir = "Libra_result_" + dataset
    resultdir = os.path.join(args.out_dir, out_dir)

    print("Input dataset for partitioning: ", dataset)
    if args.dataset == "ogbn-products":
        print("Loading ogbn-products")
        G, _ = load_ogb("ogbn-products")
    elif args.dataset == "ogbn-papers100M":
        print("Loading ogbn-papers100M")
        G, _ = load_ogb("ogbn-papers100M")
    elif args.dataset == "proteins":
        G = load_proteins("proteins")
    elif args.dataset == "ogbn-arxiv":
        print("Loading ogbn-arxiv")
        G, _ = load_ogb("ogbn-arxiv")
    else:
        try:
            G = load_data(args)[0]
        except Exception:
            raise DGLError("Error: Dataset {} not found!".format(dataset))
    print("Done loading the graph.", flush=True)

    partition_graph(num_community, G, resultdir)