"src/array/vscode:/vscode.git/clone" did not exist on "2647afc9b343bdafbf47b1c55ab2f8691bcee0a8"
Unverified Commit f19f05ce authored by Hongzhi (Steve), Chen, committed by GitHub

[Misc] Black auto fix. (#4651)


Co-authored-by: Steve <ubuntu@ip-172-31-34-29.ap-northeast-1.compute.internal>
parent 977b1ba4
import torch as th

import dgl


class NegativeSampler(object):
    def __init__(self, g, k, neg_share=False, device=None):
        if device is None:
@@ -16,6 +18,6 @@ class NegativeSampler(object):
            dst = self.weights.multinomial(n, replacement=True)
            dst = dst.view(-1, 1, self.k).expand(-1, self.k, -1).flatten()
        else:
            dst = self.weights.multinomial(n * self.k, replacement=True)
        src = src.repeat_interleave(self.k)
        return src, dst
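For readers unfamiliar with the sampler above, here is a standalone sketch (toy sizes and uniform weights chosen for illustration, not part of this commit) of what the neg_share branch does: when the batch size n is a multiple of k, only n negatives are drawn and each is reused k times, instead of drawing n * k independent negatives.

# Toy illustration of the shared-negative trick used in NegativeSampler above.
import torch as th

n, k = 6, 3
weights = th.ones(10)  # hypothetical sampling weights over 10 candidate nodes
src = th.arange(n)

# neg_share=True: draw only n negatives and reuse each one k times
dst_shared = weights.multinomial(n, replacement=True)
dst_shared = dst_shared.view(-1, 1, k).expand(-1, k, -1).flatten()

# neg_share=False: draw an independent negative for every (src, k) pair
dst_indep = weights.multinomial(n * k, replacement=True)

src = src.repeat_interleave(k)
assert src.shape == dst_shared.shape == dst_indep.shape == (n * k,)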
import argparse
import time

import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchmetrics.functional as MF
import tqdm
from ogb.nodeproppred import DglNodePropPredDataset

import dgl
import dgl.nn as dglnn


class SAGE(nn.Module):
    def __init__(self, in_feats, n_hidden, n_classes):
        super().__init__()
        self.layers = nn.ModuleList()
        self.layers.append(dglnn.SAGEConv(in_feats, n_hidden, "mean"))
        self.layers.append(dglnn.SAGEConv(n_hidden, n_hidden, "mean"))
        self.layers.append(dglnn.SAGEConv(n_hidden, n_classes, "mean"))
        self.dropout = nn.Dropout(0.5)
        self.n_hidden = n_hidden
        self.n_classes = n_classes

@@ -33,20 +36,31 @@ class SAGE(nn.Module):
    def inference(self, g, device, batch_size, num_workers, buffer_device=None):
        # The difference between this inference function and the one in the official
        # example is that the intermediate results can also benefit from prefetching.
        feat = g.ndata["feat"]
        sampler = dgl.dataloading.MultiLayerFullNeighborSampler(
            1, prefetch_node_feats=["feat"]
        )
        dataloader = dgl.dataloading.DataLoader(
            g,
            torch.arange(g.num_nodes()).to(g.device),
            sampler,
            device=device,
            batch_size=batch_size,
            shuffle=False,
            drop_last=False,
            num_workers=num_workers,
        )
        if buffer_device is None:
            buffer_device = device

        for l, layer in enumerate(self.layers):
            y = torch.empty(
                g.num_nodes(),
                self.n_hidden if l != len(self.layers) - 1 else self.n_classes,
                device=buffer_device,
                pin_memory=True,
            )
            feat = feat.to(device)
            for input_nodes, output_nodes, blocks in tqdm.tqdm(dataloader):
                # use an explicitly contiguous slice
@@ -57,44 +71,64 @@ class SAGE(nn.Module):
                    h = self.dropout(h)
                # by design, our output nodes are contiguous, so we can take
                # advantage of that here
                y[output_nodes[0] : output_nodes[-1] + 1] = h.to(buffer_device)
            feat = y
        return y
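As an aside, here is a minimal sketch of the contiguous-slice write that inference() relies on; the node IDs, shapes, and tensors below are made up for illustration and are not part of this commit. Because the dataloader above iterates torch.arange(g.num_nodes()) with shuffle=False, each batch's output_nodes form a contiguous ID range, so one slice assignment moves the whole batch into the (optionally pinned) CPU buffer.

# Toy illustration of the pinned-buffer slice write used in inference() above.
import torch

num_nodes, hidden = 8, 4
pin = torch.cuda.is_available()  # pinning only applies when a GPU is present
y = torch.empty(num_nodes, hidden, device="cpu", pin_memory=pin)

output_nodes = torch.tensor([3, 4, 5])       # contiguous IDs from one batch
h = torch.randn(len(output_nodes), hidden)   # this batch's layer output
y[output_nodes[0] : output_nodes[-1] + 1] = h  # one contiguous copy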
dataset = DglNodePropPredDataset("ogbn-products")
graph, labels = dataset[0]
graph.ndata["label"] = labels.squeeze()
split_idx = dataset.get_idx_split()
train_idx, valid_idx, test_idx = (
    split_idx["train"],
    split_idx["valid"],
    split_idx["test"],
)

device = "cuda"
train_idx = train_idx.to(device)
valid_idx = valid_idx.to(device)
test_idx = test_idx.to(device)
graph = graph.to(device)

model = SAGE(graph.ndata["feat"].shape[1], 256, dataset.num_classes).to(device)
opt = torch.optim.Adam(model.parameters(), lr=0.001, weight_decay=5e-4)

sampler = dgl.dataloading.NeighborSampler(
    [15, 10, 5], prefetch_node_feats=["feat"], prefetch_labels=["label"]
)
train_dataloader = dgl.dataloading.DataLoader(
    graph,
    train_idx,
    sampler,
    device=device,
    batch_size=1024,
    shuffle=True,
    drop_last=False,
    num_workers=0,
    use_uva=False,
)
valid_dataloader = dgl.dataloading.DataLoader(
    graph,
    valid_idx,
    sampler,
    device=device,
    batch_size=1024,
    shuffle=True,
    drop_last=False,
    num_workers=0,
    use_uva=False,
)

durations = []
for _ in range(10):
    model.train()
    t0 = time.time()
    for it, (input_nodes, output_nodes, blocks) in enumerate(train_dataloader):
        x = blocks[0].srcdata["feat"]
        y = blocks[-1].dstdata["label"]
        y_hat = model(blocks, x)
        loss = F.cross_entropy(y_hat, y)
        opt.zero_grad()
@@ -103,7 +137,7 @@ for _ in range(10):
        if it % 20 == 0:
            acc = MF.accuracy(torch.argmax(y_hat, dim=1), y)
            mem = torch.cuda.max_memory_allocated() / 1000000
            print("Loss", loss.item(), "Acc", acc.item(), "GPU Mem", mem, "MB")
    tt = time.time()
    print(tt - t0)
    durations.append(tt - t0)
@@ -113,19 +147,19 @@ for _ in range(10):
    y_hats = []
    for it, (input_nodes, output_nodes, blocks) in enumerate(valid_dataloader):
        with torch.no_grad():
            x = blocks[0].srcdata["feat"]
            ys.append(blocks[-1].dstdata["label"])
            y_hats.append(torch.argmax(model(blocks, x), dim=1))
    acc = MF.accuracy(torch.cat(y_hats), torch.cat(ys))
    print("Validation acc:", acc.item())
print(np.mean(durations[4:]), np.std(durations[4:]))

# Test accuracy and offline inference of all nodes
model.eval()
with torch.no_grad():
    pred = model.inference(graph, device, 4096, 0, "cpu")
    pred = pred[test_idx].to(device)
    label = graph.ndata["label"][test_idx]
    acc = MF.accuracy(torch.argmax(pred, dim=1), label)
    print("Test acc:", acc.item())
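One caveat worth noting (my assumption, not stated anywhere in this commit): the MF.accuracy(preds, target) calls above match the two-argument functional API of older torchmetrics releases; newer releases may require a task argument. A plain-PyTorch equivalent of the multiclass accuracy computed here, as a fallback sketch:

# Hypothetical helper, equivalent to the accuracy used above for class indices.
import torch

def multiclass_accuracy(pred_labels: torch.Tensor, labels: torch.Tensor) -> float:
    # pred_labels and labels are both 1-D tensors of class indices
    return (pred_labels == labels).float().mean().item()

# e.g. multiclass_accuracy(torch.argmax(y_hat, dim=1), y)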
import argparse
import os
import sys
import time

import numpy as np
import torch as th

import dgl

sys.path.append(os.path.join(os.path.dirname(__file__), ".."))
from load_graph import load_ogb, load_reddit

if __name__ == "__main__":
    argparser = argparse.ArgumentParser("Partition builtin graphs")
    argparser.add_argument(
        "--dataset",
        type=str,
        default="reddit",
        help="datasets: reddit, ogb-product, ogb-paper100M",
    )
    argparser.add_argument(
        "--num_parts", type=int, default=4, help="number of partitions"
    )
    argparser.add_argument(
        "--part_method", type=str, default="metis", help="the partition method"
    )
    argparser.add_argument(
        "--balance_train",
        action="store_true",
        help="balance the training size in each partition.",
    )
    argparser.add_argument(
        "--undirected",
        action="store_true",
        help="turn the graph into an undirected graph.",
    )
    argparser.add_argument(
        "--balance_edges",
        action="store_true",
        help="balance the number of edges in each partition.",
    )
    argparser.add_argument(
        "--num_trainers_per_machine",
        type=int,
        default=1,
        help="the number of trainers per machine. The trainer ids are stored\
            in the node feature 'trainer_id'",
    )
    argparser.add_argument(
        "--output",
        type=str,
        default="data",
        help="Output path of partitioned graph.",
    )
    args = argparser.parse_args()

    start = time.time()
    if args.dataset == "reddit":
        g, _ = load_reddit()
    elif args.dataset == "ogb-product":
        g, _ = load_ogb("ogbn-products")
    elif args.dataset == "ogb-paper100M":
        g, _ = load_ogb("ogbn-papers100M")
    print(
        "load {} takes {:.3f} seconds".format(args.dataset, time.time() - start)
    )
    print("|V|={}, |E|={}".format(g.number_of_nodes(), g.number_of_edges()))
    print(
        "train: {}, valid: {}, test: {}".format(
            th.sum(g.ndata["train_mask"]),
            th.sum(g.ndata["val_mask"]),
            th.sum(g.ndata["test_mask"]),
        )
    )

    if args.balance_train:
        balance_ntypes = g.ndata["train_mask"]
    else:
        balance_ntypes = None

@@ -52,8 +84,13 @@ if __name__ == '__main__':
            sym_g.ndata[key] = g.ndata[key]
        g = sym_g

    dgl.distributed.partition_graph(
        g,
        args.dataset,
        args.num_parts,
        args.output,
        part_method=args.part_method,
        balance_ntypes=balance_ntypes,
        balance_edges=args.balance_edges,
        num_trainers_per_machine=args.num_trainers_per_machine,
    )
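For context, a minimal sketch of the same dgl.distributed.partition_graph call on a toy random graph, using this script's defaults; the graph, name, and output path below are illustrative choices of mine, not part of this commit.

# Toy partitioning example mirroring the call above (writes partitions to "data/").
import torch as th

import dgl

g = dgl.rand_graph(1000, 5000)  # small random homogeneous graph
dgl.distributed.partition_graph(
    g,
    "toy",   # graph name
    4,       # num_parts (script default)
    "data",  # output directory (script default)
    part_method="metis",
    balance_ntypes=None,
    balance_edges=False,
    num_trainers_per_machine=1,
)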
import os

os.environ["DGLBACKEND"] = "pytorch"
import argparse
import math
import time
from functools import wraps
from multiprocessing import Process

import numpy as np
import sklearn.linear_model as lm
import sklearn.metrics as skm
import torch as th
import torch.multiprocessing as mp
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import tqdm
from train_dist_transductive import DistEmb, load_embs
from train_dist_unsupervised import (
    SAGE,
    CrossEntropyLoss,
    NeighborSampler,
    PosNeighborSampler,
    compute_acc,
)

import dgl
import dgl.function as fn
import dgl.nn.pytorch as dglnn
from dgl import DGLGraph
from dgl.data import load_data, register_data_args
from dgl.data.utils import load_graphs
from dgl.distributed import DistDataLoader


def generate_emb(standalone, model, emb_layer, g, batch_size, device):
    """
@@ -42,12 +51,27 @@ def generate_emb(standalone, model, emb_layer, g, batch_size, device):
    g.barrier()
    return pred


def run(args, device, data):
    # Unpack data
    (
        train_eids,
        train_nids,
        g,
        global_train_nid,
        global_valid_nid,
        global_test_nid,
        labels,
    ) = data
    # Create sampler
    sampler = NeighborSampler(
        g,
        [int(fanout) for fanout in args.fan_out.split(",")],
        train_nids,
        dgl.distributed.sample_neighbors,
        args.num_negs,
        args.remove_edge,
    )

    # Create PyTorch DataLoader for constructing blocks
    dataloader = dgl.distributed.DistDataLoader(
@@ -55,18 +79,33 @@ def run(args, device, data):
        batch_size=args.batch_size,
        collate_fn=sampler.sample_blocks,
        shuffle=True,
        drop_last=False,
    )

    # Define model and optimizer
    emb_layer = DistEmb(
        g.num_nodes(),
        args.num_hidden,
        dgl_sparse_emb=args.dgl_sparse,
        dev_id=device,
    )
    model = SAGE(
        args.num_hidden,
        args.num_hidden,
        args.num_hidden,
        args.num_layers,
        F.relu,
        args.dropout,
    )
    model = model.to(device)
    if not args.standalone:
        if args.num_gpus == -1:
            model = th.nn.parallel.DistributedDataParallel(model)
        else:
            dev_id = g.rank() % args.num_gpus
            model = th.nn.parallel.DistributedDataParallel(
                model, device_ids=[dev_id], output_device=dev_id
            )
        if not args.dgl_sparse:
            emb_layer = th.nn.parallel.DistributedDataParallel(emb_layer)
    loss_fcn = CrossEntropyLoss()
@@ -74,14 +113,20 @@ def run(args, device, data):
    optimizer = optim.Adam(model.parameters(), lr=args.lr)
    if args.dgl_sparse:
        emb_optimizer = dgl.distributed.optim.SparseAdam(
            [emb_layer.sparse_emb], lr=args.sparse_lr
        )
        print("optimize DGL sparse embedding:", emb_layer.sparse_emb)
    elif args.standalone:
        emb_optimizer = th.optim.SparseAdam(
            list(emb_layer.sparse_emb.parameters()), lr=args.sparse_lr
        )
        print("optimize Pytorch sparse embedding:", emb_layer.sparse_emb)
    else:
        emb_optimizer = th.optim.SparseAdam(
            list(emb_layer.module.sparse_emb.parameters()), lr=args.sparse_lr
        )
        print("optimize Pytorch sparse embedding:", emb_layer.module.sparse_emb)

    # Training loop
    epoch = 0
@@ -146,26 +191,54 @@ def run(args, device, data):
            iter_tput.append(pos_edges / step_t)
            num_seeds += pos_edges
            if step % args.log_every == 0:
                print(
                    "[{}] Epoch {:05d} | Step {:05d} | Loss {:.4f} | Speed (samples/sec) {:.4f} | time {:.3f} s"
                    "| sample {:.3f} | copy {:.3f} | forward {:.3f} | backward {:.3f} | update {:.3f}".format(
                        g.rank(),
                        epoch,
                        step,
                        loss.item(),
                        np.mean(iter_tput[3:]),
                        np.sum(step_time[-args.log_every :]),
                        np.sum(sample_t[-args.log_every :]),
                        np.sum(feat_copy_t[-args.log_every :]),
                        np.sum(forward_t[-args.log_every :]),
                        np.sum(backward_t[-args.log_every :]),
                        np.sum(update_t[-args.log_every :]),
                    )
                )
                start = time.time()

        print(
            "[{}]Epoch Time(s): {:.4f}, sample: {:.4f}, data copy: {:.4f}, forward: {:.4f}, backward: {:.4f}, update: {:.4f}, #seeds: {}, #inputs: {}".format(
                g.rank(),
                np.sum(step_time),
                np.sum(sample_t),
                np.sum(feat_copy_t),
                np.sum(forward_t),
                np.sum(backward_t),
                np.sum(update_t),
                num_seeds,
                num_inputs,
            )
        )
        epoch += 1

    # evaluate the embedding using LogisticRegression
    if args.standalone:
        pred = generate_emb(
            True, model, emb_layer, g, args.batch_size_eval, device
        )
    else:
        pred = generate_emb(
            False, model.module, emb_layer, g, args.batch_size_eval, device
        )
    if g.rank() == 0:
        eval_acc, test_acc = compute_acc(
            pred, labels, global_train_nid, global_valid_nid, global_test_nid
        )
        print("eval acc {:.4f}; test acc {:.4f}".format(eval_acc, test_acc))

    # sync for eval and test
    if not args.standalone:
@@ -176,29 +249,42 @@ def run(args, device, data):
        # save features into file
        if g.rank() == 0:
            th.save(pred, "emb.pt")
    else:
        feat = g.ndata["features"]
        th.save(pred, "emb.pt")


def main(args):
    dgl.distributed.initialize(args.ip_config)
    if not args.standalone:
        th.distributed.init_process_group(backend="gloo")
    g = dgl.distributed.DistGraph(args.graph_name, part_config=args.part_config)
    print("rank:", g.rank())
    print("number of edges", g.number_of_edges())

    train_eids = dgl.distributed.edge_split(
        th.ones((g.number_of_edges(),), dtype=th.bool),
        g.get_partition_book(),
        force_even=True,
    )
    train_nids = dgl.distributed.node_split(
        th.ones((g.number_of_nodes(),), dtype=th.bool), g.get_partition_book()
    )
    global_train_nid = th.LongTensor(
        np.nonzero(g.ndata["train_mask"][np.arange(g.number_of_nodes())])
    )
    global_valid_nid = th.LongTensor(
        np.nonzero(g.ndata["val_mask"][np.arange(g.number_of_nodes())])
    )
    global_test_nid = th.LongTensor(
        np.nonzero(g.ndata["test_mask"][np.arange(g.number_of_nodes())])
    )
    labels = g.ndata["labels"][np.arange(g.number_of_nodes())]
    if args.num_gpus == -1:
        device = th.device("cpu")
    else:
        device = th.device("cuda:" + str(args.local_rank))

    # Pack data
    global_train_nid = global_train_nid.squeeze()
@@ -207,41 +293,74 @@ def main(args):
    print("number of train {}".format(global_train_nid.shape[0]))
    print("number of valid {}".format(global_valid_nid.shape[0]))
    print("number of test {}".format(global_test_nid.shape[0]))
    data = (
        train_eids,
        train_nids,
        g,
        global_train_nid,
        global_valid_nid,
        global_test_nid,
        labels,
    )
    run(args, device, data)
    print("parent ends")


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="GCN")
    register_data_args(parser)
    parser.add_argument("--graph_name", type=str, help="graph name")
    parser.add_argument("--id", type=int, help="the partition id")
    parser.add_argument(
        "--ip_config", type=str, help="The file for IP configuration"
    )
    parser.add_argument(
        "--part_config", type=str, help="The path to the partition config file"
    )
    parser.add_argument("--n_classes", type=int, help="the number of classes")
    parser.add_argument(
        "--num_gpus",
        type=int,
        default=-1,
        help="the number of GPU device. Use -1 for CPU training",
    )
    parser.add_argument("--num_epochs", type=int, default=5)
    parser.add_argument("--num_hidden", type=int, default=16)
    parser.add_argument("--num-layers", type=int, default=2)
    parser.add_argument("--fan_out", type=str, default="10,25")
    parser.add_argument("--batch_size", type=int, default=1000)
    parser.add_argument("--batch_size_eval", type=int, default=100000)
    parser.add_argument("--log_every", type=int, default=20)
    parser.add_argument("--eval_every", type=int, default=5)
    parser.add_argument("--lr", type=float, default=0.003)
    parser.add_argument("--dropout", type=float, default=0.5)
    parser.add_argument(
        "--local_rank", type=int, help="get rank of the process"
    )
    parser.add_argument(
        "--standalone", action="store_true", help="run in the standalone mode"
    )
    parser.add_argument("--num_negs", type=int, default=1)
    parser.add_argument(
        "--neg_share",
        default=False,
        action="store_true",
        help="sharing neg nodes for positive nodes",
    )
    parser.add_argument(
        "--remove_edge",
        default=False,
        action="store_true",
        help="whether to remove edges during sampling",
    )
    parser.add_argument(
        "--dgl_sparse",
        action="store_true",
        help="Whether to use DGL sparse embedding",
    )
    parser.add_argument(
        "--sparse_lr", type=float, default=1e-2, help="sparse lr rate"
    )
    args = parser.parse_args()

    print(args)
    main(args)
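Finally, a toy sketch (made-up mask and sizes, not from this commit) of how main() above turns the boolean train/val/test masks into global node-ID tensors via np.nonzero and th.LongTensor, followed by the same squeeze used before packing the data tuple.

# Toy illustration of the mask-to-ID conversion performed in main() above.
import numpy as np
import torch as th

num_nodes = 10
train_mask = th.zeros(num_nodes, dtype=th.bool)
train_mask[[0, 3, 7]] = True

# mirrors: th.LongTensor(np.nonzero(g.ndata["train_mask"][np.arange(num_nodes)]))
global_train_nid = th.LongTensor(np.nonzero(train_mask[np.arange(num_nodes)]))
print(global_train_nid.squeeze())  # tensor([0, 3, 7])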