Unverified commit a9f2acf3, authored by Hongzhi (Steve), Chen and committed by GitHub

[Misc] Black auto fix. (#4641)



* [Misc] Black auto fix.

* sort
Co-authored-by: Steve <ubuntu@ip-172-31-34-29.ap-northeast-1.compute.internal>
parent 08c50eb7
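For reference, Black normalizes string quotes to double quotes and re-wraps long statements with one argument per line and a trailing comma; that is essentially all the hunks below do. A minimal before/after taken from the first benchmark file in this diff:

# before
model = GraphSAGE(in_feats, 16, n_classes, 1, F.relu, 0.5, 'gcn')
# after
model = GraphSAGE(in_feats, 16, n_classes, 1, F.relu, 0.5, "gcn")

The "* sort" item in the message refers to import sorting (presumably isort), which accounts for the reordered import blocks below.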
import time
import dgl
from dgl.nn.pytorch import SAGEConv
import torch
import torch.nn as nn
import torch.nn.functional as F
import dgl
from dgl.nn.pytorch import SAGEConv
from .. import utils
class GraphSAGE(nn.Module):
def __init__(self,
in_feats,
n_hidden,
n_classes,
n_layers,
activation,
dropout,
aggregator_type):
def __init__(
self,
in_feats,
n_hidden,
n_classes,
n_layers,
activation,
dropout,
aggregator_type,
):
super(GraphSAGE, self).__init__()
self.layers = nn.ModuleList()
self.dropout = nn.Dropout(dropout)
@@ -27,7 +32,9 @@ class GraphSAGE(nn.Module):
for i in range(n_layers - 1):
self.layers.append(SAGEConv(n_hidden, n_hidden, aggregator_type))
# output layer
self.layers.append(SAGEConv(n_hidden, n_classes, aggregator_type)) # activation None
self.layers.append(
SAGEConv(n_hidden, n_classes, aggregator_type)
) # activation None
def forward(self, graph, inputs):
h = self.dropout(inputs)
@@ -38,8 +45,9 @@ class GraphSAGE(nn.Module):
h = self.dropout(h)
return h
@utils.benchmark('time')
@utils.parametrize('data', ['cora', 'pubmed'])
@utils.benchmark("time")
@utils.parametrize("data", ["cora", "pubmed"])
def track_time(data):
data = utils.process_data(data)
device = utils.get_bench_device()
@@ -47,11 +55,11 @@ def track_time(data):
g = data[0].to(device)
features = g.ndata['feat']
labels = g.ndata['label']
train_mask = g.ndata['train_mask']
val_mask = g.ndata['val_mask']
test_mask = g.ndata['test_mask']
features = g.ndata["feat"]
labels = g.ndata["label"]
train_mask = g.ndata["train_mask"]
val_mask = g.ndata["val_mask"]
test_mask = g.ndata["test_mask"]
in_feats = features.shape[1]
n_classes = data.num_classes
@@ -60,16 +68,14 @@ def track_time(data):
g = dgl.add_self_loop(g)
# create model
model = GraphSAGE(in_feats, 16, n_classes, 1, F.relu, 0.5, 'gcn')
model = GraphSAGE(in_feats, 16, n_classes, 1, F.relu, 0.5, "gcn")
loss_fcn = torch.nn.CrossEntropyLoss()
model = model.to(device)
model.train()
# optimizer
optimizer = torch.optim.Adam(model.parameters(),
lr=1e-2,
weight_decay=5e-4)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-2, weight_decay=5e-4)
# dry run
for i in range(10):
......
@@ -9,44 +9,50 @@ Difference compared to tkipf/relation-gcn
import argparse
import gc
import logging
import time
from pathlib import Path
from types import SimpleNamespace
import numpy as np
import time
import torch as th
import torch.multiprocessing as mp
import torch.nn as nn
import torch.nn.functional as F
import torch.multiprocessing as mp
from torch.multiprocessing import Queue
from torch.nn.parallel import DistributedDataParallel
from torch.utils.data import DataLoader
import dgl
from dgl.nn import RelGraphConv
from .. import utils
# import sys
# import os
# dir_path = Path(os.path.dirname(__file__))
# sys.path.insert(0, dir_path.parent)
from .. import utils
# import utils
class EntityClassify(nn.Module):
def __init__(self,
device,
num_nodes,
h_dim,
out_dim,
num_rels,
num_bases=None,
num_hidden_layers=1,
dropout=0,
use_self_loop=False,
low_mem=True,
layer_norm=False):
def __init__(
self,
device,
num_nodes,
h_dim,
out_dim,
num_rels,
num_bases=None,
num_hidden_layers=1,
dropout=0,
use_self_loop=False,
low_mem=True,
layer_norm=False,
):
super(EntityClassify, self).__init__()
self.device = th.device(device if device >= 0 else 'cpu')
self.device = th.device(device if device >= 0 else "cpu")
self.num_nodes = num_nodes
self.h_dim = h_dim
self.out_dim = out_dim
@@ -60,22 +66,50 @@ class EntityClassify(nn.Module):
self.layers = nn.ModuleList()
# i2h
self.layers.append(RelGraphConv(
self.h_dim, self.h_dim, self.num_rels, "basis",
self.num_bases, activation=F.relu, self_loop=self.use_self_loop,
low_mem=self.low_mem, dropout=self.dropout, layer_norm=layer_norm))
self.layers.append(
RelGraphConv(
self.h_dim,
self.h_dim,
self.num_rels,
"basis",
self.num_bases,
activation=F.relu,
self_loop=self.use_self_loop,
low_mem=self.low_mem,
dropout=self.dropout,
layer_norm=layer_norm,
)
)
# h2h
for idx in range(self.num_hidden_layers):
self.layers.append(RelGraphConv(
self.h_dim, self.h_dim, self.num_rels, "basis",
self.num_bases, activation=F.relu, self_loop=self.use_self_loop,
low_mem=self.low_mem, dropout=self.dropout, layer_norm=layer_norm))
self.layers.append(
RelGraphConv(
self.h_dim,
self.h_dim,
self.num_rels,
"basis",
self.num_bases,
activation=F.relu,
self_loop=self.use_self_loop,
low_mem=self.low_mem,
dropout=self.dropout,
layer_norm=layer_norm,
)
)
# h2o
self.layers.append(RelGraphConv(
self.h_dim, self.out_dim, self.num_rels, "basis",
self.num_bases, activation=None,
self_loop=self.use_self_loop,
low_mem=self.low_mem, layer_norm=layer_norm))
self.layers.append(
RelGraphConv(
self.h_dim,
self.out_dim,
self.num_rels,
"basis",
self.num_bases,
activation=None,
self_loop=self.use_self_loop,
low_mem=self.low_mem,
layer_norm=layer_norm,
)
)
def forward(self, blocks, feats, norm=None):
if blocks is None:
@@ -84,22 +118,22 @@ class EntityClassify(nn.Module):
h = feats
for layer, block in zip(self.layers, blocks):
block = block.to(self.device)
h = layer(block, h, block.edata['etype'], block.edata['norm'])
h = layer(block, h, block.edata["etype"], block.edata["norm"])
return h
def gen_norm(g):
_, v, eid = g.all_edges(form='all')
_, v, eid = g.all_edges(form="all")
_, inverse_index, count = th.unique(
v, return_inverse=True, return_counts=True)
v, return_inverse=True, return_counts=True
)
degrees = count[inverse_index]
norm = th.ones(eid.shape[0], device=eid.device) / degrees
norm = norm.unsqueeze(1)
g.edata['norm'] = norm
g.edata["norm"] = norm
class NeighborSampler:
def __init__(self, g, target_idx, fanouts):
self.g = g
self.target_idx = target_idx
@@ -127,9 +161,20 @@ class NeighborSampler:
@utils.thread_wrapped_func
def run(proc_id, n_gpus, n_cpus, args, devices, dataset, split, queue=None):
from .rgcn_model import RelGraphEmbedLayer
dev_id = devices[proc_id]
g, node_feats, num_of_ntype, num_classes, num_rels, target_idx, \
train_idx, val_idx, test_idx, labels = dataset
(
g,
node_feats,
num_of_ntype,
num_classes,
num_rels,
target_idx,
train_idx,
val_idx,
test_idx,
labels,
) = dataset
labels = labels.cuda(dev_id)
if split is not None:
train_seed, val_seed, test_seed = split
@@ -140,64 +185,76 @@ def run(proc_id, n_gpus, n_cpus, args, devices, dataset, split, queue=None):
fanouts = args.fanout
node_tids = g.ndata[dgl.NTYPE]
sampler = NeighborSampler(g, target_idx, fanouts)
loader = DataLoader(dataset=train_idx.numpy(),
batch_size=args.batch_size,
collate_fn=sampler.sample_blocks,
shuffle=True,
num_workers=args.num_workers)
loader = DataLoader(
dataset=train_idx.numpy(),
batch_size=args.batch_size,
collate_fn=sampler.sample_blocks,
shuffle=True,
num_workers=args.num_workers,
)
world_size = n_gpus
dist_init_method = 'tcp://{master_ip}:{master_port}'.format(
master_ip='127.0.0.1', master_port='12345')
backend = 'nccl'
dist_init_method = "tcp://{master_ip}:{master_port}".format(
master_ip="127.0.0.1", master_port="12345"
)
backend = "nccl"
# using sparse embedding or using mix_cpu_gpu model (the embedding model cannot be stored on GPU)
if args.dgl_sparse is False:
backend = 'gloo'
backend = "gloo"
print("backend using {}".format(backend))
th.distributed.init_process_group(backend=backend,
init_method=dist_init_method,
world_size=world_size,
rank=dev_id)
th.distributed.init_process_group(
backend=backend,
init_method=dist_init_method,
world_size=world_size,
rank=dev_id,
)
# node features
# None for one-hot feature, if not none, it should be the feature tensor.
#
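# A hypothetical node_feats list, mirroring how it is built in track_time below:
# an int (node count) for a featureless ntype and a tensor for an ntype that
# carries a "feat" field, e.g.
#   node_feats = [hg.number_of_nodes("author"), hg.nodes["paper"].data["feat"]]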
embed_layer = RelGraphEmbedLayer(dev_id,
g.number_of_nodes(),
node_tids,
num_of_ntype,
node_feats,
args.n_hidden,
dgl_sparse=args.dgl_sparse)
embed_layer = RelGraphEmbedLayer(
dev_id,
g.number_of_nodes(),
node_tids,
num_of_ntype,
node_feats,
args.n_hidden,
dgl_sparse=args.dgl_sparse,
)
# create model
# all model params are in device.
model = EntityClassify(dev_id,
g.number_of_nodes(),
args.n_hidden,
num_classes,
num_rels,
num_bases=args.n_bases,
num_hidden_layers=args.n_layers - 2,
dropout=args.dropout,
use_self_loop=args.use_self_loop,
low_mem=args.low_mem,
layer_norm=args.layer_norm)
model = EntityClassify(
dev_id,
g.number_of_nodes(),
args.n_hidden,
num_classes,
num_rels,
num_bases=args.n_bases,
num_hidden_layers=args.n_layers - 2,
dropout=args.dropout,
use_self_loop=args.use_self_loop,
low_mem=args.low_mem,
layer_norm=args.layer_norm,
)
model.cuda(dev_id)
model = DistributedDataParallel(
model, device_ids=[dev_id], output_device=dev_id)
model, device_ids=[dev_id], output_device=dev_id
)
if args.dgl_sparse:
embed_layer.cuda(dev_id)
if len(list(embed_layer.parameters())) > 0:
embed_layer = DistributedDataParallel(
embed_layer, device_ids=[dev_id], output_device=dev_id)
embed_layer, device_ids=[dev_id], output_device=dev_id
)
else:
if len(list(embed_layer.parameters())) > 0:
embed_layer = DistributedDataParallel(
embed_layer, device_ids=None, output_device=None)
embed_layer, device_ids=None, output_device=None
)
# optimizer
dense_params = list(model.parameters())
@@ -206,26 +263,34 @@ def run(proc_id, n_gpus, n_cpus, args, devices, dataset, split, queue=None):
dense_params += list(embed_layer.module.embeds.parameters())
else:
dense_params += list(embed_layer.embeds.parameters())
optimizer = th.optim.Adam(dense_params, lr=args.lr,
weight_decay=args.l2norm)
optimizer = th.optim.Adam(
dense_params, lr=args.lr, weight_decay=args.l2norm
)
if args.dgl_sparse:
all_params = list(model.parameters()) + list(embed_layer.parameters())
optimizer = th.optim.Adam(
all_params, lr=args.lr, weight_decay=args.l2norm)
all_params, lr=args.lr, weight_decay=args.l2norm
)
if n_gpus > 1 and isinstance(embed_layer, DistributedDataParallel):
dgl_emb = embed_layer.module.dgl_emb
else:
dgl_emb = embed_layer.dgl_emb
emb_optimizer = dgl.optim.SparseAdam(
params=dgl_emb, lr=args.sparse_lr, eps=1e-8) if len(dgl_emb) > 0 else None
emb_optimizer = (
dgl.optim.SparseAdam(params=dgl_emb, lr=args.sparse_lr, eps=1e-8)
if len(dgl_emb) > 0
else None
)
else:
if n_gpus > 1:
embs = list(embed_layer.module.node_embeds.parameters())
else:
embs = list(embed_layer.node_embeds.parameters())
emb_optimizer = th.optim.SparseAdam(
embs, lr=args.sparse_lr) if len(embs) > 0 else None
emb_optimizer = (
th.optim.SparseAdam(embs, lr=args.sparse_lr)
if len(embs) > 0
else None
)
# training loop
print("start training...")
@@ -238,7 +303,7 @@ def run(proc_id, n_gpus, n_cpus, args, devices, dataset, split, queue=None):
last_val_acc = 0.0
do_test = False
if n_gpus > 1 and n_cpus - args.num_workers > 0:
th.set_num_threads(n_cpus-args.num_workers)
th.set_num_threads(n_cpus - args.num_workers)
steps = 0
time_records = []
model.train()
@@ -248,10 +313,12 @@ def run(proc_id, n_gpus, n_cpus, args, devices, dataset, split, queue=None):
for i, sample_data in enumerate(loader):
seeds, blocks = sample_data
t0 = time.time()
feats = embed_layer(blocks[0].srcdata[dgl.NID],
blocks[0].srcdata['ntype'],
blocks[0].srcdata['type_id'],
node_feats)
feats = embed_layer(
blocks[0].srcdata[dgl.NID],
blocks[0].srcdata["ntype"],
blocks[0].srcdata["type_id"],
node_feats,
)
logits = model(blocks, feats)
loss = F.cross_entropy(logits, labels[seeds])
t1 = time.time()
@@ -266,15 +333,17 @@ def run(proc_id, n_gpus, n_cpus, args, devices, dataset, split, queue=None):
gc.collect()
if i >= 3:
break
# real time
for i, sample_data in enumerate(loader):
seeds, blocks = sample_data
t0 = time.time()
feats = embed_layer(blocks[0].srcdata[dgl.NID],
blocks[0].srcdata['ntype'],
blocks[0].srcdata['type_id'],
node_feats)
feats = embed_layer(
blocks[0].srcdata[dgl.NID],
blocks[0].srcdata["ntype"],
blocks[0].srcdata["type_id"],
node_feats,
)
logits = model(blocks, feats)
loss = F.cross_entropy(logits, labels[seeds])
t1 = time.time()
@@ -302,10 +371,10 @@ def run(proc_id, n_gpus, n_cpus, args, devices, dataset, split, queue=None):
@utils.skip_if_not_4gpu()
@utils.benchmark('time', timeout=600)
@utils.parametrize('data', ['am', 'ogbn-mag'])
@utils.parametrize('low_mem', [True, False])
@utils.parametrize('dgl_sparse', [True, False])
@utils.benchmark("time", timeout=600)
@utils.parametrize("data", ["am", "ogbn-mag"])
@utils.parametrize("low_mem", [True, False])
@utils.parametrize("dgl_sparse", [True, False])
def track_time(data, low_mem, dgl_sparse):
# load graph data
dataset = utils.process_data(data)
@@ -316,10 +385,10 @@ def track_time(data, low_mem, dgl_sparse):
args.dataset = dataset
ogb_dataset = False
if data == 'am':
if data == "am":
args.n_bases = 40
args.l2norm = 5e-4
elif data == 'ogbn-mag':
elif data == "ogbn-mag":
args.n_bases = 2
args.l2norm = 0
else:
@@ -327,29 +396,29 @@ def track_time(data, low_mem, dgl_sparse):
if ogb_dataset is True:
split_idx = dataset.get_idx_split()
train_idx = split_idx["train"]['paper']
val_idx = split_idx["valid"]['paper']
test_idx = split_idx["test"]['paper']
train_idx = split_idx["train"]["paper"]
val_idx = split_idx["valid"]["paper"]
test_idx = split_idx["test"]["paper"]
hg_orig, labels = dataset[0]
subgs = {}
for etype in hg_orig.canonical_etypes:
u, v = hg_orig.all_edges(etype=etype)
subgs[etype] = (u, v)
subgs[(etype[2], 'rev-'+etype[1], etype[0])] = (v, u)
subgs[(etype[2], "rev-" + etype[1], etype[0])] = (v, u)
hg = dgl.heterograph(subgs)
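# e.g. ('author', 'writes', 'paper') also gains a reversed
# ('paper', 'rev-writes', 'author') relation, so every edge can be traversed in
# both directions during message passing.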
hg.nodes['paper'].data['feat'] = hg_orig.nodes['paper'].data['feat']
labels = labels['paper'].squeeze()
hg.nodes["paper"].data["feat"] = hg_orig.nodes["paper"].data["feat"]
labels = labels["paper"].squeeze()
num_rels = len(hg.canonical_etypes)
num_of_ntype = len(hg.ntypes)
num_classes = dataset.num_classes
if args.dataset == 'ogbn-mag':
category = 'paper'
print('Number of relations: {}'.format(num_rels))
print('Number of class: {}'.format(num_classes))
print('Number of train: {}'.format(len(train_idx)))
print('Number of valid: {}'.format(len(val_idx)))
print('Number of test: {}'.format(len(test_idx)))
if args.dataset == "ogbn-mag":
category = "paper"
print("Number of relations: {}".format(num_rels))
print("Number of class: {}".format(num_classes))
print("Number of train: {}".format(len(train_idx)))
print("Number of valid: {}".format(len(val_idx)))
print("Number of test: {}".format(len(test_idx)))
else:
# Load from hetero-graph
@@ -359,9 +428,9 @@ def track_time(data, low_mem, dgl_sparse):
num_of_ntype = len(hg.ntypes)
category = dataset.predict_category
num_classes = dataset.num_classes
train_mask = hg.nodes[category].data.pop('train_mask')
test_mask = hg.nodes[category].data.pop('test_mask')
labels = hg.nodes[category].data.pop('labels')
train_mask = hg.nodes[category].data.pop("train_mask")
test_mask = hg.nodes[category].data.pop("test_mask")
labels = hg.nodes[category].data.pop("labels")
train_idx = th.nonzero(train_mask, as_tuple=False).squeeze()
test_idx = th.nonzero(test_mask, as_tuple=False).squeeze()
@@ -369,8 +438,8 @@ def track_time(data, low_mem, dgl_sparse):
# Split train set into train and validation if args.validation is set
# otherwise use train set as the validation set.
if args.validation:
val_idx = train_idx[:len(train_idx) // 5]
train_idx = train_idx[len(train_idx) // 5:]
val_idx = train_idx[: len(train_idx) // 5]
train_idx = train_idx[len(train_idx) // 5 :]
else:
val_idx = train_idx
@@ -380,7 +449,7 @@ def track_time(data, low_mem, dgl_sparse):
node_feats.append(hg.number_of_nodes(ntype))
else:
assert len(hg.nodes[ntype].data) == 1
feat = hg.nodes[ntype].data.pop('feat')
feat = hg.nodes[ntype].data.pop("feat")
node_feats.append(feat.share_memory_())
# get target category id
@@ -388,20 +457,20 @@ def track_time(data, low_mem, dgl_sparse):
for i, ntype in enumerate(hg.ntypes):
if ntype == category:
category_id = i
print('{}:{}'.format(i, ntype))
print("{}:{}".format(i, ntype))
g = dgl.to_homogeneous(hg)
g.ndata['ntype'] = g.ndata[dgl.NTYPE]
g.ndata['ntype'].share_memory_()
g.edata['etype'] = g.edata[dgl.ETYPE]
g.edata['etype'].share_memory_()
g.ndata['type_id'] = g.ndata[dgl.NID]
g.ndata['type_id'].share_memory_()
g.ndata["ntype"] = g.ndata[dgl.NTYPE]
g.ndata["ntype"].share_memory_()
g.edata["etype"] = g.edata[dgl.ETYPE]
g.edata["etype"].share_memory_()
g.ndata["type_id"] = g.ndata[dgl.NID]
g.ndata["type_id"].share_memory_()
node_ids = th.arange(g.number_of_nodes())
# find out the target node ids
node_tids = g.ndata[dgl.NTYPE]
loc = (node_tids == category_id)
loc = node_tids == category_id
target_idx = node_ids[loc]
target_idx.share_memory_()
train_idx.share_memory_()
@@ -414,7 +483,7 @@ def track_time(data, low_mem, dgl_sparse):
n_gpus = len(devices)
n_cpus = mp.cpu_count()
ctx = mp.get_context('fork')
ctx = mp.get_context("fork")
queue = ctx.Queue()
procs = []
num_train_seeds = train_idx.shape[0]
@@ -430,35 +499,56 @@ def track_time(data, low_mem, dgl_sparse):
for proc_id in range(n_gpus):
# we have multi-gpu for training, evaluation and testing
# so split train set, valid set and test set into num-of-gpu parts.
proc_train_seeds = train_seeds[proc_id * tseeds_per_proc:
(proc_id + 1) * tseeds_per_proc
if (proc_id + 1) * tseeds_per_proc < num_train_seeds
else num_train_seeds]
proc_valid_seeds = valid_seeds[proc_id * vseeds_per_proc:
(proc_id + 1) * vseeds_per_proc
if (proc_id + 1) * vseeds_per_proc < num_valid_seeds
else num_valid_seeds]
proc_test_seeds = test_seeds[proc_id * tstseeds_per_proc:
(proc_id + 1) * tstseeds_per_proc
if (proc_id + 1) * tstseeds_per_proc < num_test_seeds
else num_test_seeds]
p = ctx.Process(target=run, args=(proc_id, n_gpus, n_cpus // n_gpus, args, devices,
(g, node_feats, num_of_ntype, num_classes, num_rels, target_idx,
train_idx, val_idx, test_idx, labels),
(proc_train_seeds,
proc_valid_seeds, proc_test_seeds),
queue))
proc_train_seeds = train_seeds[
proc_id * tseeds_per_proc : (proc_id + 1) * tseeds_per_proc
if (proc_id + 1) * tseeds_per_proc < num_train_seeds
else num_train_seeds
]
proc_valid_seeds = valid_seeds[
proc_id * vseeds_per_proc : (proc_id + 1) * vseeds_per_proc
if (proc_id + 1) * vseeds_per_proc < num_valid_seeds
else num_valid_seeds
]
proc_test_seeds = test_seeds[
proc_id * tstseeds_per_proc : (proc_id + 1) * tstseeds_per_proc
if (proc_id + 1) * tstseeds_per_proc < num_test_seeds
else num_test_seeds
]
p = ctx.Process(
target=run,
args=(
proc_id,
n_gpus,
n_cpus // n_gpus,
args,
devices,
(
g,
node_feats,
num_of_ntype,
num_classes,
num_rels,
target_idx,
train_idx,
val_idx,
test_idx,
labels,
),
(proc_train_seeds, proc_valid_seeds, proc_test_seeds),
queue,
),
)
p.start()
procs.append(p)
for p in procs:
p.join()
time_records = queue.get(block=False)
num_exclude = 10 # exclude first 10 iterations
num_exclude = 10 # exclude first 10 iterations
if len(time_records) < 15:
# exclude less if less records
num_exclude = int(len(time_records)*0.3)
num_exclude = int(len(time_records) * 0.3)
return np.mean(time_records[num_exclude:])
@@ -528,5 +618,5 @@ def config():
return args
if __name__ == '__main__':
track_time('am')
if __name__ == "__main__":
track_time("am")
@@ -3,10 +3,20 @@ import torch.nn as nn
import dgl
class BaseRGCN(nn.Module):
def __init__(self, num_nodes, h_dim, out_dim, num_rels, num_bases,
num_hidden_layers=1, dropout=0,
use_self_loop=False, use_cuda=False):
def __init__(
self,
num_nodes,
h_dim,
out_dim,
num_rels,
num_bases,
num_hidden_layers=1,
dropout=0,
use_self_loop=False,
use_cuda=False,
):
super(BaseRGCN, self).__init__()
self.num_nodes = num_nodes
self.h_dim = h_dim
@@ -50,10 +60,12 @@ class BaseRGCN(nn.Module):
h = layer(g, h, r, norm)
return h
def initializer(emb):
emb.uniform_(-1.0, 1.0)
return emb
class RelGraphEmbedLayer(nn.Module):
r"""Embedding layer for featureless heterograph.
Parameters
@@ -74,16 +86,19 @@ class RelGraphEmbedLayer(nn.Module):
dgl_sparse : bool, optional
If true, use dgl.nn.NodeEmbedding otherwise use torch.nn.Embedding
"""
def __init__(self,
dev_id,
num_nodes,
node_tids,
num_of_ntype,
input_size,
embed_size,
dgl_sparse=False):
def __init__(
self,
dev_id,
num_nodes,
node_tids,
num_of_ntype,
input_size,
embed_size,
dgl_sparse=False,
):
super(RelGraphEmbedLayer, self).__init__()
self.dev_id = th.device(dev_id if dev_id >= 0 else 'cpu')
self.dev_id = th.device(dev_id if dev_id >= 0 else "cpu")
self.embed_size = embed_size
self.num_nodes = num_nodes
self.dgl_sparse = dgl_sparse
@@ -96,10 +111,16 @@ class RelGraphEmbedLayer(nn.Module):
for ntype in range(num_of_ntype):
if isinstance(input_size[ntype], int):
if dgl_sparse:
self.node_embeds[str(ntype)] = dgl.nn.NodeEmbedding(input_size[ntype], embed_size, name=str(ntype),
init_func=initializer)
self.node_embeds[str(ntype)] = dgl.nn.NodeEmbedding(
input_size[ntype],
embed_size,
name=str(ntype),
init_func=initializer,
)
else:
sparse_emb = th.nn.Embedding(input_size[ntype], embed_size, sparse=True)
sparse_emb = th.nn.Embedding(
input_size[ntype], embed_size, sparse=True
)
nn.init.uniform_(sparse_emb.weight, -1.0, 1.0)
self.node_embeds[str(ntype)] = sparse_emb
else:
@@ -110,8 +131,7 @@ class RelGraphEmbedLayer(nn.Module):
@property
def dgl_emb(self):
"""
"""
""" """
if self.dgl_sparse:
embs = [emb for emb in self.node_embeds.values()]
return embs
@@ -137,15 +157,23 @@ class RelGraphEmbedLayer(nn.Module):
embeddings as the input of the next layer
"""
tsd_ids = node_ids.to(self.dev_id)
embeds = th.empty(node_ids.shape[0], self.embed_size, device=self.dev_id)
embeds = th.empty(
node_ids.shape[0], self.embed_size, device=self.dev_id
)
for ntype in range(self.num_of_ntype):
loc = node_tids == ntype
if isinstance(features[ntype], int):
if self.dgl_sparse:
embeds[loc] = self.node_embeds[str(ntype)](type_ids[loc], self.dev_id)
embeds[loc] = self.node_embeds[str(ntype)](
type_ids[loc], self.dev_id
)
else:
embeds[loc] = self.node_embeds[str(ntype)](type_ids[loc]).to(self.dev_id)
embeds[loc] = self.node_embeds[str(ntype)](
type_ids[loc]
).to(self.dev_id)
else:
embeds[loc] = features[ntype][type_ids[loc]].to(self.dev_id) @ self.embeds[str(ntype)].to(self.dev_id)
embeds[loc] = features[ntype][type_ids[loc]].to(
self.dev_id
) @ self.embeds[str(ntype)].to(self.dev_id)
return embeds
import dgl
from dgl.nn.pytorch import RelGraphConv
import torch
import torch.nn as nn
import torch.nn.functional as F
import dgl
from dgl.nn.pytorch import RelGraphConv
from . import utils
class RGCN(nn.Module):
def __init__(self, num_nodes, h_dim, out_dim, num_rels,
regularizer="basis", num_bases=-1, dropout=0.,
self_loop=False,
ns_mode=False):
def __init__(
self,
num_nodes,
h_dim,
out_dim,
num_rels,
regularizer="basis",
num_bases=-1,
dropout=0.0,
self_loop=False,
ns_mode=False,
):
super(RGCN, self).__init__()
if num_bases == -1:
num_bases = num_rels
self.emb = nn.Embedding(num_nodes, h_dim)
self.conv1 = RelGraphConv(h_dim, h_dim, num_rels, regularizer,
num_bases, self_loop=self_loop)
self.conv1 = RelGraphConv(
h_dim, h_dim, num_rels, regularizer, num_bases, self_loop=self_loop
)
self.conv2 = RelGraphConv(
h_dim, out_dim, num_rels, regularizer, num_bases, self_loop=self_loop)
h_dim,
out_dim,
num_rels,
regularizer,
num_bases,
self_loop=self_loop,
)
self.dropout = nn.Dropout(dropout)
self.ns_mode = ns_mode
@@ -26,15 +44,15 @@ class RGCN(nn.Module):
if self.ns_mode:
# forward for neighbor sampling
x = self.emb(g[0].srcdata[dgl.NID])
h = self.conv1(g[0], x, g[0].edata[dgl.ETYPE], g[0].edata['norm'])
h = self.conv1(g[0], x, g[0].edata[dgl.ETYPE], g[0].edata["norm"])
h = self.dropout(F.relu(h))
h = self.conv2(g[1], h, g[1].edata[dgl.ETYPE], g[1].edata['norm'])
h = self.conv2(g[1], h, g[1].edata[dgl.ETYPE], g[1].edata["norm"])
return h
else:
x = self.emb.weight if nids is None else self.emb(nids)
h = self.conv1(g, x, g.edata[dgl.ETYPE], g.edata['norm'])
h = self.conv1(g, x, g.edata[dgl.ETYPE], g.edata["norm"])
h = self.dropout(F.relu(h))
h = self.conv2(g, h, g.edata[dgl.ETYPE], g.edata['norm'])
h = self.conv2(g, h, g.edata[dgl.ETYPE], g.edata["norm"])
return h
@@ -47,9 +65,9 @@ def load_data(data_name, get_norm=False, inv_target=False):
num_rels = len(hg.canonical_etypes)
category = dataset.predict_category
num_classes = dataset.num_classes
labels = hg.nodes[category].data.pop('labels')
train_mask = hg.nodes[category].data.pop('train_mask')
test_mask = hg.nodes[category].data.pop('test_mask')
labels = hg.nodes[category].data.pop("labels")
train_mask = hg.nodes[category].data.pop("train_mask")
test_mask = hg.nodes[category].data.pop("test_mask")
train_idx = torch.nonzero(train_mask, as_tuple=False).squeeze()
test_idx = torch.nonzero(test_mask, as_tuple=False).squeeze()
@@ -57,9 +75,10 @@ def load_data(data_name, get_norm=False, inv_target=False):
# Calculate normalization weight for each edge,
# 1. / d, d is the degree of the destination node
for cetype in hg.canonical_etypes:
hg.edges[cetype].data['norm'] = dgl.norm_by_dst(
hg, cetype).unsqueeze(1)
edata = ['norm']
hg.edges[cetype].data["norm"] = dgl.norm_by_dst(
hg, cetype
).unsqueeze(1)
edata = ["norm"]
else:
edata = None
@@ -68,20 +87,30 @@ def load_data(data_name, get_norm=False, inv_target=False):
g = dgl.to_homogeneous(hg, edata=edata)
# Rename the fields since they can be changed by, for example, the DataLoader
g.ndata['ntype'] = g.ndata.pop(dgl.NTYPE)
g.ndata['type_id'] = g.ndata.pop(dgl.NID)
g.ndata["ntype"] = g.ndata.pop(dgl.NTYPE)
g.ndata["type_id"] = g.ndata.pop(dgl.NID)
node_ids = torch.arange(g.num_nodes())
# find out the target node ids in g
loc = (g.ndata['ntype'] == category_id)
loc = g.ndata["ntype"] == category_id
target_idx = node_ids[loc]
if inv_target:
# Map global node IDs to type-specific node IDs. This is required for
# looking up type-specific labels in a minibatch
inv_target = torch.empty((g.num_nodes(),), dtype=torch.int64)
inv_target[target_idx] = torch.arange(0, target_idx.shape[0],
dtype=inv_target.dtype)
return g, num_rels, num_classes, labels, train_idx, test_idx, target_idx, inv_target
inv_target[target_idx] = torch.arange(
0, target_idx.shape[0], dtype=inv_target.dtype
)
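# e.g. if target_idx = [5, 9, 12], then inv_target[5] = 0, inv_target[9] = 1
# and inv_target[12] = 2; all other entries are left uninitialized.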
return (
g,
num_rels,
num_classes,
labels,
train_idx,
test_idx,
target_idx,
inv_target,
)
else:
return g, num_rels, num_classes, labels, train_idx, test_idx, target_idx
from timeit import default_timer
import inspect
import json
import os
import pickle
import shutil
import time
import zipfile
import requests
import inspect
from functools import partial, reduce, wraps
from timeit import default_timer
import numpy as np
import pandas
import dgl
import requests
import torch
import time
from ogb.nodeproppred import DglNodePropPredDataset
from functools import partial, reduce, wraps
import dgl
def _download(url, path, filename):
@@ -23,18 +24,20 @@ def _download(url, path, filename):
os.makedirs(path, exist_ok=True)
f_remote = requests.get(url, stream=True)
sz = f_remote.headers.get('content-length')
assert f_remote.status_code == 200, 'fail to open {}'.format(url)
with open(fn, 'wb') as writer:
for chunk in f_remote.iter_content(chunk_size=1024*1024):
sz = f_remote.headers.get("content-length")
assert f_remote.status_code == 200, "fail to open {}".format(url)
with open(fn, "wb") as writer:
for chunk in f_remote.iter_content(chunk_size=1024 * 1024):
writer.write(chunk)
print('Download finished.')
print("Download finished.")
import traceback
from _thread import start_new_thread
# GRAPH_CACHE = {}
import torch.multiprocessing as mp
from _thread import start_new_thread
import traceback
def thread_wrapped_func(func):
"""
@@ -64,20 +67,21 @@ def thread_wrapped_func(func):
return decorated_function
def get_graph(name, format = None):
def get_graph(name, format=None):
# global GRAPH_CACHE
# if name in GRAPH_CACHE:
# return GRAPH_CACHE[name].to(format)
if isinstance(format, str):
format = [format] # didn't specify format
format = [format] # didn't specify format
if format is None:
format = ['csc', 'csr', 'coo']
format = ["csc", "csr", "coo"]
g = None
if name == 'cora':
if name == "cora":
g = dgl.data.CoraGraphDataset(verbose=False)[0]
elif name == 'pubmed':
elif name == "pubmed":
g = dgl.data.PubmedGraphDataset(verbose=False)[0]
elif name == 'livejournal':
elif name == "livejournal":
bin_path = "/tmp/dataset/livejournal/livejournal_{}.bin".format(format)
if os.path.exists(bin_path):
g_list, _ = dgl.load_graphs(bin_path)
@@ -112,32 +116,50 @@ def get_graph(name, format = None):
def get_ogb_graph(name):
os.symlink('/tmp/dataset/', os.path.join(os.getcwd(), 'dataset'))
os.symlink("/tmp/dataset/", os.path.join(os.getcwd(), "dataset"))
data = DglNodePropPredDataset(name=name)
return data[0][0]
def get_livejournal():
# Same as https://snap.stanford.edu/data/soc-LiveJournal1.txt.gz
_download('https://dgl-asv-data.s3-us-west-2.amazonaws.com/dataset/livejournal/soc-LiveJournal1.txt.gz',
'/tmp/dataset/livejournal', 'soc-LiveJournal1.txt.gz')
df = pandas.read_csv('/tmp/dataset/livejournal/soc-LiveJournal1.txt.gz', sep='\t', skiprows=4, header=None,
names=['src', 'dst'], compression='gzip')
src = df['src'].values
dst = df['dst'].values
print('construct the graph')
_download(
"https://dgl-asv-data.s3-us-west-2.amazonaws.com/dataset/livejournal/soc-LiveJournal1.txt.gz",
"/tmp/dataset/livejournal",
"soc-LiveJournal1.txt.gz",
)
df = pandas.read_csv(
"/tmp/dataset/livejournal/soc-LiveJournal1.txt.gz",
sep="\t",
skiprows=4,
header=None,
names=["src", "dst"],
compression="gzip",
)
src = df["src"].values
dst = df["dst"].values
print("construct the graph")
return dgl.graph((src, dst))
def get_friendster():
# Same as https://snap.stanford.edu/data/bigdata/communities/com-friendster.ungraph.txt.gz
_download('https://dgl-asv-data.s3-us-west-2.amazonaws.com/dataset/friendster/com-friendster.ungraph.txt.gz',
'/tmp/dataset/friendster', 'com-friendster.ungraph.txt.gz')
df = pandas.read_csv('/tmp/dataset/friendster/com-friendster.ungraph.txt.gz', sep='\t', skiprows=4, header=None,
names=['src', 'dst'], compression='gzip')
src = df['src'].values
dst = df['dst'].values
print('construct the graph')
_download(
"https://dgl-asv-data.s3-us-west-2.amazonaws.com/dataset/friendster/com-friendster.ungraph.txt.gz",
"/tmp/dataset/friendster",
"com-friendster.ungraph.txt.gz",
)
df = pandas.read_csv(
"/tmp/dataset/friendster/com-friendster.ungraph.txt.gz",
sep="\t",
skiprows=4,
header=None,
names=["src", "dst"],
compression="gzip",
)
src = df["src"].values
dst = df["dst"].values
print("construct the graph")
return dgl.graph((src, dst))
@@ -164,68 +186,73 @@ class OGBDataset(object):
def load_ogb_product():
name = 'ogbn-products'
os.symlink('/tmp/dataset/', os.path.join(os.getcwd(), 'dataset'))
name = "ogbn-products"
os.symlink("/tmp/dataset/", os.path.join(os.getcwd(), "dataset"))
print('load', name)
print("load", name)
data = DglNodePropPredDataset(name=name)
print('finish loading', name)
print("finish loading", name)
splitted_idx = data.get_idx_split()
graph, labels = data[0]
labels = labels[:, 0]
graph.ndata['label'] = labels
in_feats = graph.ndata['feat'].shape[1]
num_labels = len(torch.unique(
labels[torch.logical_not(torch.isnan(labels))]))
graph.ndata["label"] = labels
in_feats = graph.ndata["feat"].shape[1]
num_labels = len(
torch.unique(labels[torch.logical_not(torch.isnan(labels))])
)
# Find the node IDs in the training, validation, and test set.
train_nid, val_nid, test_nid = splitted_idx['train'], splitted_idx['valid'], splitted_idx['test']
train_nid, val_nid, test_nid = (
splitted_idx["train"],
splitted_idx["valid"],
splitted_idx["test"],
)
train_mask = torch.zeros((graph.number_of_nodes(),), dtype=torch.bool)
train_mask[train_nid] = True
val_mask = torch.zeros((graph.number_of_nodes(),), dtype=torch.bool)
val_mask[val_nid] = True
test_mask = torch.zeros((graph.number_of_nodes(),), dtype=torch.bool)
test_mask[test_nid] = True
graph.ndata['train_mask'] = train_mask
graph.ndata['val_mask'] = val_mask
graph.ndata['test_mask'] = test_mask
graph.ndata["train_mask"] = train_mask
graph.ndata["val_mask"] = val_mask
graph.ndata["test_mask"] = test_mask
return OGBDataset(graph, num_labels)
def load_ogb_mag():
name = 'ogbn-mag'
os.symlink('/tmp/dataset/', os.path.join(os.getcwd(), 'dataset'))
name = "ogbn-mag"
os.symlink("/tmp/dataset/", os.path.join(os.getcwd(), "dataset"))
print('load', name)
print("load", name)
dataset = DglNodePropPredDataset(name=name)
print('finish loading', name)
print("finish loading", name)
split_idx = dataset.get_idx_split()
train_idx = split_idx["train"]['paper']
val_idx = split_idx["valid"]['paper']
test_idx = split_idx["test"]['paper']
train_idx = split_idx["train"]["paper"]
val_idx = split_idx["valid"]["paper"]
test_idx = split_idx["test"]["paper"]
hg_orig, labels = dataset[0]
subgs = {}
for etype in hg_orig.canonical_etypes:
u, v = hg_orig.all_edges(etype=etype)
subgs[etype] = (u, v)
subgs[(etype[2], 'rev-'+etype[1], etype[0])] = (v, u)
subgs[(etype[2], "rev-" + etype[1], etype[0])] = (v, u)
hg = dgl.heterograph(subgs)
hg.nodes['paper'].data['feat'] = hg_orig.nodes['paper'].data['feat']
hg.nodes['paper'].data['labels'] = labels['paper'].squeeze()
train_mask = torch.zeros((hg.number_of_nodes('paper'),), dtype=torch.bool)
hg.nodes["paper"].data["feat"] = hg_orig.nodes["paper"].data["feat"]
hg.nodes["paper"].data["labels"] = labels["paper"].squeeze()
train_mask = torch.zeros((hg.number_of_nodes("paper"),), dtype=torch.bool)
train_mask[train_idx] = True
val_mask = torch.zeros((hg.number_of_nodes('paper'),), dtype=torch.bool)
val_mask = torch.zeros((hg.number_of_nodes("paper"),), dtype=torch.bool)
val_mask[val_idx] = True
test_mask = torch.zeros((hg.number_of_nodes('paper'),), dtype=torch.bool)
test_mask = torch.zeros((hg.number_of_nodes("paper"),), dtype=torch.bool)
test_mask[test_idx] = True
hg.nodes['paper'].data['train_mask'] = train_mask
hg.nodes['paper'].data['val_mask'] = val_mask
hg.nodes['paper'].data['test_mask'] = test_mask
hg.nodes["paper"].data["train_mask"] = train_mask
hg.nodes["paper"].data["val_mask"] = val_mask
hg.nodes["paper"].data["test_mask"] = test_mask
num_classes = dataset.num_classes
return OGBDataset(hg, num_classes, 'paper')
return OGBDataset(hg, num_classes, "paper")
class PinsageDataset:
@@ -253,30 +280,28 @@ class PinsageDataset:
def load_nowplaying_rs():
import torchtext.legacy as torchtext
# follow examples/pytorch/pinsage/README to create train_g.bin
name = 'train_g.bin'
dataset_dir = os.path.join(os.getcwd(), 'dataset')
os.symlink('/tmp/dataset/', dataset_dir)
name = "train_g.bin"
dataset_dir = os.path.join(os.getcwd(), "dataset")
os.symlink("/tmp/dataset/", dataset_dir)
dataset_path = os.path.join(dataset_dir, "nowplaying_rs", name)
g_list, _ = dgl.load_graphs(dataset_path)
g = g_list[0]
user_ntype = 'user'
item_ntype = 'track'
user_ntype = "user"
item_ntype = "track"
# Assign user and track IDs and use them as features (to learn an individual trainable
# embedding for each entity)
g.nodes[user_ntype].data['id'] = torch.arange(
g.number_of_nodes(user_ntype))
g.nodes[item_ntype].data['id'] = torch.arange(
g.number_of_nodes(item_ntype))
g.nodes[user_ntype].data["id"] = torch.arange(g.number_of_nodes(user_ntype))
g.nodes[item_ntype].data["id"] = torch.arange(g.number_of_nodes(item_ntype))
# Prepare torchtext dataset and vocabulary
fields = {}
examples = []
for i in range(g.number_of_nodes(item_ntype)):
example = torchtext.data.Example.fromlist(
[], [])
example = torchtext.data.Example.fromlist([], [])
examples.append(example)
textset = torchtext.data.Dataset(examples, fields)
@@ -284,32 +309,32 @@ def load_nowplaying_rs():
def process_data(name):
if name == 'cora':
if name == "cora":
return dgl.data.CoraGraphDataset()
elif name == 'pubmed':
elif name == "pubmed":
return dgl.data.PubmedGraphDataset()
elif name == 'aifb':
elif name == "aifb":
return dgl.data.AIFBDataset()
elif name == 'mutag':
elif name == "mutag":
return dgl.data.MUTAGDataset()
elif name == 'bgs':
elif name == "bgs":
return dgl.data.BGSDataset()
elif name == 'am':
elif name == "am":
return dgl.data.AMDataset()
elif name == 'reddit':
elif name == "reddit":
return dgl.data.RedditDataset(self_loop=True)
elif name == 'ogbn-products':
elif name == "ogbn-products":
return load_ogb_product()
elif name == 'ogbn-mag':
elif name == "ogbn-mag":
return load_ogb_mag()
elif name == 'nowplaying_rs':
elif name == "nowplaying_rs":
return load_nowplaying_rs()
else:
raise ValueError('Invalid dataset name:', name)
raise ValueError("Invalid dataset name:", name)
def get_bench_device():
device = os.environ.get('DGL_BENCH_DEVICE', 'cpu')
device = os.environ.get("DGL_BENCH_DEVICE", "cpu")
if device.lower() == "gpu":
return "cuda:0"
else:
@@ -335,15 +360,15 @@ def setup_track_flops(*args, **kwargs):
TRACK_UNITS = {
'time': 's',
'acc': '%',
'flops': 'GFLOPS',
"time": "s",
"acc": "%",
"flops": "GFLOPS",
}
TRACK_SETUP = {
'time': setup_track_time,
'acc': setup_track_acc,
'flops': setup_track_flops,
"time": setup_track_time,
"acc": setup_track_acc,
"flops": setup_track_flops,
}
@@ -390,12 +415,13 @@ def parametrize(param_name, params):
def foo(x, y):
pass
"""
def _wrapper(func):
sig_params = inspect.signature(func).parameters.keys()
num_params = len(sig_params)
if getattr(func, 'params', None) is None:
if getattr(func, "params", None) is None:
func.params = [None] * num_params
if getattr(func, 'param_names', None) is None:
if getattr(func, "param_names", None) is None:
func.param_names = [None] * num_params
found_param = False
for i, sig_param in enumerate(sig_params):
@@ -405,16 +431,18 @@ def parametrize(param_name, params):
found_param = True
break
if not found_param:
raise ValueError('Invalid parameter name:', param_name)
raise ValueError("Invalid parameter name:", param_name)
return func
return _wrapper
def noop_decorator(param_name, params):
"""noop decorator
"""
"""noop decorator"""
def _wrapper(func):
return func
return _wrapper
@@ -423,18 +451,21 @@ class TestFilter:
self.conf = None
if "DGL_REG_CONF" in os.environ:
current_dir = os.path.dirname(os.path.abspath(__file__))
path = os.path.join(current_dir, "../../",
os.environ["DGL_REG_CONF"])
path = os.path.join(
current_dir, "../../", os.environ["DGL_REG_CONF"]
)
with open(path, "r") as f:
self.conf = json.load(f)
if "INSTANCE_TYPE" in os.environ:
instance_type = os.environ["INSTANCE_TYPE"]
else:
raise Exception(
"Must set both DGL_REG_CONF and INSTANCE_TYPE as env")
"Must set both DGL_REG_CONF and INSTANCE_TYPE as env"
)
self.enabled_tests = self.conf[instance_type]["tests"]
else:
import logging
logging.warning("No regression test conf file specified")
def check(self, func):
@@ -451,7 +482,7 @@ class TestFilter:
filter = TestFilter()
device = os.environ.get('DGL_BENCH_DEVICE', 'cpu')
device = os.environ.get("DGL_BENCH_DEVICE", "cpu")
if device == "cpu":
parametrize_cpu = parametrize
@@ -461,38 +492,46 @@ elif device == "gpu":
parametrize_gpu = parametrize
else:
raise Exception(
"Unknown device. Must be one of ['cpu', 'gpu'], but got {}".format(device))
"Unknown device. Must be one of ['cpu', 'gpu'], but got {}".format(
device
)
)
def skip_if_gpu():
"""skip if DGL_BENCH_DEVICE is gpu
"""
device = os.environ.get('DGL_BENCH_DEVICE', 'cpu')
"""skip if DGL_BENCH_DEVICE is gpu"""
device = os.environ.get("DGL_BENCH_DEVICE", "cpu")
def _wrapper(func):
if device == "gpu":
# skip if not enabled
func.benchmark_name = "skip_" + func.__name__
return func
return _wrapper
def _cuda_device_count(q):
import torch
q.put(torch.cuda.device_count())
def get_num_gpu():
import multiprocessing as mp
q = mp.Queue()
p = mp.Process(target=_cuda_device_count, args=(q, ))
p = mp.Process(target=_cuda_device_count, args=(q,))
p.start()
p.join()
return q.get(block=False)
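# (Counting devices in a spawned child process presumably keeps CUDA from being
# initialized in this parent process, which would otherwise conflict with the
# fork-based multiprocessing used by the multi-GPU benchmarks.)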
GPU_COUNT = get_num_gpu()
def skip_if_not_4gpu():
"""skip if DGL_BENCH_DEVICE is gpu
"""
"""skip if DGL_BENCH_DEVICE is gpu"""
def _wrapper(func):
if GPU_COUNT != 4:
@@ -500,6 +539,7 @@ def skip_if_not_4gpu():
print("Skip {}".format(func.__name__))
func.benchmark_name = "skip_" + func.__name__
return func
return _wrapper
@@ -525,7 +565,7 @@ def benchmark(track_type, timeout=60):
def foo():
pass
"""
assert track_type in ['time', 'acc', 'flops']
assert track_type in ["time", "acc", "flops"]
def _wrapper(func):
func.unit = TRACK_UNITS[track_type]
@@ -535,8 +575,10 @@ def benchmark(track_type, timeout=60):
# skip if not enabled
func.benchmark_name = "skip_" + func.__name__
return func
return _wrapper
#####################################
# Timer
#####################################
@@ -551,7 +593,7 @@ class Timer:
self.device = device
def __enter__(self):
if self.device == 'cuda:0':
if self.device == "cuda:0":
self.start_event = torch.cuda.Event(enable_timing=True)
self.end_event = torch.cuda.Event(enable_timing=True)
self.start_event.record()
@@ -560,10 +602,11 @@ class Timer:
return self
def __exit__(self, type, value, traceback):
if self.device == 'cuda:0':
if self.device == "cuda:0":
self.end_event.record()
torch.cuda.synchronize() # Wait for the events to be recorded!
self.elapsed_secs = self.start_event.elapsed_time(
self.end_event) / 1e3
self.elapsed_secs = (
self.start_event.elapsed_time(self.end_event) / 1e3
)
else:
self.elapsed_secs = self.timer() - self.tic
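# A hedged usage sketch of the Timer context manager above (assuming it is
# imported from this utils module; the GPU branch relies on CUDA events, the
# CPU branch on timeit.default_timer):
#
#     with Timer(device="cuda:0") as t:
#         logits = model(g, features)
#     print("forward pass took {:.4f}s".format(t.elapsed_secs))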
from pathlib import Path
import json
from pathlib import Path
def main():
result_dir = Path(__file__).parent/ ".." / Path("results/")
result_dir = Path(__file__).parent / ".." / Path("results/")
for per_machine_dir in result_dir.iterdir():
if per_machine_dir.is_dir():
try:
machine_json = json.loads((per_machine_dir/"machine.json").read_text())
machine_json = json.loads(
(per_machine_dir / "machine.json").read_text()
)
ram = machine_json["ram"]
for f in per_machine_dir.glob("*.json"):
if f.stem != "machine":
@@ -21,4 +24,5 @@ def main():
except Exception as e:
print(e)
main()
\ No newline at end of file
main()
import pandas as pd
import json
from pathlib import Path
from itertools import product
from pathlib import Path
import pandas as pd
def get_branch_name_from_hash(hash):
import subprocess
process = subprocess.Popen(['git', 'name-rev', '--name-only', hash],
stdout=subprocess.PIPE,
stderr=subprocess.PIPE)
process = subprocess.Popen(
["git", "name-rev", "--name-only", hash],
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
)
stdout, stderr = process.communicate()
if len(stderr) > 0:
return hash[:10]
else:
return stdout.decode("utf-8") .strip("\n")
return stdout.decode("utf-8").strip("\n")
def main():
@@ -28,22 +32,30 @@ def main():
# commit_results_dict = {}
per_machine_result = {}
commit_results_json_paths = [
f for f in machine.glob("*") if f.name != "machine.json"]
f for f in machine.glob("*") if f.name != "machine.json"
]
for commit in commit_results_json_paths:
with commit.open() as f:
commit_result = json.load(f)
commit_hash = commit_result['commit_hash']
commit_hash = commit_result["commit_hash"]
per_commit_result = {}
for test_name, result in commit_result['results'].items():
for test_name, result in commit_result["results"].items():
per_commit_result[test_name] = []
if result['result'] is None:
for test_args in product(*result['params']):
if result["result"] is None:
for test_args in product(*result["params"]):
per_commit_result[test_name].append(
{"params": ", ".join(test_args), "result": None})
{"params": ", ".join(test_args), "result": None}
)
else:
for test_args, performance_number in zip(product(*result['params']), result['result']):
for test_args, performance_number in zip(
product(*result["params"]), result["result"]
):
per_commit_result[test_name].append(
{"params": ", ".join(test_args), "result": performance_number})
{
"params": ", ".join(test_args),
"result": performance_number,
}
)
per_machine_result[commit_hash] = per_commit_result
output_results_dict[machine.name] = per_machine_result
return output_results_dict
@@ -54,8 +66,8 @@ def dict_to_csv(output_results_dict):
benchmark_conf = json.load(f)
unit_dict = {}
for k, v in benchmark_conf.items():
if k != 'version':
unit_dict[k] = v['unit']
if k != "version":
unit_dict[k] = v["unit"]
result_list = []
for machine, per_machine_result in output_results_dict.items():
for commit, test_cases in per_machine_result.items():
@@ -65,19 +77,34 @@ def dict_to_csv(output_results_dict):
for test_case_name, results in test_cases.items():
for result in results:
result_list.append(
{"test_name": test_case_name, 'params': result['params'], 'unit': unit_dict[test_case_name], "number": result['result'], 'commit': branch_name, 'machine': machine})
{
"test_name": test_case_name,
"params": result["params"],
"unit": unit_dict[test_case_name],
"number": result["result"],
"commit": branch_name,
"machine": machine,
}
)
df = pd.DataFrame(result_list)
return df
def side_by_side_view(df):
commits = df['commit'].unique().tolist()
full_df = df.loc[df['commit'] == commits[0]]
commits = df["commit"].unique().tolist()
full_df = df.loc[df["commit"] == commits[0]]
for commit in commits[1:]:
per_commit_df = df.loc[df['commit'] == commit]
per_commit_df = df.loc[df["commit"] == commit]
full_df: pd.DataFrame = full_df.merge(
per_commit_df, on=['test_name', 'params', 'machine', 'unit'], how='outer', suffixes=("_{}".format(full_df.iloc[0]["commit"]), "_{}".format(per_commit_df.iloc[0]["commit"])))
full_df = full_df.loc[:, ~full_df.columns.str.startswith('commit')]
per_commit_df,
on=["test_name", "params", "machine", "unit"],
how="outer",
suffixes=(
"_{}".format(full_df.iloc[0]["commit"]),
"_{}".format(per_commit_df.iloc[0]["commit"]),
),
)
full_df = full_df.loc[:, ~full_df.columns.str.startswith("commit")]
return full_df
......
import argparse
import json
import os
import re
def json_minify(string, strip_space=True):
'''
"""
Based on JSON.minify.js:
https://github.com/getify/JSON.minify
Contributors:
- Pradyun S. Gedam (conditions and variable names changed)
'''
"""
tokenizer = re.compile(r'"|(/\*)|(\*/)|(//)|\n|\r')
in_string = False
in_multi = False
@@ -24,44 +22,48 @@ def json_minify(string, strip_space=True):
for match in re.finditer(tokenizer, string):
if not (in_multi or in_single):
tmp = string[index:match.start()]
tmp = string[index : match.start()]
if not in_string and strip_space:
# replace white space as defined in standard
tmp = re.sub('[ \t\n\r]+', '', tmp)
tmp = re.sub("[ \t\n\r]+", "", tmp)
new_str.append(tmp)
index = match.end()
val = match.group()
if val == '"' and not (in_multi or in_single):
escaped = re.search(r'(\\)*$', string[:match.start()])
escaped = re.search(r"(\\)*$", string[: match.start()])
# start of string or unescaped quote character to end string
if not in_string or (escaped is None or len(escaped.group()) % 2 == 0):
if not in_string or (
escaped is None or len(escaped.group()) % 2 == 0
):
in_string = not in_string
index -= 1 # include " character in next catch
elif not (in_string or in_multi or in_single):
if val == '/*':
if val == "/*":
in_multi = True
elif val == '//':
elif val == "//":
in_single = True
elif val == '*/' and in_multi and not (in_string or in_single):
elif val == "*/" and in_multi and not (in_string or in_single):
in_multi = False
elif val in '\r\n' and not (in_multi or in_string) and in_single:
elif val in "\r\n" and not (in_multi or in_string) and in_single:
in_single = False
elif not ((in_multi or in_single) or (val in ' \r\n\t' and strip_space)):
elif not (
(in_multi or in_single) or (val in " \r\n\t" and strip_space)
):
new_str.append(val)
new_str.append(string[index:])
content = ''.join(new_str)
content = "".join(new_str)
content = content.replace(",]", "]")
content = content.replace(",}", "}")
return content
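# Hedged usage sketch: strip // and /* */ comments plus insignificant whitespace
# from a commented JSON config before parsing it with the standard library
# (file name hypothetical):
#
#     conf = json.loads(json_minify(open("benchmarks.json").read()))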
def add_prefix(branch_name):
if '/' not in branch_name:
return "origin/"+branch_name
if "/" not in branch_name:
return "origin/" + branch_name
else:
return branch_name
......
import argparse, time
import numpy as np
import dgl
import argparse
import time
import mxnet as mx
from mxnet import nd, gluon
import numpy as np
from mxnet import gluon, nd
from mxnet.gluon import nn
import dgl
from dgl.data import register_data_args
from dgl.data import CoraGraphDataset, CiteseerGraphDataset, PubmedGraphDataset
from dgl.data import (CiteseerGraphDataset, CoraGraphDataset,
PubmedGraphDataset, register_data_args)
from dgl.nn.mxnet.conv import APPNPConv
class APPNP(nn.Block):
def __init__(self,
g,
in_feats,
hiddens,
n_classes,
activation,
feat_drop,
edge_drop,
alpha,
k):
def __init__(
self,
g,
in_feats,
hiddens,
n_classes,
activation,
feat_drop,
edge_drop,
alpha,
k,
):
super(APPNP, self).__init__()
self.g = g
@@ -51,21 +56,23 @@ class APPNP(nn.Block):
h = self.propagate(self.g, h)
return h
def evaluate(model, features, labels, mask):
pred = model(features).argmax(axis=1)
accuracy = ((pred == labels) * mask).sum() / mask.sum().asscalar()
return accuracy.asscalar()
def main(args):
# load and preprocess dataset
if args.dataset == 'cora':
if args.dataset == "cora":
data = CoraGraphDataset()
elif args.dataset == 'citeseer':
elif args.dataset == "citeseer":
data = CiteseerGraphDataset()
elif args.dataset == 'pubmed':
elif args.dataset == "pubmed":
data = PubmedGraphDataset()
else:
raise ValueError('Unknown dataset: {}'.format(args.dataset))
raise ValueError("Unknown dataset: {}".format(args.dataset))
g = data[0]
if args.gpu < 0:
@@ -76,39 +83,46 @@ def main(args):
ctx = mx.gpu(args.gpu)
g = g.to(ctx)
features = g.ndata['feat']
labels = mx.nd.array(g.ndata['label'], dtype="float32", ctx=ctx)
train_mask = g.ndata['train_mask']
val_mask = g.ndata['val_mask']
test_mask = g.ndata['test_mask']
features = g.ndata["feat"]
labels = mx.nd.array(g.ndata["label"], dtype="float32", ctx=ctx)
train_mask = g.ndata["train_mask"]
val_mask = g.ndata["val_mask"]
test_mask = g.ndata["test_mask"]
in_feats = features.shape[1]
n_classes = data.num_labels
n_edges = data.graph.number_of_edges()
print("""----Data statistics------'
print(
"""----Data statistics------'
#Edges %d
#Classes %d
#Train samples %d
#Val samples %d
#Test samples %d""" %
(n_edges, n_classes,
train_mask.sum().asscalar(),
val_mask.sum().asscalar(),
test_mask.sum().asscalar()))
#Test samples %d"""
% (
n_edges,
n_classes,
train_mask.sum().asscalar(),
val_mask.sum().asscalar(),
test_mask.sum().asscalar(),
)
)
# add self loop
g = dgl.remove_self_loop(g)
g = dgl.add_self_loop(g)
# create APPNP model
model = APPNP(g,
in_feats,
args.hidden_sizes,
n_classes,
nd.relu,
args.in_drop,
args.edge_drop,
args.alpha,
args.k)
model = APPNP(
g,
in_feats,
args.hidden_sizes,
n_classes,
nd.relu,
args.in_drop,
args.edge_drop,
args.alpha,
args.k,
)
model.initialize(ctx=ctx)
n_train_samples = train_mask.sum().asscalar()
@@ -116,8 +130,11 @@ def main(args):
# use optimizer
print(model.collect_params())
trainer = gluon.Trainer(model.collect_params(), 'adam',
{'learning_rate': args.lr, 'wd': args.weight_decay})
trainer = gluon.Trainer(
model.collect_params(),
"adam",
{"learning_rate": args.lr, "wd": args.weight_decay},
)
# initialize graph
dur = []
@@ -137,36 +154,53 @@ def main(args):
loss.asscalar()
dur.append(time.time() - t0)
acc = evaluate(model, features, labels, val_mask)
print("Epoch {:05d} | Time(s) {:.4f} | Loss {:.4f} | Accuracy {:.4f} | "
"ETputs(KTEPS) {:.2f}". format(
epoch, np.mean(dur), loss.asscalar(), acc, n_edges / np.mean(dur) / 1000))
print(
"Epoch {:05d} | Time(s) {:.4f} | Loss {:.4f} | Accuracy {:.4f} | "
"ETputs(KTEPS) {:.2f}".format(
epoch,
np.mean(dur),
loss.asscalar(),
acc,
n_edges / np.mean(dur) / 1000,
)
)
# test set accuracy
acc = evaluate(model, features, labels, test_mask)
print("Test accuracy {:.2%}".format(acc))
if __name__ == '__main__':
parser = argparse.ArgumentParser(description='APPNP')
if __name__ == "__main__":
parser = argparse.ArgumentParser(description="APPNP")
register_data_args(parser)
parser.add_argument("--in-drop", type=float, default=0.5,
help="input feature dropout")
parser.add_argument("--edge-drop", type=float, default=0.5,
help="edge propagation dropout")
parser.add_argument("--gpu", type=int, default=-1,
help="gpu")
parser.add_argument("--lr", type=float, default=1e-2,
help="learning rate")
parser.add_argument("--n-epochs", type=int, default=200,
help="number of training epochs")
parser.add_argument("--hidden_sizes", type=int, nargs='+', default=[64],
help="hidden unit sizes for appnp")
parser.add_argument("--k", type=int, default=10,
help="Number of propagation steps")
parser.add_argument("--alpha", type=float, default=0.1,
help="Teleport Probability")
parser.add_argument("--weight-decay", type=float, default=5e-4,
help="Weight for L2 loss")
parser.add_argument(
"--in-drop", type=float, default=0.5, help="input feature dropout"
)
parser.add_argument(
"--edge-drop", type=float, default=0.5, help="edge propagation dropout"
)
parser.add_argument("--gpu", type=int, default=-1, help="gpu")
parser.add_argument("--lr", type=float, default=1e-2, help="learning rate")
parser.add_argument(
"--n-epochs", type=int, default=200, help="number of training epochs"
)
parser.add_argument(
"--hidden_sizes",
type=int,
nargs="+",
default=[64],
help="hidden unit sizes for appnp",
)
parser.add_argument(
"--k", type=int, default=10, help="Number of propagation steps"
)
parser.add_argument(
"--alpha", type=float, default=0.1, help="Teleport Probability"
)
parser.add_argument(
"--weight-decay", type=float, default=5e-4, help="Weight for L2 loss"
)
args = parser.parse_args()
print(args)
main(args)
\ No newline at end of file
main(args)
@@ -8,41 +8,62 @@ Pytorch implementation: https://github.com/Diego999/pyGAT
"""
import mxnet.gluon.nn as nn
from dgl.nn.mxnet.conv import GATConv
class GAT(nn.Block):
def __init__(self,
g,
num_layers,
in_dim,
num_hidden,
num_classes,
heads,
activation,
feat_drop,
attn_drop,
alpha,
residual):
def __init__(
self,
g,
num_layers,
in_dim,
num_hidden,
num_classes,
heads,
activation,
feat_drop,
attn_drop,
alpha,
residual,
):
super(GAT, self).__init__()
self.g = g
self.num_layers = num_layers
self.gat_layers = []
self.activation = activation
# input projection (no residual)
self.gat_layers.append(GATConv(
in_dim, num_hidden, heads[0],
feat_drop, attn_drop, alpha, False))
self.gat_layers.append(
GATConv(
in_dim, num_hidden, heads[0], feat_drop, attn_drop, alpha, False
)
)
# hidden layers
for l in range(1, num_layers):
# due to multi-head, the in_dim = num_hidden * num_heads
self.gat_layers.append(GATConv(
num_hidden * heads[l-1], num_hidden, heads[l],
feat_drop, attn_drop, alpha, residual))
self.gat_layers.append(
GATConv(
num_hidden * heads[l - 1],
num_hidden,
heads[l],
feat_drop,
attn_drop,
alpha,
residual,
)
)
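# e.g. with num_hidden = 8 and 8 heads in the previous layer, this hidden
# GATConv consumes 8 * 8 = 64 input features (the concatenated head outputs).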
# output projection
self.gat_layers.append(GATConv(
num_hidden * heads[-2], num_classes, heads[-1],
feat_drop, attn_drop, alpha, residual))
self.gat_layers.append(
GATConv(
num_hidden * heads[-2],
num_classes,
heads[-1],
feat_drop,
attn_drop,
alpha,
residual,
)
)
for i, layer in enumerate(self.gat_layers):
self.register_child(layer, "gat_layer_{}".format(i))
......
@@ -9,20 +9,22 @@ Pytorch implementation: https://github.com/Diego999/pyGAT
"""
import argparse
import networkx as nx
import time
import mxnet as mx
from mxnet import gluon
import networkx as nx
import numpy as np
import dgl
from dgl.data import register_data_args
from dgl.data import CoraGraphDataset, CiteseerGraphDataset, PubmedGraphDataset
from gat import GAT
from mxnet import gluon
from utils import EarlyStopping
import dgl
from dgl.data import (CiteseerGraphDataset, CoraGraphDataset,
PubmedGraphDataset, register_data_args)
def elu(data):
return mx.nd.LeakyReLU(data, act_type='elu')
return mx.nd.LeakyReLU(data, act_type="elu")
def evaluate(model, features, labels, mask):
@@ -36,14 +38,14 @@ def evaluate(model, features, labels, mask):
def main(args):
# load and preprocess dataset
if args.dataset == 'cora':
if args.dataset == "cora":
data = CoraGraphDataset()
elif args.dataset == 'citeseer':
elif args.dataset == "citeseer":
data = CiteseerGraphDataset()
elif args.dataset == 'pubmed':
elif args.dataset == "pubmed":
data = PubmedGraphDataset()
else:
raise ValueError('Unknown dataset: {}'.format(args.dataset))
raise ValueError("Unknown dataset: {}".format(args.dataset))
g = data[0]
if args.gpu < 0:
@@ -54,14 +56,14 @@ def main(args):
ctx = mx.gpu(args.gpu)
g = g.to(ctx)
features = g.ndata['feat']
labels = mx.nd.array(g.ndata['label'], dtype="float32", ctx=ctx)
mask = g.ndata['train_mask']
features = g.ndata["feat"]
labels = mx.nd.array(g.ndata["label"], dtype="float32", ctx=ctx)
mask = g.ndata["train_mask"]
mask = mx.nd.array(np.nonzero(mask.asnumpy())[0], ctx=ctx)
val_mask = g.ndata['val_mask']
val_mask = mx.nd.array(np.nonzero(val_mask.asnumpy())[0], ctx=ctx)
test_mask = g.ndata['test_mask']
test_mask = mx.nd.array(np.nonzero(test_mask.asnumpy())[0], ctx=ctx)
val_mask = g.ndata["val_mask"]
val_mask = mx.nd.array(np.nonzero(val_mask.asnumpy())[0], ctx=ctx)
test_mask = g.ndata["test_mask"]
test_mask = mx.nd.array(np.nonzero(test_mask.asnumpy())[0], ctx=ctx)
in_feats = features.shape[1]
n_classes = data.num_labels
n_edges = data.graph.number_of_edges()
......@@ -70,24 +72,28 @@ def main(args):
g = dgl.add_self_loop(g)
# create model
heads = ([args.num_heads] * args.num_layers) + [args.num_out_heads]
model = GAT(g,
args.num_layers,
in_feats,
args.num_hidden,
n_classes,
heads,
elu,
args.in_drop,
args.attn_drop,
args.alpha,
args.residual)
model = GAT(
g,
args.num_layers,
in_feats,
args.num_hidden,
n_classes,
heads,
elu,
args.in_drop,
args.attn_drop,
args.alpha,
args.residual,
)
if args.early_stop:
stopper = EarlyStopping(patience=100)
model.initialize(ctx=ctx)
# use optimizer
trainer = gluon.Trainer(model.collect_params(), 'adam', {'learning_rate': args.lr})
trainer = gluon.Trainer(
model.collect_params(), "adam", {"learning_rate": args.lr}
)
dur = []
for epoch in range(args.epochs):
......@@ -96,14 +102,22 @@ def main(args):
# forward
with mx.autograd.record():
logits = model(features)
loss = mx.nd.softmax_cross_entropy(logits[mask].squeeze(), labels[mask].squeeze())
loss = mx.nd.softmax_cross_entropy(
logits[mask].squeeze(), labels[mask].squeeze()
)
loss.backward()
trainer.step(mask.shape[0])
if epoch >= 3:
dur.append(time.time() - t0)
print("Epoch {:05d} | Loss {:.4f} | Time(s) {:.4f} | ETputs(KTEPS) {:.2f}".format(
epoch, loss.asnumpy()[0], np.mean(dur), n_edges / np.mean(dur) / 1000))
print(
"Epoch {:05d} | Loss {:.4f} | Time(s) {:.4f} | ETputs(KTEPS) {:.2f}".format(
epoch,
loss.asnumpy()[0],
np.mean(dur),
n_edges / np.mean(dur) / 1000,
)
)
val_accuracy = evaluate(model, features, labels, val_mask)
print("Validation Accuracy {:.4f}".format(val_accuracy))
if args.early_stop:
......@@ -112,41 +126,70 @@ def main(args):
print()
if args.early_stop:
model.load_parameters('model.param')
model.load_parameters("model.param")
test_accuracy = evaluate(model, features, labels, test_mask)
print("Test Accuracy {:.4f}".format(test_accuracy))
if __name__ == '__main__':
if __name__ == "__main__":
parser = argparse.ArgumentParser(description='GAT')
parser = argparse.ArgumentParser(description="GAT")
register_data_args(parser)
parser.add_argument("--gpu", type=int, default=-1,
help="which GPU to use. Set -1 to use CPU.")
parser.add_argument("--epochs", type=int, default=200,
help="number of training epochs")
parser.add_argument("--num-heads", type=int, default=8,
help="number of hidden attention heads")
parser.add_argument("--num-out-heads", type=int, default=1,
help="number of output attention heads")
parser.add_argument("--num-layers", type=int, default=1,
help="number of hidden layers")
parser.add_argument("--num-hidden", type=int, default=8,
help="number of hidden units")
parser.add_argument("--residual", action="store_true", default=False,
help="use residual connection")
parser.add_argument("--in-drop", type=float, default=.6,
help="input feature dropout")
parser.add_argument("--attn-drop", type=float, default=.6,
help="attention dropout")
parser.add_argument("--lr", type=float, default=0.005,
help="learning rate")
parser.add_argument('--weight-decay', type=float, default=5e-4,
help="weight decay")
parser.add_argument('--alpha', type=float, default=0.2,
help="the negative slop of leaky relu")
parser.add_argument('--early-stop', action='store_true', default=False,
help="indicates whether to use early stop or not")
parser.add_argument(
"--gpu",
type=int,
default=-1,
help="which GPU to use. Set -1 to use CPU.",
)
parser.add_argument(
"--epochs", type=int, default=200, help="number of training epochs"
)
parser.add_argument(
"--num-heads",
type=int,
default=8,
help="number of hidden attention heads",
)
parser.add_argument(
"--num-out-heads",
type=int,
default=1,
help="number of output attention heads",
)
parser.add_argument(
"--num-layers", type=int, default=1, help="number of hidden layers"
)
parser.add_argument(
"--num-hidden", type=int, default=8, help="number of hidden units"
)
parser.add_argument(
"--residual",
action="store_true",
default=False,
help="use residual connection",
)
parser.add_argument(
"--in-drop", type=float, default=0.6, help="input feature dropout"
)
parser.add_argument(
"--attn-drop", type=float, default=0.6, help="attention dropout"
)
parser.add_argument("--lr", type=float, default=0.005, help="learning rate")
parser.add_argument(
"--weight-decay", type=float, default=5e-4, help="weight decay"
)
parser.add_argument(
"--alpha",
type=float,
default=0.2,
help="the negative slop of leaky relu",
)
parser.add_argument(
"--early-stop",
action="store_true",
default=False,
help="indicates whether to use early stop or not",
)
args = parser.parse_args()
print(args)
......
import numpy as np
class EarlyStopping:
def __init__(self, patience=10):
self.patience = patience
......@@ -14,7 +15,9 @@ class EarlyStopping:
self.save_checkpoint(model)
elif score < self.best_score:
self.counter += 1
print(f'EarlyStopping counter: {self.counter} out of {self.patience}')
print(
f"EarlyStopping counter: {self.counter} out of {self.patience}"
)
if self.counter >= self.patience:
self.early_stop = True
else:
......@@ -24,5 +27,5 @@ class EarlyStopping:
return self.early_stop
def save_checkpoint(self, model):
        '''Saves model when validation loss decreases.'''
model.save_parameters('model.param')
"""Saves model when validation loss decrease."""
model.save_parameters("model.param")
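# A minimal sketch of how the EarlyStopping helper above is typically driven
# from a validation loop (model, features, labels, val_mask and evaluate come
# from the GAT training script earlier in this diff). The method name `step`
# and its (score, model) signature are an assumption here, since that part of
# the class is elided from this hunk; the checkpoint path matches
# save_checkpoint above.
stopper = EarlyStopping(patience=100)
for epoch in range(200):
    # ... run one training epoch ...
    val_acc = evaluate(model, features, labels, val_mask)
    if stopper.step(val_acc, model):  # True once patience is exhausted
        break
model.load_parameters("model.param")  # restore the best checkpoint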
......@@ -7,18 +7,15 @@ References:
"""
import mxnet as mx
from mxnet import gluon
import dgl
from dgl.nn.mxnet import GraphConv
class GCN(gluon.Block):
def __init__(self,
g,
in_feats,
n_hidden,
n_classes,
n_layers,
activation,
dropout):
def __init__(
self, g, in_feats, n_hidden, n_classes, n_layers, activation, dropout
):
super(GCN, self).__init__()
self.g = g
self.layers = gluon.nn.Sequential()
......@@ -26,7 +23,9 @@ class GCN(gluon.Block):
self.layers.add(GraphConv(in_feats, n_hidden, activation=activation))
# hidden layers
for i in range(n_layers - 1):
self.layers.add(GraphConv(n_hidden, n_hidden, activation=activation))
self.layers.add(
GraphConv(n_hidden, n_hidden, activation=activation)
)
# output layer
self.layers.add(GraphConv(n_hidden, n_classes))
self.dropout = gluon.nn.Dropout(rate=dropout)
......
......@@ -8,14 +8,15 @@ References:
import mxnet as mx
from mxnet import gluon
def gcn_msg(edge):
msg = edge.src['h'] * edge.src['norm']
return {'m': msg}
msg = edge.src["h"] * edge.src["norm"]
return {"m": msg}
def gcn_reduce(node):
accum = mx.nd.sum(node.mailbox['m'], 1) * node.data['norm']
return {'h': accum}
accum = mx.nd.sum(node.mailbox["m"], 1) * node.data["norm"]
return {"h": accum}
class NodeUpdate(gluon.Block):
......@@ -23,66 +24,59 @@ class NodeUpdate(gluon.Block):
super(NodeUpdate, self).__init__()
with self.name_scope():
if bias:
self.bias = self.params.get('bias', shape=(out_feats,),
init=mx.init.Zero())
self.bias = self.params.get(
"bias", shape=(out_feats,), init=mx.init.Zero()
)
else:
self.bias = None
self.activation = activation
def forward(self, node):
h = node.data['h']
h = node.data["h"]
if self.bias is not None:
h = h + self.bias.data(h.context)
if self.activation:
h = self.activation(h)
return {'h': h}
return {"h": h}
class GCNLayer(gluon.Block):
def __init__(self,
g,
in_feats,
out_feats,
activation,
dropout,
bias=True):
def __init__(self, g, in_feats, out_feats, activation, dropout, bias=True):
super(GCNLayer, self).__init__()
self.g = g
self.dropout = dropout
with self.name_scope():
self.weight = self.params.get('weight', shape=(in_feats, out_feats),
init=mx.init.Xavier())
self.weight = self.params.get(
"weight", shape=(in_feats, out_feats), init=mx.init.Xavier()
)
self.node_update = NodeUpdate(out_feats, activation, bias)
def forward(self, h):
if self.dropout:
h = mx.nd.Dropout(h, p=self.dropout)
h = mx.nd.dot(h, self.weight.data(h.context))
self.g.ndata['h'] = h
self.g.ndata["h"] = h
self.g.update_all(gcn_msg, gcn_reduce, self.node_update)
h = self.g.ndata.pop('h')
h = self.g.ndata.pop("h")
return h
class GCN(gluon.Block):
def __init__(self,
g,
in_feats,
n_hidden,
n_classes,
n_layers,
activation,
dropout):
def __init__(
self, g, in_feats, n_hidden, n_classes, n_layers, activation, dropout
):
super(GCN, self).__init__()
self.layers = gluon.nn.Sequential()
# input layer
self.layers.add(GCNLayer(g, in_feats, n_hidden, activation, 0))
# hidden layers
for i in range(n_layers - 1):
self.layers.add(GCNLayer(g, n_hidden, n_hidden, activation, dropout))
self.layers.add(
GCNLayer(g, n_hidden, n_hidden, activation, dropout)
)
# output layer
self.layers.add(GCNLayer(g, n_hidden, n_classes, None, dropout))
def forward(self, features):
h = features
for layer in self.layers:
......
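# The hand-written gcn_msg / gcn_reduce pair above can equivalently be written
# with DGL's built-in message and reduce functions. A sketch of just the
# propagation step, assuming a graph whose nodes carry the same "norm" field
# (D^{-1/2}) that the training script prepares:
import dgl.function as fn


def gcn_propagate(g, h):
    # scale by D^{-1/2} on the source side, sum incoming messages, then scale
    # by D^{-1/2} on the destination side -- symmetrically normalized GCN
    g.ndata["h"] = h * g.ndata["norm"]
    g.update_all(fn.copy_u("h", "m"), fn.sum("m", "h"))
    return g.ndata.pop("h") * g.ndata["norm"]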
"""Training GCN model on citation graphs."""
import argparse, time
import numpy as np
import argparse
import time
import mxnet as mx
import numpy as np
from gcn import GCN
from mxnet import gluon
import dgl
from dgl.data import CoraGraphDataset, CiteseerGraphDataset, PubmedGraphDataset
from dgl.data import CiteseerGraphDataset, CoraGraphDataset, PubmedGraphDataset
# from gcn_mp import GCN
# from gcn_spmv import GCN
from gcn import GCN
#from gcn_mp import GCN
#from gcn_spmv import GCN
def evaluate(model, features, labels, mask):
pred = model(features).argmax(axis=1)
accuracy = ((pred == labels) * mask).sum() / mask.sum().asscalar()
return accuracy.asscalar()
def main(args):
# load and preprocess dataset
if args.dataset == 'cora':
if args.dataset == "cora":
data = CoraGraphDataset()
elif args.dataset == 'citeseer':
elif args.dataset == "citeseer":
data = CiteseerGraphDataset()
elif args.dataset == 'pubmed':
elif args.dataset == "pubmed":
data = PubmedGraphDataset()
else:
raise ValueError('Unknown dataset: {}'.format(args.dataset))
raise ValueError("Unknown dataset: {}".format(args.dataset))
g = data[0]
if args.gpu < 0:
......@@ -36,51 +40,61 @@ def main(args):
ctx = mx.gpu(args.gpu)
g = g.int().to(ctx)
features = g.ndata['feat']
labels = mx.nd.array(g.ndata['label'], dtype="float32", ctx=ctx)
train_mask = g.ndata['train_mask']
val_mask = g.ndata['val_mask']
test_mask = g.ndata['test_mask']
features = g.ndata["feat"]
labels = mx.nd.array(g.ndata["label"], dtype="float32", ctx=ctx)
train_mask = g.ndata["train_mask"]
val_mask = g.ndata["val_mask"]
test_mask = g.ndata["test_mask"]
in_feats = features.shape[1]
n_classes = data.num_labels
n_edges = data.graph.number_of_edges()
print("""----Data statistics------'
print(
"""----Data statistics------'
#Edges %d
#Classes %d
#Train samples %d
#Val samples %d
#Test samples %d""" %
(n_edges, n_classes,
train_mask.sum().asscalar(),
val_mask.sum().asscalar(),
test_mask.sum().asscalar()))
#Test samples %d"""
% (
n_edges,
n_classes,
train_mask.sum().asscalar(),
val_mask.sum().asscalar(),
test_mask.sum().asscalar(),
)
)
# add self loop
if args.self_loop:
g = dgl.remove_self_loop(g)
g = dgl.add_self_loop(g)
# normalization
degs = g.in_degrees().astype('float32')
degs = g.in_degrees().astype("float32")
norm = mx.nd.power(degs, -0.5)
if cuda:
norm = norm.as_in_context(ctx)
g.ndata['norm'] = mx.nd.expand_dims(norm, 1)
model = GCN(g,
in_feats,
args.n_hidden,
n_classes,
args.n_layers,
mx.nd.relu,
args.dropout)
g.ndata["norm"] = mx.nd.expand_dims(norm, 1)
model = GCN(
g,
in_feats,
args.n_hidden,
n_classes,
args.n_layers,
mx.nd.relu,
args.dropout,
)
model.initialize(ctx=ctx)
n_train_samples = train_mask.sum().asscalar()
loss_fcn = gluon.loss.SoftmaxCELoss()
# use optimizer
print(model.collect_params())
trainer = gluon.Trainer(model.collect_params(), 'adam',
{'learning_rate': args.lr, 'wd': args.weight_decay})
trainer = gluon.Trainer(
model.collect_params(),
"adam",
{"learning_rate": args.lr, "wd": args.weight_decay},
)
# initialize graph
dur = []
......@@ -100,34 +114,52 @@ def main(args):
loss.asscalar()
dur.append(time.time() - t0)
acc = evaluate(model, features, labels, val_mask)
print("Epoch {:05d} | Time(s) {:.4f} | Loss {:.4f} | Accuracy {:.4f} | "
"ETputs(KTEPS) {:.2f}". format(
epoch, np.mean(dur), loss.asscalar(), acc, n_edges / np.mean(dur) / 1000))
print(
"Epoch {:05d} | Time(s) {:.4f} | Loss {:.4f} | Accuracy {:.4f} | "
"ETputs(KTEPS) {:.2f}".format(
epoch,
np.mean(dur),
loss.asscalar(),
acc,
n_edges / np.mean(dur) / 1000,
)
)
# test set accuracy
acc = evaluate(model, features, labels, test_mask)
print("Test accuracy {:.2%}".format(acc))
if __name__ == '__main__':
parser = argparse.ArgumentParser(description='GCN')
parser.add_argument("--dataset", type=str, default="cora",
help="Dataset name ('cora', 'citeseer', 'pubmed').")
parser.add_argument("--dropout", type=float, default=0.5,
help="dropout probability")
parser.add_argument("--gpu", type=int, default=-1,
help="gpu")
parser.add_argument("--lr", type=float, default=3e-2,
help="learning rate")
parser.add_argument("--n-epochs", type=int, default=200,
help="number of training epochs")
parser.add_argument("--n-hidden", type=int, default=16,
help="number of hidden gcn units")
parser.add_argument("--n-layers", type=int, default=1,
help="number of hidden gcn layers")
parser.add_argument("--weight-decay", type=float, default=5e-4,
help="Weight for L2 loss")
parser.add_argument("--self-loop", action='store_true',
help="graph self-loop (default=False)")
if __name__ == "__main__":
parser = argparse.ArgumentParser(description="GCN")
parser.add_argument(
"--dataset",
type=str,
default="cora",
help="Dataset name ('cora', 'citeseer', 'pubmed').",
)
parser.add_argument(
"--dropout", type=float, default=0.5, help="dropout probability"
)
parser.add_argument("--gpu", type=int, default=-1, help="gpu")
parser.add_argument("--lr", type=float, default=3e-2, help="learning rate")
parser.add_argument(
"--n-epochs", type=int, default=200, help="number of training epochs"
)
parser.add_argument(
"--n-hidden", type=int, default=16, help="number of hidden gcn units"
)
parser.add_argument(
"--n-layers", type=int, default=1, help="number of hidden gcn layers"
)
parser.add_argument(
"--weight-decay", type=float, default=5e-4, help="Weight for L2 loss"
)
parser.add_argument(
"--self-loop",
action="store_true",
help="graph self-loop (default=False)",
)
parser.set_defaults(self_loop=False)
args = parser.parse_args()
......
......@@ -2,24 +2,29 @@
MxNet compatible dataloader
"""
from mxnet.gluon.data import DataLoader, Sampler
import math
import numpy as np
from mxnet import nd
from mxnet.gluon.data import DataLoader, Sampler
from sklearn.model_selection import StratifiedKFold
import dgl
class SubsetRandomSampler(Sampler):
def __init__(self, indices):
self.indices = indices
def __iter__(self):
return iter([self.indices[i] for i in np.random.permutation(len(self.indices))])
return iter(
[self.indices[i] for i in np.random.permutation(len(self.indices))]
)
def __len__(self):
return len(self.indices)
# default collate function
def collate(samples):
# The input `samples` is a list of pairs (graph, label).
......@@ -34,28 +39,33 @@ def collate(samples):
labels = nd.concat(*labels, dim=0)
return batched_graph, labels
class GraphDataLoader():
def __init__(self,
dataset,
batch_size,
collate_fn=collate,
seed=0,
shuffle=True,
split_name='fold10',
fold_idx=0,
split_ratio=0.7):
class GraphDataLoader:
def __init__(
self,
dataset,
batch_size,
collate_fn=collate,
seed=0,
shuffle=True,
split_name="fold10",
fold_idx=0,
split_ratio=0.7,
):
self.shuffle = shuffle
self.seed = seed
labels = [l for _, l in dataset]
if split_name == 'fold10':
if split_name == "fold10":
train_idx, valid_idx = self._split_fold10(
labels, fold_idx, seed, shuffle)
elif split_name == 'rand':
labels, fold_idx, seed, shuffle
)
elif split_name == "rand":
train_idx, valid_idx = self._split_rand(
labels, split_ratio, seed, shuffle)
labels, split_ratio, seed, shuffle
)
else:
raise NotImplementedError()
......@@ -63,29 +73,36 @@ class GraphDataLoader():
valid_sampler = SubsetRandomSampler(valid_idx)
self.train_loader = DataLoader(
dataset, sampler=train_sampler,
batch_size=batch_size, batchify_fn=collate_fn)
dataset,
sampler=train_sampler,
batch_size=batch_size,
batchify_fn=collate_fn,
)
self.valid_loader = DataLoader(
dataset, sampler=valid_sampler,
batch_size=batch_size, batchify_fn=collate_fn)
dataset,
sampler=valid_sampler,
batch_size=batch_size,
batchify_fn=collate_fn,
)
def train_valid_loader(self):
return self.train_loader, self.valid_loader
def _split_fold10(self, labels, fold_idx=0, seed=0, shuffle=True):
        ''' 10 fold '''
        """10 fold"""
assert 0 <= fold_idx and fold_idx < 10, print(
"fold_idx must be from 0 to 9.")
"fold_idx must be from 0 to 9."
)
skf = StratifiedKFold(n_splits=10, shuffle=shuffle, random_state=seed)
idx_list = []
for idx in skf.split(np.zeros(len(labels)), [label.asnumpy() for label in labels]): # split(x, y)
for idx in skf.split(
np.zeros(len(labels)), [label.asnumpy() for label in labels]
): # split(x, y)
idx_list.append(idx)
train_idx, valid_idx = idx_list[fold_idx]
print(
"train_set : test_set = %d : %d",
len(train_idx), len(valid_idx))
print("train_set : test_set = %d : %d", len(train_idx), len(valid_idx))
return train_idx, valid_idx
......@@ -97,8 +114,6 @@ class GraphDataLoader():
split = int(math.floor(split_ratio * num_entries))
train_idx, valid_idx = indices[:split], indices[split:]
print(
"train_set : test_set = %d : %d",
len(train_idx), len(valid_idx))
print("train_set : test_set = %d : %d", len(train_idx), len(valid_idx))
return train_idx, valid_idx
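# _split_fold10 above relies on sklearn's StratifiedKFold: every fold keeps
# roughly the class proportions of the full label set. A standalone sketch
# with toy labels (values are illustrative):
import numpy as np
from sklearn.model_selection import StratifiedKFold

toy_labels = np.array([0, 0, 0, 0, 0, 1, 1, 1, 1, 1])
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=0)
for train_idx, valid_idx in skf.split(np.zeros(len(toy_labels)), toy_labels):
    # each validation fold holds one sample of every class
    print(valid_idx, toy_labels[valid_idx])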
......@@ -6,14 +6,16 @@ Author's implementation: https://github.com/weihua916/powerful-gnns
"""
import mxnet as mx
from mxnet import nd, gluon
from mxnet import gluon, nd
from mxnet.gluon import nn
from dgl.nn.mxnet.conv import GINConv
from dgl.nn.mxnet.glob import SumPooling, AvgPooling, MaxPooling
from dgl.nn.mxnet.glob import AvgPooling, MaxPooling, SumPooling
class ApplyNodeFunc(nn.Block):
"""Update the node feature hv with MLP, BN and ReLU."""
def __init__(self, mlp):
super(ApplyNodeFunc, self).__init__()
with self.name_scope():
......@@ -29,6 +31,7 @@ class ApplyNodeFunc(nn.Block):
class MLP(nn.Block):
"""MLP with linear output"""
def __init__(self, num_layers, input_dim, hidden_dim, output_dim):
"""MLP layers construction
......@@ -79,9 +82,19 @@ class MLP(nn.Block):
class GIN(nn.Block):
"""GIN model"""
def __init__(self, num_layers, num_mlp_layers, input_dim, hidden_dim,
output_dim, final_dropout, learn_eps, graph_pooling_type,
neighbor_pooling_type):
def __init__(
self,
num_layers,
num_mlp_layers,
input_dim,
hidden_dim,
output_dim,
final_dropout,
learn_eps,
graph_pooling_type,
neighbor_pooling_type,
):
"""model parameters setting
        Parameters
......@@ -120,27 +133,39 @@ class GIN(nn.Block):
if i == 0:
mlp = MLP(num_mlp_layers, input_dim, hidden_dim, hidden_dim)
else:
mlp = MLP(num_mlp_layers, hidden_dim, hidden_dim, hidden_dim)
mlp = MLP(
num_mlp_layers, hidden_dim, hidden_dim, hidden_dim
)
self.ginlayers.add(
GINConv(ApplyNodeFunc(mlp), neighbor_pooling_type, 0, self.learn_eps))
GINConv(
ApplyNodeFunc(mlp),
neighbor_pooling_type,
0,
self.learn_eps,
)
)
self.batch_norms.add(nn.BatchNorm(in_channels=hidden_dim))
self.linears_prediction = nn.Sequential()
for i in range(num_layers):
if i == 0:
self.linears_prediction.add(nn.Dense(output_dim, in_units=input_dim))
self.linears_prediction.add(
nn.Dense(output_dim, in_units=input_dim)
)
else:
self.linears_prediction.add(nn.Dense(output_dim, in_units=hidden_dim))
self.linears_prediction.add(
nn.Dense(output_dim, in_units=hidden_dim)
)
self.drop = nn.Dropout(final_dropout)
if graph_pooling_type == 'sum':
if graph_pooling_type == "sum":
self.pool = SumPooling()
elif graph_pooling_type == 'mean':
elif graph_pooling_type == "mean":
self.pool = AvgPooling()
elif graph_pooling_type == 'max':
elif graph_pooling_type == "max":
self.pool = MaxPooling()
else:
raise NotImplementedError
......@@ -158,6 +183,8 @@ class GIN(nn.Block):
# perform pooling over all nodes in each graph in every layer
for i, h in enumerate(hidden_rep):
pooled_h = self.pool(g, h)
score_over_layer = score_over_layer + self.drop(self.linears_prediction[i](pooled_h))
score_over_layer = score_over_layer + self.drop(
self.linears_prediction[i](pooled_h)
)
return score_over_layer
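# A standalone sketch of the GINConv building block stacked by the GIN model
# above, assuming DGL's MXNet backend. The apply function here is a single
# Dense layer instead of the ApplyNodeFunc/MLP pair, and all sizes are
# illustrative:
import mxnet as mx
from mxnet.gluon import nn

import dgl
from dgl.nn.mxnet.conv import GINConv

g = dgl.add_self_loop(dgl.graph(([0, 1, 2], [1, 2, 0])))  # toy 3-node graph
feat = mx.nd.random.uniform(shape=(3, 5))
conv = GINConv(nn.Dense(8, in_units=5), "sum", init_eps=0, learn_eps=False)
conv.initialize()
out = conv(g, feat)  # shape: (3, 8)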
import sys
import numpy as np
from tqdm import tqdm
from parser import Parser
import mxnet as mx
import numpy as np
from dataloader import GraphDataLoader, collate
from gin import GIN
from mxnet import gluon, nd
from mxnet.gluon import nn
from tqdm import tqdm
from dgl.data.gindt import GINDataset
from dataloader import GraphDataLoader, collate
from parser import Parser
from gin import GIN
def train(args, net, trainloader, trainer, criterion, epoch):
running_loss = 0
total_iters = len(trainloader)
    # set up the offset to avoid overlap with the mouse cursor
bar = tqdm(range(total_iters), unit='batch', position=2, file=sys.stdout)
bar = tqdm(range(total_iters), unit="batch", position=2, file=sys.stdout)
for pos, (graphs, labels) in zip(bar, trainloader):
# batch graphs will be shipped to device in forward part of model
labels = labels.as_in_context(args.device)
feat = graphs.ndata['attr'].as_in_context(args.device)
feat = graphs.ndata["attr"].as_in_context(args.device)
with mx.autograd.record():
graphs = graphs.to(args.device)
......@@ -36,7 +36,7 @@ def train(args, net, trainloader, trainer, criterion, epoch):
trainer.step(batch_size=1)
# report
bar.set_description('epoch-{}'.format(epoch))
bar.set_description("epoch-{}".format(epoch))
bar.close()
# the final batch will be aligned
running_loss = running_loss / total_iters
......@@ -52,20 +52,20 @@ def eval_net(args, net, dataloader, criterion):
for data in dataloader:
graphs, labels = data
labels = labels.as_in_context(args.device)
feat = graphs.ndata['attr'].as_in_context(args.device)
feat = graphs.ndata["attr"].as_in_context(args.device)
total += len(labels)
graphs = graphs.to(args.device)
outputs = net(graphs, feat)
predicted = nd.argmax(outputs, axis=1)
predicted = predicted.astype('int64')
predicted = predicted.astype("int64")
total_correct += (predicted == labels).sum().asscalar()
loss = criterion(outputs, labels)
            # crossentropy(reduce=True) by default
total_loss += loss.sum().asscalar()
loss, acc = 1.0 * total_loss / total, 1.0*total_correct / total
loss, acc = 1.0 * total_loss / total, 1.0 * total_correct / total
return loss, acc
......@@ -76,8 +76,6 @@ def main(args):
mx.random.seed(0)
np.random.seed(seed=0)
if args.device >= 0:
args.device = mx.gpu(args.device)
else:
......@@ -86,75 +84,102 @@ def main(args):
dataset = GINDataset(args.dataset, not args.learn_eps)
trainloader, validloader = GraphDataLoader(
dataset, batch_size=args.batch_size,
collate_fn=collate, seed=args.seed, shuffle=True,
split_name='fold10', fold_idx=args.fold_idx).train_valid_loader()
dataset,
batch_size=args.batch_size,
collate_fn=collate,
seed=args.seed,
shuffle=True,
split_name="fold10",
fold_idx=args.fold_idx,
).train_valid_loader()
# or split_name='rand', split_ratio=0.7
model = GIN(
args.num_layers, args.num_mlp_layers,
dataset.dim_nfeats, args.hidden_dim, dataset.gclasses,
args.final_dropout, args.learn_eps,
args.graph_pooling_type, args.neighbor_pooling_type)
args.num_layers,
args.num_mlp_layers,
dataset.dim_nfeats,
args.hidden_dim,
dataset.gclasses,
args.final_dropout,
args.learn_eps,
args.graph_pooling_type,
args.neighbor_pooling_type,
)
model.initialize(ctx=args.device)
criterion = gluon.loss.SoftmaxCELoss()
print(model.collect_params())
lr_scheduler = mx.lr_scheduler.FactorScheduler(50, 0.5)
trainer = gluon.Trainer(model.collect_params(), 'adam',
{'lr_scheduler': lr_scheduler})
trainer = gluon.Trainer(
model.collect_params(), "adam", {"lr_scheduler": lr_scheduler}
)
    # it's not cost-effective to handle the cursor and init 0
# https://stackoverflow.com/a/23121189
tbar = tqdm(range(args.epochs), unit="epoch", position=3, ncols=0, file=sys.stdout)
vbar = tqdm(range(args.epochs), unit="epoch", position=4, ncols=0, file=sys.stdout)
lrbar = tqdm(range(args.epochs), unit="epoch", position=5, ncols=0, file=sys.stdout)
tbar = tqdm(
range(args.epochs), unit="epoch", position=3, ncols=0, file=sys.stdout
)
vbar = tqdm(
range(args.epochs), unit="epoch", position=4, ncols=0, file=sys.stdout
)
lrbar = tqdm(
range(args.epochs), unit="epoch", position=5, ncols=0, file=sys.stdout
)
for epoch, _, _ in zip(tbar, vbar, lrbar):
train(args, model, trainloader, trainer, criterion, epoch)
train_loss, train_acc = eval_net(
args, model, trainloader, criterion)
train_loss, train_acc = eval_net(args, model, trainloader, criterion)
tbar.set_description(
'train set - average loss: {:.4f}, accuracy: {:.0f}%'
.format(train_loss, 100. * train_acc))
"train set - average loss: {:.4f}, accuracy: {:.0f}%".format(
train_loss, 100.0 * train_acc
)
)
valid_loss, valid_acc = eval_net(
args, model, validloader, criterion)
valid_loss, valid_acc = eval_net(args, model, validloader, criterion)
vbar.set_description(
'valid set - average loss: {:.4f}, accuracy: {:.0f}%'
.format(valid_loss, 100. * valid_acc))
"valid set - average loss: {:.4f}, accuracy: {:.0f}%".format(
valid_loss, 100.0 * valid_acc
)
)
if not args.filename == "":
with open(args.filename, 'a') as f:
f.write('%s %s %s %s' % (
args.dataset,
args.learn_eps,
args.neighbor_pooling_type,
args.graph_pooling_type
))
with open(args.filename, "a") as f:
f.write(
"%s %s %s %s"
% (
args.dataset,
args.learn_eps,
args.neighbor_pooling_type,
args.graph_pooling_type,
)
)
f.write("\n")
f.write("%f %f %f %f" % (
train_loss,
train_acc,
valid_loss,
valid_acc
))
f.write(
"%f %f %f %f"
% (train_loss, train_acc, valid_loss, valid_acc)
)
f.write("\n")
lrbar.set_description(
"Learning eps with learn_eps={}: {}".format(
args.learn_eps, [layer.eps.data(args.device).asscalar() for layer in model.ginlayers]))
args.learn_eps,
[
layer.eps.data(args.device).asscalar()
for layer in model.ginlayers
],
)
)
tbar.close()
vbar.close()
lrbar.close()
if __name__ == '__main__':
args = Parser(description='GIN').args
print('show all arguments configuration...')
if __name__ == "__main__":
args = Parser(description="GIN").args
print("show all arguments configuration...")
print(args)
main(args)
......@@ -5,12 +5,11 @@ Put all arguments in one file and group similar arguments
import argparse
class Parser():
class Parser:
def __init__(self, description):
'''
arguments parser
'''
"""
arguments parser
"""
self.parser = argparse.ArgumentParser(description=description)
self.args = None
self._parse()
......@@ -18,69 +17,109 @@ class Parser():
def _parse(self):
# dataset
self.parser.add_argument(
'--dataset', type=str, default="MUTAG",
help='name of dataset (default: MUTAG)')
self.parser.add_argument(
'--batch_size', type=int, default=32,
help='batch size for training and validation (default: 32)')
self.parser.add_argument(
'--fold_idx', type=int, default=0,
help='the index(<10) of fold in 10-fold validation.')
self.parser.add_argument(
'--filename', type=str, default="",
help='output file')
"--dataset",
type=str,
default="MUTAG",
help="name of dataset (default: MUTAG)",
)
self.parser.add_argument(
"--batch_size",
type=int,
default=32,
help="batch size for training and validation (default: 32)",
)
self.parser.add_argument(
"--fold_idx",
type=int,
default=0,
help="the index(<10) of fold in 10-fold validation.",
)
self.parser.add_argument(
"--filename", type=str, default="", help="output file"
)
# device
self.parser.add_argument(
'--disable-cuda', action='store_true',
help='Disable CUDA')
"--disable-cuda", action="store_true", help="Disable CUDA"
)
self.parser.add_argument(
'--device', type=int, default=0,
help='which gpu device to use (default: 0)')
"--device",
type=int,
default=0,
help="which gpu device to use (default: 0)",
)
# net
self.parser.add_argument(
'--net', type=str, default="gin",
help='gnn net (default: gin)')
self.parser.add_argument(
'--num_layers', type=int, default=5,
help='number of layers (default: 5)')
self.parser.add_argument(
'--num_mlp_layers', type=int, default=2,
            help='number of MLP layers (default: 2). 1 means linear model.')
self.parser.add_argument(
'--hidden_dim', type=int, default=64,
help='number of hidden units (default: 64)')
"--net", type=str, default="gin", help="gnn net (default: gin)"
)
self.parser.add_argument(
"--num_layers",
type=int,
default=5,
help="number of layers (default: 5)",
)
self.parser.add_argument(
"--num_mlp_layers",
type=int,
default=2,
help="number of MLP layers(default: 2). 1 means linear model.",
)
self.parser.add_argument(
"--hidden_dim",
type=int,
default=64,
help="number of hidden units (default: 64)",
)
# graph
self.parser.add_argument(
'--graph_pooling_type', type=str,
default="sum", choices=["sum", "mean", "max"],
help='type of graph pooling: sum, mean or max')
self.parser.add_argument(
'--neighbor_pooling_type', type=str,
default="sum", choices=["sum", "mean", "max"],
help='type of neighboring pooling: sum, mean or max')
self.parser.add_argument(
'--learn_eps', action="store_true",
help='learn the epsilon weighting')
self.parser.add_argument(
'--degree_as_tag', action="store_true",
help='take the degree of nodes as input feature')
"--graph_pooling_type",
type=str,
default="sum",
choices=["sum", "mean", "max"],
help="type of graph pooling: sum, mean or max",
)
self.parser.add_argument(
"--neighbor_pooling_type",
type=str,
default="sum",
choices=["sum", "mean", "max"],
help="type of neighboring pooling: sum, mean or max",
)
self.parser.add_argument(
"--learn_eps",
action="store_true",
help="learn the epsilon weighting",
)
self.parser.add_argument(
"--degree_as_tag",
action="store_true",
help="take the degree of nodes as input feature",
)
# learning
self.parser.add_argument(
'--seed', type=int, default=0,
help='random seed (default: 0)')
self.parser.add_argument(
'--epochs', type=int, default=350,
help='number of epochs to train (default: 350)')
self.parser.add_argument(
'--lr', type=float, default=0.01,
help='learning rate (default: 0.01)')
self.parser.add_argument(
'--final_dropout', type=float, default=0.5,
help='final layer dropout (default: 0.5)')
"--seed", type=int, default=0, help="random seed (default: 0)"
)
self.parser.add_argument(
"--epochs",
type=int,
default=350,
help="number of epochs to train (default: 350)",
)
self.parser.add_argument(
"--lr",
type=float,
default=0.01,
help="learning rate (default: 0.01)",
)
self.parser.add_argument(
"--final_dropout",
type=float,
default=0.5,
help="final layer dropout (default: 0.5)",
)
# done
self.args = self.parser.parse_args()
\ No newline at end of file
self.args = self.parser.parse_args()
......@@ -6,39 +6,67 @@ Simple reference implementation of GraphSAGE.
"""
import argparse
import time
import numpy as np
import networkx as nx
import mxnet as mx
from mxnet import nd, gluon
import networkx as nx
import numpy as np
from mxnet import gluon, nd
from mxnet.gluon import nn
import dgl
from dgl.data import register_data_args
from dgl.data import CoraGraphDataset, CiteseerGraphDataset, PubmedGraphDataset
from dgl.data import (CiteseerGraphDataset, CoraGraphDataset,
PubmedGraphDataset, register_data_args)
from dgl.nn.mxnet.conv import SAGEConv
class GraphSAGE(nn.Block):
def __init__(self,
g,
in_feats,
n_hidden,
n_classes,
n_layers,
activation,
dropout,
aggregator_type):
def __init__(
self,
g,
in_feats,
n_hidden,
n_classes,
n_layers,
activation,
dropout,
aggregator_type,
):
super(GraphSAGE, self).__init__()
self.g = g
with self.name_scope():
self.layers = nn.Sequential()
# input layer
self.layers.add(SAGEConv(in_feats, n_hidden, aggregator_type, feat_drop=dropout, activation=activation))
self.layers.add(
SAGEConv(
in_feats,
n_hidden,
aggregator_type,
feat_drop=dropout,
activation=activation,
)
)
# hidden layers
for i in range(n_layers - 1):
self.layers.add(SAGEConv(n_hidden, n_hidden, aggregator_type, feat_drop=dropout, activation=activation))
self.layers.add(
SAGEConv(
n_hidden,
n_hidden,
aggregator_type,
feat_drop=dropout,
activation=activation,
)
)
# output layer
self.layers.add(SAGEConv(n_hidden, n_classes, aggregator_type, feat_drop=dropout, activation=None)) # activation None
self.layers.add(
SAGEConv(
n_hidden,
n_classes,
aggregator_type,
feat_drop=dropout,
activation=None,
)
) # activation None
def forward(self, features):
h = features
......@@ -46,21 +74,23 @@ class GraphSAGE(nn.Block):
h = layer(self.g, h)
return h
def evaluate(model, features, labels, mask):
pred = model(features).argmax(axis=1)
accuracy = ((pred == labels) * mask).sum() / mask.sum().asscalar()
return accuracy.asscalar()
def main(args):
# load and preprocess dataset
if args.dataset == 'cora':
if args.dataset == "cora":
data = CoraGraphDataset()
elif args.dataset == 'citeseer':
elif args.dataset == "citeseer":
data = CiteseerGraphDataset()
elif args.dataset == 'pubmed':
elif args.dataset == "pubmed":
data = PubmedGraphDataset()
else:
raise ValueError('Unknown dataset: {}'.format(args.dataset))
raise ValueError("Unknown dataset: {}".format(args.dataset))
g = data[0]
if args.gpu < 0:
......@@ -71,24 +101,29 @@ def main(args):
ctx = mx.gpu(args.gpu)
g = g.int().to(ctx)
features = g.ndata['feat']
labels = mx.nd.array(g.ndata['label'], dtype="float32", ctx=ctx)
train_mask = g.ndata['train_mask']
val_mask = g.ndata['val_mask']
test_mask = g.ndata['test_mask']
features = g.ndata["feat"]
labels = mx.nd.array(g.ndata["label"], dtype="float32", ctx=ctx)
train_mask = g.ndata["train_mask"]
val_mask = g.ndata["val_mask"]
test_mask = g.ndata["test_mask"]
in_feats = features.shape[1]
n_classes = data.num_labels
n_edges = data.graph.number_of_edges()
print("""----Data statistics------'
print(
"""----Data statistics------'
#Edges %d
#Classes %d
#Train samples %d
#Val samples %d
#Test samples %d""" %
(n_edges, n_classes,
train_mask.sum().asscalar(),
val_mask.sum().asscalar(),
test_mask.sum().asscalar()))
#Test samples %d"""
% (
n_edges,
n_classes,
train_mask.sum().asscalar(),
val_mask.sum().asscalar(),
test_mask.sum().asscalar(),
)
)
# add self loop
g = dgl.remove_self_loop(g)
......@@ -96,24 +131,27 @@ def main(args):
n_edges = g.number_of_edges()
# create GraphSAGE model
model = GraphSAGE(g,
in_feats,
args.n_hidden,
n_classes,
args.n_layers,
nd.relu,
args.dropout,
args.aggregator_type
)
model = GraphSAGE(
g,
in_feats,
args.n_hidden,
n_classes,
args.n_layers,
nd.relu,
args.dropout,
args.aggregator_type,
)
model.initialize(ctx=ctx)
n_train_samples = train_mask.sum().asscalar()
loss_fcn = gluon.loss.SoftmaxCELoss()
print(model.collect_params())
trainer = gluon.Trainer(model.collect_params(), 'adam',
{'learning_rate': args.lr, 'wd': args.weight_decay})
trainer = gluon.Trainer(
model.collect_params(),
"adam",
{"learning_rate": args.lr, "wd": args.weight_decay},
)
# initialize graph
dur = []
......@@ -133,34 +171,48 @@ def main(args):
loss.asscalar()
dur.append(time.time() - t0)
acc = evaluate(model, features, labels, val_mask)
print("Epoch {:05d} | Time(s) {:.4f} | Loss {:.4f} | Accuracy {:.4f} | "
"ETputs(KTEPS) {:.2f}". format(
epoch, np.mean(dur), loss.asscalar(), acc, n_edges / np.mean(dur) / 1000))
print(
"Epoch {:05d} | Time(s) {:.4f} | Loss {:.4f} | Accuracy {:.4f} | "
"ETputs(KTEPS) {:.2f}".format(
epoch,
np.mean(dur),
loss.asscalar(),
acc,
n_edges / np.mean(dur) / 1000,
)
)
# test set accuracy
acc = evaluate(model, features, labels, test_mask)
print("Test accuracy {:.2%}".format(acc))
if __name__ == '__main__':
parser = argparse.ArgumentParser(description='GraphSAGE')
if __name__ == "__main__":
parser = argparse.ArgumentParser(description="GraphSAGE")
register_data_args(parser)
parser.add_argument("--dropout", type=float, default=0.5,
help="dropout probability")
parser.add_argument("--gpu", type=int, default=-1,
help="gpu")
parser.add_argument("--lr", type=float, default=1e-2,
help="learning rate")
parser.add_argument("--n-epochs", type=int, default=200,
help="number of training epochs")
parser.add_argument("--n-hidden", type=int, default=16,
help="number of hidden gcn units")
parser.add_argument("--n-layers", type=int, default=1,
help="number of hidden gcn layers")
parser.add_argument("--weight-decay", type=float, default=5e-4,
help="Weight for L2 loss")
parser.add_argument("--aggregator-type", type=str, default="gcn",
help="Aggregator type: mean/gcn/pool/lstm")
parser.add_argument(
"--dropout", type=float, default=0.5, help="dropout probability"
)
parser.add_argument("--gpu", type=int, default=-1, help="gpu")
parser.add_argument("--lr", type=float, default=1e-2, help="learning rate")
parser.add_argument(
"--n-epochs", type=int, default=200, help="number of training epochs"
)
parser.add_argument(
"--n-hidden", type=int, default=16, help="number of hidden gcn units"
)
parser.add_argument(
"--n-layers", type=int, default=1, help="number of hidden gcn layers"
)
parser.add_argument(
"--weight-decay", type=float, default=5e-4, help="Weight for L2 loss"
)
parser.add_argument(
"--aggregator-type",
type=str,
default="gcn",
help="Aggregator type: mean/gcn/pool/lstm",
)
args = parser.parse_args()
print(args)
......
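# A standalone sketch of the SAGEConv layer the GraphSAGE model above stacks,
# assuming DGL's MXNet backend; the toy graph and sizes are illustrative:
import mxnet as mx

import dgl
from dgl.nn.mxnet.conv import SAGEConv

g = dgl.add_self_loop(dgl.graph(([0, 1, 2], [1, 2, 0])))  # toy 3-node graph
feat = mx.nd.random.uniform(shape=(3, 5))
conv = SAGEConv(5, 8, "mean", feat_drop=0.5, activation=mx.nd.relu)
conv.initialize()
out = conv(g, feat)  # shape: (3, 8)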