Unverified Commit a9f2acf3 authored by Hongzhi (Steve), Chen's avatar Hongzhi (Steve), Chen Committed by GitHub
Browse files

[Misc] Black auto fix. (#4641)



* [Misc] Black auto fix.

* sort
Co-authored-by: default avatarSteve <ubuntu@ip-172-31-34-29.ap-northeast-1.compute.internal>
parent 08c50eb7
import argparse, time
import numpy as np
import networkx as nx
import argparse
import time
import mxnet as mx
import networkx as nx
import numpy as np
from mxnet import gluon
from tagcn import TAGCN
import dgl
from dgl.data import register_data_args
from dgl.data import CoraGraphDataset, CiteseerGraphDataset, PubmedGraphDataset
from dgl.data import (CiteseerGraphDataset, CoraGraphDataset,
PubmedGraphDataset, register_data_args)
from tagcn import TAGCN
def evaluate(model, features, labels, mask):
    """Compute masked prediction accuracy as a Python scalar.

    Runs `model` on `features`, takes the argmax class per row, and
    counts agreements with `labels` restricted to nodes where `mask`
    is set. Returns accuracy via mxnet's `.asscalar()`.
    """
    predictions = model(features).argmax(axis=1)
    hits = (predictions == labels) * mask
    return (hits.sum() / mask.sum().asscalar()).asscalar()
def main(args):
# load and preprocess dataset
if args.dataset == 'cora':
if args.dataset == "cora":
data = CoraGraphDataset()
elif args.dataset == 'citeseer':
elif args.dataset == "citeseer":
data = CiteseerGraphDataset()
elif args.dataset == 'pubmed':
elif args.dataset == "pubmed":
data = PubmedGraphDataset()
else:
raise ValueError('Unknown dataset: {}'.format(args.dataset))
raise ValueError("Unknown dataset: {}".format(args.dataset))
g = data[0]
if args.gpu < 0:
......@@ -35,37 +38,44 @@ def main(args):
ctx = mx.gpu(args.gpu)
g = g.to(ctx)
features = g.ndata['feat']
labels = mx.nd.array(g.ndata['label'], dtype="float32", ctx=ctx)
train_mask = g.ndata['train_mask']
val_mask = g.ndata['val_mask']
test_mask = g.ndata['test_mask']
features = g.ndata["feat"]
labels = mx.nd.array(g.ndata["label"], dtype="float32", ctx=ctx)
train_mask = g.ndata["train_mask"]
val_mask = g.ndata["val_mask"]
test_mask = g.ndata["test_mask"]
in_feats = features.shape[1]
n_classes = data.num_labels
n_edges = data.graph.number_of_edges()
print("""----Data statistics------'
print(
"""----Data statistics------'
#Edges %d
#Classes %d
#Train samples %d
#Val samples %d
#Test samples %d""" %
(n_edges, n_classes,
#Test samples %d"""
% (
n_edges,
n_classes,
train_mask.sum().asscalar(),
val_mask.sum().asscalar(),
test_mask.sum().asscalar()))
test_mask.sum().asscalar(),
)
)
# add self loop
g = dgl.remove_self_loop(g)
g = dgl.add_self_loop(g)
# create TAGCN model
model = TAGCN(g,
model = TAGCN(
g,
in_feats,
args.n_hidden,
n_classes,
args.n_layers,
mx.nd.relu,
args.dropout)
args.dropout,
)
model.initialize(ctx=ctx)
n_train_samples = train_mask.sum().asscalar()
......@@ -73,8 +83,11 @@ def main(args):
# use optimizer
print(model.collect_params())
trainer = gluon.Trainer(model.collect_params(), 'adam',
{'learning_rate': args.lr, 'wd': args.weight_decay})
trainer = gluon.Trainer(
model.collect_params(),
"adam",
{"learning_rate": args.lr, "wd": args.weight_decay},
)
# initialize graph
dur = []
......@@ -94,33 +107,47 @@ def main(args):
loss.asscalar()
dur.append(time.time() - t0)
acc = evaluate(model, features, labels, val_mask)
print("Epoch {:05d} | Time(s) {:.4f} | Loss {:.4f} | Accuracy {:.4f} | "
"ETputs(KTEPS) {:.2f}". format(
epoch, np.mean(dur), loss.asscalar(), acc, n_edges / np.mean(dur) / 1000))
print(
"Epoch {:05d} | Time(s) {:.4f} | Loss {:.4f} | Accuracy {:.4f} | "
"ETputs(KTEPS) {:.2f}".format(
epoch,
np.mean(dur),
loss.asscalar(),
acc,
n_edges / np.mean(dur) / 1000,
)
)
print()
acc = evaluate(model, features, labels, val_mask)
print("Test accuracy {:.2%}".format(acc))
if __name__ == '__main__':
parser = argparse.ArgumentParser(description='TAGCN')
if __name__ == "__main__":
parser = argparse.ArgumentParser(description="TAGCN")
register_data_args(parser)
parser.add_argument("--dropout", type=float, default=0.5,
help="dropout probability")
parser.add_argument("--gpu", type=int, default=-1,
help="gpu")
parser.add_argument("--lr", type=float, default=1e-2,
help="learning rate")
parser.add_argument("--n-epochs", type=int, default=200,
help="number of training epochs")
parser.add_argument("--n-hidden", type=int, default=16,
help="number of hidden tagcn units")
parser.add_argument("--n-layers", type=int, default=1,
help="number of hidden tagcn layers")
parser.add_argument("--weight-decay", type=float, default=5e-4,
help="Weight for L2 loss")
parser.add_argument("--self-loop", action='store_true',
help="graph self-loop (default=False)")
parser.add_argument(
"--dropout", type=float, default=0.5, help="dropout probability"
)
parser.add_argument("--gpu", type=int, default=-1, help="gpu")
parser.add_argument("--lr", type=float, default=1e-2, help="learning rate")
parser.add_argument(
"--n-epochs", type=int, default=200, help="number of training epochs"
)
parser.add_argument(
"--n-hidden", type=int, default=16, help="number of hidden tagcn units"
)
parser.add_argument(
"--n-layers", type=int, default=1, help="number of hidden tagcn layers"
)
parser.add_argument(
"--weight-decay", type=float, default=5e-4, help="Weight for L2 loss"
)
parser.add_argument(
"--self-loop",
action="store_true",
help="graph self-loop (default=False)",
)
parser.set_defaults(self_loop=False)
args = parser.parse_args()
print(args)
......
import argparse
import collections
import os
import time
import warnings
import zipfile
import os
import collections
os.environ['DGLBACKEND'] = 'mxnet'
os.environ['MXNET_GPU_MEM_POOL_TYPE'] = 'Round'
os.environ["DGLBACKEND"] = "mxnet"
os.environ["MXNET_GPU_MEM_POOL_TYPE"] = "Round"
import numpy as np
import mxnet as mx
import numpy as np
from mxnet import gluon
from tree_lstm import TreeLSTM
import dgl
import dgl.data as data
from tree_lstm import TreeLSTM
# One mini-batch of SST tree samples: the batched DGL graph plus the
# per-node mask, word ids and sentiment labels aligned with its nodes.
# (The diff residue carried this definition twice — once per quote
# style; a single definition is kept.)
SSTBatch = collections.namedtuple(
    "SSTBatch", ["graph", "mask", "wordid", "label"]
)
def batcher(ctx):
    """Build a DataLoader `batchify_fn` that batches SST trees onto `ctx`.

    The returned closure merges a list of tree graphs with `dgl.batch`
    and moves the node features it needs (mask, word id, label) to the
    target mxnet context. (The diff residue carried two interleaved
    copies of the return statement; one is kept.)
    """

    def batcher_dev(batch):
        batch_trees = dgl.batch(batch)
        return SSTBatch(
            graph=batch_trees,
            mask=batch_trees.ndata["mask"].as_in_context(ctx),
            wordid=batch_trees.ndata["x"].as_in_context(ctx),
            label=batch_trees.ndata["y"].as_in_context(ctx),
        )

    return batcher_dev
def prepare_glove():
    """Download and extract GloVe 840B.300d embeddings if not present.

    Skips the download when a checksum-verified copy of
    ``glove.840B.300d.txt`` already exists. After extraction the file
    checksum is re-verified and a warning is emitted on mismatch
    (best-effort: the function does not raise). The diff residue
    carried old and new bodies interleaved; the deduplicated logic is
    kept.
    """
    if not (
        os.path.exists("glove.840B.300d.txt")
        and data.utils.check_sha1(
            "glove.840B.300d.txt",
            sha1_hash="294b9f37fa64cce31f9ebb409c266fc379527708",
        )
    ):
        zip_path = data.utils.download(
            "http://nlp.stanford.edu/data/glove.840B.300d.zip",
            sha1_hash="8084fbacc2dee3b1fd1ca4cc534cbfff3519ed0d",
        )
        with zipfile.ZipFile(zip_path, "r") as zf:
            zf.extractall()
        if not data.utils.check_sha1(
            "glove.840B.300d.txt",
            sha1_hash="294b9f37fa64cce31f9ebb409c266fc379527708",
        ):
            warnings.warn(
                "The downloaded glove embedding file checksum mismatch. File content "
                "may be corrupted."
            )
def main(args):
np.random.seed(args.seed)
......@@ -53,7 +70,11 @@ def main(args):
if args.gpu in mx.test_utils.list_gpus():
ctx = mx.gpu(args.gpu)
else:
print('Requested GPU id {} was not found. Defaulting to CPU implementation'.format(args.gpu))
print(
"Requested GPU id {} was not found. Defaulting to CPU implementation".format(
args.gpu
)
)
ctx = mx.cpu()
else:
ctx = mx.cpu()
......@@ -62,45 +83,63 @@ def main(args):
prepare_glove()
trainset = data.SSTDataset()
train_loader = gluon.data.DataLoader(dataset=trainset,
train_loader = gluon.data.DataLoader(
dataset=trainset,
batch_size=args.batch_size,
batchify_fn=batcher(ctx),
shuffle=True,
num_workers=0)
devset = data.SSTDataset(mode='dev')
dev_loader = gluon.data.DataLoader(dataset=devset,
num_workers=0,
)
devset = data.SSTDataset(mode="dev")
dev_loader = gluon.data.DataLoader(
dataset=devset,
batch_size=100,
batchify_fn=batcher(ctx),
shuffle=True,
num_workers=0)
num_workers=0,
)
testset = data.SSTDataset(mode='test')
test_loader = gluon.data.DataLoader(dataset=testset,
testset = data.SSTDataset(mode="test")
test_loader = gluon.data.DataLoader(
dataset=testset,
batch_size=100,
batchify_fn=batcher(ctx),
shuffle=False, num_workers=0)
shuffle=False,
num_workers=0,
)
model = TreeLSTM(trainset.vocab_size,
model = TreeLSTM(
trainset.vocab_size,
args.x_size,
args.h_size,
trainset.num_classes,
args.dropout,
cell_type='childsum' if args.child_sum else 'nary',
pretrained_emb = trainset.pretrained_emb,
ctx=ctx)
cell_type="childsum" if args.child_sum else "nary",
pretrained_emb=trainset.pretrained_emb,
ctx=ctx,
)
print(model)
params_ex_emb =[x for x in model.collect_params().values()
if x.grad_req != 'null' and x.shape[0] != trainset.vocab_size]
params_ex_emb = [
x
for x in model.collect_params().values()
if x.grad_req != "null" and x.shape[0] != trainset.vocab_size
]
params_emb = list(model.embedding.collect_params().values())
for p in params_emb:
p.lr_mult = 0.1
model.initialize(mx.init.Xavier(magnitude=1), ctx=ctx)
model.hybridize()
trainer = gluon.Trainer(model.collect_params('^(?!embedding).*$'), 'adagrad',
{'learning_rate': args.lr, 'wd': args.weight_decay})
trainer_emb = gluon.Trainer(model.collect_params('^embedding.*$'), 'adagrad',
{'learning_rate': args.lr})
trainer = gluon.Trainer(
model.collect_params("^(?!embedding).*$"),
"adagrad",
{"learning_rate": args.lr, "wd": args.weight_decay},
)
trainer_emb = gluon.Trainer(
model.collect_params("^embedding.*$"),
"adagrad",
{"learning_rate": args.lr},
)
dur = []
L = gluon.loss.SoftmaxCrossEntropyLoss(axis=1)
......@@ -129,12 +168,30 @@ def main(args):
if step > 0 and step % args.log_every == 0:
pred = pred.argmax(axis=1).astype(batch.label.dtype)
acc = (batch.label == pred).sum()
root_ids = [i for i in range(batch.graph.number_of_nodes()) if batch.graph.out_degree(i)==0]
root_acc = np.sum(batch.label.asnumpy()[root_ids] == pred.asnumpy()[root_ids])
root_ids = [
i
for i in range(batch.graph.number_of_nodes())
if batch.graph.out_degree(i) == 0
]
root_acc = np.sum(
batch.label.asnumpy()[root_ids] == pred.asnumpy()[root_ids]
)
print("Epoch {:05d} | Step {:05d} | Loss {:.4f} | Acc {:.4f} | Root Acc {:.4f} | Time(s) {:.4f}".format(
epoch, step, loss.sum().asscalar(), 1.0*acc.asscalar()/len(batch.label), 1.0*root_acc/len(root_ids), np.mean(dur)))
print('Epoch {:05d} training time {:.4f}s'.format(epoch, time.time() - t_epoch))
print(
"Epoch {:05d} | Step {:05d} | Loss {:.4f} | Acc {:.4f} | Root Acc {:.4f} | Time(s) {:.4f}".format(
epoch,
step,
loss.sum().asscalar(),
1.0 * acc.asscalar() / len(batch.label),
1.0 * root_acc / len(root_ids),
np.mean(dur),
)
)
print(
"Epoch {:05d} training time {:.4f}s".format(
epoch, time.time() - t_epoch
)
)
# eval on dev set
accs = []
......@@ -148,31 +205,48 @@ def main(args):
acc = (batch.label == pred).sum().asscalar()
accs.append([acc, len(batch.label)])
root_ids = [i for i in range(batch.graph.number_of_nodes()) if batch.graph.out_degree(i)==0]
root_acc = np.sum(batch.label.asnumpy()[root_ids] == pred.asnumpy()[root_ids])
root_ids = [
i
for i in range(batch.graph.number_of_nodes())
if batch.graph.out_degree(i) == 0
]
root_acc = np.sum(
batch.label.asnumpy()[root_ids] == pred.asnumpy()[root_ids]
)
root_accs.append([root_acc, len(root_ids)])
dev_acc = 1.0*np.sum([x[0] for x in accs])/np.sum([x[1] for x in accs])
dev_root_acc = 1.0*np.sum([x[0] for x in root_accs])/np.sum([x[1] for x in root_accs])
print("Epoch {:05d} | Dev Acc {:.4f} | Root Acc {:.4f}".format(
epoch, dev_acc, dev_root_acc))
dev_acc = (
1.0 * np.sum([x[0] for x in accs]) / np.sum([x[1] for x in accs])
)
dev_root_acc = (
1.0
* np.sum([x[0] for x in root_accs])
/ np.sum([x[1] for x in root_accs])
)
print(
"Epoch {:05d} | Dev Acc {:.4f} | Root Acc {:.4f}".format(
epoch, dev_acc, dev_root_acc
)
)
if dev_root_acc > best_dev_acc:
best_dev_acc = dev_root_acc
best_epoch = epoch
model.save_parameters('best_{}.params'.format(args.seed))
model.save_parameters("best_{}.params".format(args.seed))
else:
if best_epoch <= epoch - 10:
break
# lr decay
trainer.set_learning_rate(max(1e-5, trainer.learning_rate*0.99))
trainer.set_learning_rate(max(1e-5, trainer.learning_rate * 0.99))
print(trainer.learning_rate)
trainer_emb.set_learning_rate(max(1e-5, trainer_emb.learning_rate*0.99))
trainer_emb.set_learning_rate(
max(1e-5, trainer_emb.learning_rate * 0.99)
)
print(trainer_emb.learning_rate)
# test
model.load_parameters('best_{}.params'.format(args.seed))
model.load_parameters("best_{}.params".format(args.seed))
accs = []
root_accs = []
for step, batch in enumerate(test_loader):
......@@ -184,30 +258,46 @@ def main(args):
acc = (batch.label == pred).sum().asscalar()
accs.append([acc, len(batch.label)])
root_ids = [i for i in range(batch.graph.number_of_nodes()) if batch.graph.out_degree(i)==0]
root_acc = np.sum(batch.label.asnumpy()[root_ids] == pred.asnumpy()[root_ids])
root_ids = [
i
for i in range(batch.graph.number_of_nodes())
if batch.graph.out_degree(i) == 0
]
root_acc = np.sum(
batch.label.asnumpy()[root_ids] == pred.asnumpy()[root_ids]
)
root_accs.append([root_acc, len(root_ids)])
test_acc = 1.0*np.sum([x[0] for x in accs])/np.sum([x[1] for x in accs])
test_root_acc = 1.0*np.sum([x[0] for x in root_accs])/np.sum([x[1] for x in root_accs])
print('------------------------------------------------------------------------------------')
print("Epoch {:05d} | Test Acc {:.4f} | Root Acc {:.4f}".format(
best_epoch, test_acc, test_root_acc))
test_acc = 1.0 * np.sum([x[0] for x in accs]) / np.sum([x[1] for x in accs])
test_root_acc = (
1.0
* np.sum([x[0] for x in root_accs])
/ np.sum([x[1] for x in root_accs])
)
print(
"------------------------------------------------------------------------------------"
)
print(
"Epoch {:05d} | Test Acc {:.4f} | Root Acc {:.4f}".format(
best_epoch, test_acc, test_root_acc
)
)
# Script entry point: parse Tree-LSTM hyperparameters and run training.
# (The diff residue listed every add_argument line twice — single- and
# double-quoted; one copy of each is kept.)
if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--gpu", type=int, default=0)
    parser.add_argument("--seed", type=int, default=41)
    parser.add_argument("--batch-size", type=int, default=256)
    parser.add_argument("--child-sum", action="store_true")
    parser.add_argument("--x-size", type=int, default=300)
    parser.add_argument("--h-size", type=int, default=150)
    parser.add_argument("--epochs", type=int, default=100)
    parser.add_argument("--log-every", type=int, default=5)
    parser.add_argument("--lr", type=float, default=0.05)
    parser.add_argument("--weight-decay", type=float, default=1e-4)
    parser.add_argument("--dropout", type=float, default=0.5)
    parser.add_argument("--use-glove", action="store_true")
    args = parser.parse_args()
    print(args)
    main(args)
......@@ -2,14 +2,17 @@
Improved Semantic Representations From Tree-Structured Long Short-Term Memory Networks
https://arxiv.org/abs/1503.00075
"""
import time
import itertools
import time
import mxnet as mx
import networkx as nx
import numpy as np
import mxnet as mx
from mxnet import gluon
import dgl
class _TreeLSTMCellNodeFunc(gluon.HybridBlock):
def hybrid_forward(self, F, iou, b_iou, c):
iou = F.broadcast_add(iou, b_iou)
......@@ -20,6 +23,7 @@ class _TreeLSTMCellNodeFunc(gluon.HybridBlock):
return h, c
class _TreeLSTMCellReduceFunc(gluon.HybridBlock):
def __init__(self, U_iou, U_f):
super(_TreeLSTMCellReduceFunc, self).__init__()
......@@ -33,34 +37,39 @@ class _TreeLSTMCellReduceFunc(gluon.HybridBlock):
iou = self.U_iou(h_cat)
return iou, c
class _TreeLSTMCell(gluon.HybridBlock):
    """Shared base for Tree-LSTM cells: bias parameter plus the DGL
    message/apply node functions. Subclasses supply `reduce_func`.
    (The diff residue carried several lines twice; one copy is kept.)
    """

    def __init__(self, h_size):
        super(_TreeLSTMCell, self).__init__()
        self._apply_node_func = _TreeLSTMCellNodeFunc()
        # One shared bias over the concatenated i/o/u gate pre-activations.
        self.b_iou = self.params.get(
            "bias", shape=(1, 3 * h_size), init="zeros"
        )

    def message_func(self, edges):
        # Children forward their hidden and cell states to the parent.
        return {"h": edges.src["h"], "c": edges.src["c"]}

    def apply_node_func(self, nodes):
        iou = nodes.data["iou"]
        # Bias must live on the same device as the incoming activations.
        b_iou, c = self.b_iou.data(iou.context), nodes.data["c"]
        h, c = self._apply_node_func(iou, b_iou, c)
        return {"h": h, "c": c}
class TreeLSTMCell(_TreeLSTMCell):
    """N-ary Tree-LSTM cell: children are ordered, so hidden states are
    concatenated before the gate projections. (The diff residue carried
    duplicated lines; one copy is kept.)
    """

    def __init__(self, x_size, h_size):
        super(TreeLSTMCell, self).__init__(h_size)
        self._reduce_func = _TreeLSTMCellReduceFunc(
            gluon.nn.Dense(3 * h_size, use_bias=False),
            gluon.nn.Dense(2 * h_size),
        )
        self.W_iou = gluon.nn.Dense(3 * h_size, use_bias=False)

    def reduce_func(self, nodes):
        # Aggregate children's states into the parent's gate inputs.
        h, c = nodes.mailbox["h"], nodes.mailbox["c"]
        iou, c = self._reduce_func(h, c)
        return {"iou": iou, "c": c}
class ChildSumTreeLSTMCell(_TreeLSTMCell):
def __init__(self, x_size, h_size):
......@@ -70,31 +79,34 @@ class ChildSumTreeLSTMCell(_TreeLSTMCell):
self.U_f = gluon.nn.Dense(h_size)
def reduce_func(self, nodes):
h_tild = nodes.mailbox['h'].sum(axis=1)
f = self.U_f(nodes.mailbox['h']).sigmoid()
c = (f * nodes.mailbox['c']).sum(axis=1)
return {'iou': self.U_iou(h_tild), 'c': c}
h_tild = nodes.mailbox["h"].sum(axis=1)
f = self.U_f(nodes.mailbox["h"]).sigmoid()
c = (f * nodes.mailbox["c"]).sum(axis=1)
return {"iou": self.U_iou(h_tild), "c": c}
class TreeLSTM(gluon.nn.Block):
def __init__(self,
def __init__(
self,
num_vocabs,
x_size,
h_size,
num_classes,
dropout,
cell_type='nary',
cell_type="nary",
pretrained_emb=None,
ctx=None):
ctx=None,
):
super(TreeLSTM, self).__init__()
self.x_size = x_size
self.embedding = gluon.nn.Embedding(num_vocabs, x_size)
if pretrained_emb is not None:
print('Using glove')
print("Using glove")
self.embedding.initialize(ctx=ctx)
self.embedding.weight.set_data(pretrained_emb)
self.dropout = gluon.nn.Dropout(dropout)
self.linear = gluon.nn.Dense(num_classes)
cell = TreeLSTMCell if cell_type == 'nary' else ChildSumTreeLSTMCell
cell = TreeLSTMCell if cell_type == "nary" else ChildSumTreeLSTMCell
self.cell = cell(x_size, h_size)
self.ctx = ctx
......@@ -118,15 +130,17 @@ class TreeLSTM(gluon.nn.Block):
# feed embedding
embeds = self.embedding(batch.wordid * batch.mask)
wiou = self.cell.W_iou(self.dropout(embeds))
g.ndata['iou'] = wiou * batch.mask.expand_dims(-1).astype(wiou.dtype)
g.ndata['h'] = h
g.ndata['c'] = c
g.ndata["iou"] = wiou * batch.mask.expand_dims(-1).astype(wiou.dtype)
g.ndata["h"] = h
g.ndata["c"] = c
# propagate
dgl.prop_nodes_topo(g,
dgl.prop_nodes_topo(
g,
message_func=self.cell.message_func,
reduce_func=self.cell.reduce_func,
apply_node_func=self.cell.apply_node_func)
apply_node_func=self.cell.apply_node_func,
)
# compute logits
h = self.dropout(g.ndata.pop('h'))
h = self.dropout(g.ndata.pop("h"))
logits = self.linear(h)
return logits
......@@ -6,11 +6,13 @@ Paper: https://arxiv.org/abs/1810.05997
Author's code: https://github.com/klicperajo/ppnp
"""
import torch.nn as nn
from dgl.nn.pytorch.conv import APPNPConv
class APPNP(nn.Module):
def __init__(self,
def __init__(
self,
g,
in_feats,
hiddens,
......@@ -19,7 +21,8 @@ class APPNP(nn.Module):
feat_drop,
edge_drop,
alpha,
k):
k,
):
super(APPNP, self).__init__()
self.g = g
self.layers = nn.ModuleList()
......
import argparse, time
import argparse
import time
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from dgl.data import register_data_args
from dgl.data import CoraGraphDataset, CiteseerGraphDataset, PubmedGraphDataset
import dgl
from appnp import APPNP
import dgl
from dgl.data import (CiteseerGraphDataset, CoraGraphDataset,
PubmedGraphDataset, register_data_args)
def evaluate(model, features, labels, mask):
model.eval()
......@@ -22,14 +25,14 @@ def evaluate(model, features, labels, mask):
def main(args):
# load and preprocess dataset
if args.dataset == 'cora':
if args.dataset == "cora":
data = CoraGraphDataset()
elif args.dataset == 'citeseer':
elif args.dataset == "citeseer":
data = CiteseerGraphDataset()
elif args.dataset == 'pubmed':
elif args.dataset == "pubmed":
data = PubmedGraphDataset()
else:
raise ValueError('Unknown dataset: {}'.format(args.dataset))
raise ValueError("Unknown dataset: {}".format(args.dataset))
g = data[0]
if args.gpu < 0:
......@@ -38,24 +41,29 @@ def main(args):
cuda = True
g = g.to(args.gpu)
features = g.ndata['feat']
labels = g.ndata['label']
train_mask = g.ndata['train_mask']
val_mask = g.ndata['val_mask']
test_mask = g.ndata['test_mask']
features = g.ndata["feat"]
labels = g.ndata["label"]
train_mask = g.ndata["train_mask"]
val_mask = g.ndata["val_mask"]
test_mask = g.ndata["test_mask"]
in_feats = features.shape[1]
n_classes = data.num_labels
n_edges = g.number_of_edges()
print("""----Data statistics------'
print(
"""----Data statistics------'
#Edges %d
#Classes %d
#Train samples %d
#Val samples %d
#Test samples %d""" %
(n_edges, n_classes,
#Test samples %d"""
% (
n_edges,
n_classes,
train_mask.int().sum().item(),
val_mask.int().sum().item(),
test_mask.int().sum().item()))
test_mask.int().sum().item(),
)
)
n_edges = g.number_of_edges()
# add self loop
......@@ -63,7 +71,8 @@ def main(args):
g = dgl.add_self_loop(g)
# create APPNP model
model = APPNP(g,
model = APPNP(
g,
in_feats,
args.hidden_sizes,
n_classes,
......@@ -71,16 +80,17 @@ def main(args):
args.in_drop,
args.edge_drop,
args.alpha,
args.k)
args.k,
)
if cuda:
model.cuda()
loss_fcn = torch.nn.CrossEntropyLoss()
# use optimizer
optimizer = torch.optim.Adam(model.parameters(),
lr=args.lr,
weight_decay=args.weight_decay)
optimizer = torch.optim.Adam(
model.parameters(), lr=args.lr, weight_decay=args.weight_decay
)
# initialize graph
dur = []
......@@ -100,36 +110,52 @@ def main(args):
dur.append(time.time() - t0)
acc = evaluate(model, features, labels, val_mask)
print("Epoch {:05d} | Time(s) {:.4f} | Loss {:.4f} | Accuracy {:.4f} | "
"ETputs(KTEPS) {:.2f}".format(epoch, np.mean(dur), loss.item(),
acc, n_edges / np.mean(dur) / 1000))
print(
"Epoch {:05d} | Time(s) {:.4f} | Loss {:.4f} | Accuracy {:.4f} | "
"ETputs(KTEPS) {:.2f}".format(
epoch,
np.mean(dur),
loss.item(),
acc,
n_edges / np.mean(dur) / 1000,
)
)
print()
acc = evaluate(model, features, labels, test_mask)
print("Test Accuracy {:.4f}".format(acc))
if __name__ == '__main__':
parser = argparse.ArgumentParser(description='APPNP')
if __name__ == "__main__":
parser = argparse.ArgumentParser(description="APPNP")
register_data_args(parser)
parser.add_argument("--in-drop", type=float, default=0.5,
help="input feature dropout")
parser.add_argument("--edge-drop", type=float, default=0.5,
help="edge propagation dropout")
parser.add_argument("--gpu", type=int, default=-1,
help="gpu")
parser.add_argument("--lr", type=float, default=1e-2,
help="learning rate")
parser.add_argument("--n-epochs", type=int, default=200,
help="number of training epochs")
parser.add_argument("--hidden_sizes", type=int, nargs='+', default=[64],
help="hidden unit sizes for appnp")
parser.add_argument("--k", type=int, default=10,
help="Number of propagation steps")
parser.add_argument("--alpha", type=float, default=0.1,
help="Teleport Probability")
parser.add_argument("--weight-decay", type=float, default=5e-4,
help="Weight for L2 loss")
parser.add_argument(
"--in-drop", type=float, default=0.5, help="input feature dropout"
)
parser.add_argument(
"--edge-drop", type=float, default=0.5, help="edge propagation dropout"
)
parser.add_argument("--gpu", type=int, default=-1, help="gpu")
parser.add_argument("--lr", type=float, default=1e-2, help="learning rate")
parser.add_argument(
"--n-epochs", type=int, default=200, help="number of training epochs"
)
parser.add_argument(
"--hidden_sizes",
type=int,
nargs="+",
default=[64],
help="hidden unit sizes for appnp",
)
parser.add_argument(
"--k", type=int, default=10, help="Number of propagation steps"
)
parser.add_argument(
"--alpha", type=float, default=0.1, help="Teleport Probability"
)
parser.add_argument(
"--weight-decay", type=float, default=5e-4, help="Weight for L2 loss"
)
args = parser.parse_args()
print(args)
......
......@@ -2,46 +2,52 @@
import argparse
import copy
import numpy as np
import torch
import torch.optim as optim
import torch.nn as nn
import numpy as np
from dgl.data import CoraGraphDataset, CiteseerGraphDataset, PubmedGraphDataset
from tqdm import trange
import torch.optim as optim
from model import ARMA4NC
from tqdm import trange
from dgl.data import CiteseerGraphDataset, CoraGraphDataset, PubmedGraphDataset
def main(args):
# Step 1: Prepare graph data and retrieve train/validation/test index ============================= #
# Load from DGL dataset
if args.dataset == 'Cora':
if args.dataset == "Cora":
dataset = CoraGraphDataset()
elif args.dataset == 'Citeseer':
elif args.dataset == "Citeseer":
dataset = CiteseerGraphDataset()
elif args.dataset == 'Pubmed':
elif args.dataset == "Pubmed":
dataset = PubmedGraphDataset()
else:
raise ValueError('Dataset {} is invalid.'.format(args.dataset))
raise ValueError("Dataset {} is invalid.".format(args.dataset))
graph = dataset[0]
# check cuda
device = f'cuda:{args.gpu}' if args.gpu >= 0 and torch.cuda.is_available() else 'cpu'
device = (
f"cuda:{args.gpu}"
if args.gpu >= 0 and torch.cuda.is_available()
else "cpu"
)
# retrieve the number of classes
n_classes = dataset.num_classes
# retrieve labels of ground truth
labels = graph.ndata.pop('label').to(device).long()
labels = graph.ndata.pop("label").to(device).long()
# Extract node features
feats = graph.ndata.pop('feat').to(device)
feats = graph.ndata.pop("feat").to(device)
n_features = feats.shape[-1]
# retrieve masks for train/validation/test
train_mask = graph.ndata.pop('train_mask')
val_mask = graph.ndata.pop('val_mask')
test_mask = graph.ndata.pop('test_mask')
train_mask = graph.ndata.pop("train_mask")
val_mask = graph.ndata.pop("val_mask")
test_mask = graph.ndata.pop("test_mask")
train_idx = torch.nonzero(train_mask, as_tuple=False).squeeze().to(device)
val_idx = torch.nonzero(val_mask, as_tuple=False).squeeze().to(device)
......@@ -50,13 +56,15 @@ def main(args):
graph = graph.to(device)
# Step 2: Create model =================================================================== #
model = ARMA4NC(in_dim=n_features,
model = ARMA4NC(
in_dim=n_features,
hid_dim=args.hid_dim,
out_dim=n_classes,
num_stacks=args.num_stacks,
num_layers=args.num_layers,
activation=nn.ReLU(),
dropout=args.dropout).to(device)
dropout=args.dropout,
).to(device)
best_model = copy.deepcopy(model)
......@@ -67,7 +75,7 @@ def main(args):
# Step 4: training epoches =============================================================== #
acc = 0
no_improvement = 0
epochs = trange(args.epochs, desc='Accuracy & Loss')
epochs = trange(args.epochs, desc="Accuracy & Loss")
for _ in epochs:
# Training using a full graph
......@@ -77,7 +85,9 @@ def main(args):
# compute loss
train_loss = loss_fn(logits[train_idx], labels[train_idx])
train_acc = torch.sum(logits[train_idx].argmax(dim=1) == labels[train_idx]).item() / len(train_idx)
train_acc = torch.sum(
logits[train_idx].argmax(dim=1) == labels[train_idx]
).item() / len(train_idx)
# backward
opt.zero_grad()
......@@ -89,16 +99,21 @@ def main(args):
with torch.no_grad():
valid_loss = loss_fn(logits[val_idx], labels[val_idx])
valid_acc = torch.sum(logits[val_idx].argmax(dim=1) == labels[val_idx]).item() / len(val_idx)
valid_acc = torch.sum(
logits[val_idx].argmax(dim=1) == labels[val_idx]
).item() / len(val_idx)
# Print out performance
epochs.set_description('Train Acc {:.4f} | Train Loss {:.4f} | Val Acc {:.4f} | Val loss {:.4f}'.format(
train_acc, train_loss.item(), valid_acc, valid_loss.item()))
epochs.set_description(
"Train Acc {:.4f} | Train Loss {:.4f} | Val Acc {:.4f} | Val loss {:.4f}".format(
train_acc, train_loss.item(), valid_acc, valid_loss.item()
)
)
if valid_acc < acc:
no_improvement += 1
if no_improvement == args.early_stopping:
print('Early stop.')
print("Early stop.")
break
else:
no_improvement = 0
......@@ -107,31 +122,56 @@ def main(args):
best_model.eval()
logits = best_model(graph, feats)
test_acc = torch.sum(logits[test_idx].argmax(dim=1) == labels[test_idx]).item() / len(test_idx)
test_acc = torch.sum(
logits[test_idx].argmax(dim=1) == labels[test_idx]
).item() / len(test_idx)
print("Test Acc {:.4f}".format(test_acc))
return test_acc
if __name__ == "__main__":
"""
ARMA Model Hyperparameters
"""
parser = argparse.ArgumentParser(description='ARMA GCN')
parser = argparse.ArgumentParser(description="ARMA GCN")
# data source params
parser.add_argument('--dataset', type=str, default='Cora', help='Name of dataset.')
parser.add_argument(
"--dataset", type=str, default="Cora", help="Name of dataset."
)
# cuda params
parser.add_argument('--gpu', type=int, default=-1, help='GPU index. Default: -1, using CPU.')
parser.add_argument(
"--gpu", type=int, default=-1, help="GPU index. Default: -1, using CPU."
)
# training params
parser.add_argument('--epochs', type=int, default=2000, help='Training epochs.')
parser.add_argument('--early-stopping', type=int, default=100, help='Patient epochs to wait before early stopping.')
parser.add_argument('--lr', type=float, default=0.01, help='Learning rate.')
parser.add_argument('--lamb', type=float, default=5e-4, help='L2 reg.')
parser.add_argument(
"--epochs", type=int, default=2000, help="Training epochs."
)
parser.add_argument(
"--early-stopping",
type=int,
default=100,
help="Patient epochs to wait before early stopping.",
)
parser.add_argument("--lr", type=float, default=0.01, help="Learning rate.")
parser.add_argument("--lamb", type=float, default=5e-4, help="L2 reg.")
# model params
parser.add_argument("--hid-dim", type=int, default=16, help='Hidden layer dimensionalities.')
parser.add_argument("--num-stacks", type=int, default=2, help='Number of K.')
parser.add_argument("--num-layers", type=int, default=1, help='Number of T.')
parser.add_argument("--dropout", type=float, default=0.75, help='Dropout applied at all layers.')
parser.add_argument(
"--hid-dim", type=int, default=16, help="Hidden layer dimensionalities."
)
parser.add_argument(
"--num-stacks", type=int, default=2, help="Number of K."
)
parser.add_argument(
"--num-layers", type=int, default=1, help="Number of T."
)
parser.add_argument(
"--dropout",
type=float,
default=0.75,
help="Dropout applied at all layers.",
)
args = parser.parse_args()
print(args)
......@@ -143,6 +183,6 @@ if __name__ == "__main__":
mean = np.around(np.mean(acc_lists, axis=0), decimals=3)
std = np.around(np.std(acc_lists, axis=0), decimals=3)
print('Total acc: ', acc_lists)
print('mean', mean)
print('std', std)
\ No newline at end of file
print("Total acc: ", acc_lists)
print("mean", mean)
print("std", std)
import math
import torch
import torch.nn as nn
import torch.nn.functional as F
import dgl.function as fn
import math
def glorot(tensor):
    """Fill `tensor` in place with Glorot/Xavier uniform values.

    The bound is sqrt(6 / (fan_in + fan_out)) computed from the last
    two dimensions. A `None` tensor is silently ignored.
    """
    if tensor is None:
        return
    fan_sum = tensor.size(-2) + tensor.size(-1)
    bound = math.sqrt(6.0 / fan_sum)
    tensor.data.uniform_(-bound, bound)
def zeros(tensor):
    """Zero `tensor` in place; do nothing when it is None."""
    if tensor is None:
        return
    tensor.data.fill_(0)
class ARMAConv(nn.Module):
def __init__(self,
def __init__(
self,
in_dim,
out_dim,
num_stacks,
num_layers,
activation=None,
dropout=0.0,
bias=True):
bias=True,
):
super(ARMAConv, self).__init__()
self.in_dim = in_dim
......@@ -32,22 +39,33 @@ class ARMAConv(nn.Module):
self.dropout = nn.Dropout(p=dropout)
# init weight
self.w_0 = nn.ModuleDict({
str(k): nn.Linear(in_dim, out_dim, bias=False) for k in range(self.K)
})
self.w_0 = nn.ModuleDict(
{
str(k): nn.Linear(in_dim, out_dim, bias=False)
for k in range(self.K)
}
)
# deeper weight
self.w = nn.ModuleDict({
str(k): nn.Linear(out_dim, out_dim, bias=False) for k in range(self.K)
})
self.w = nn.ModuleDict(
{
str(k): nn.Linear(out_dim, out_dim, bias=False)
for k in range(self.K)
}
)
# v
self.v = nn.ModuleDict({
str(k): nn.Linear(in_dim, out_dim, bias=False) for k in range(self.K)
})
self.v = nn.ModuleDict(
{
str(k): nn.Linear(in_dim, out_dim, bias=False)
for k in range(self.K)
}
)
# bias
if bias:
self.bias = nn.Parameter(torch.Tensor(self.K, self.T, 1, self.out_dim))
self.bias = nn.Parameter(
torch.Tensor(self.K, self.T, 1, self.out_dim)
)
else:
self.register_parameter('bias', None)
self.register_parameter("bias", None)
self.reset_parameters()
......@@ -70,9 +88,9 @@ class ARMAConv(nn.Module):
feats = init_feats
for t in range(self.T):
feats = feats * norm
g.ndata['h'] = feats
g.update_all(fn.copy_u('h', 'm'), fn.sum('m', 'h'))
feats = g.ndata.pop('h')
g.ndata["h"] = feats
g.update_all(fn.copy_u("h", "m"), fn.sum("m", "h"))
feats = g.ndata.pop("h")
feats = feats * norm
if t == 0:
......@@ -92,30 +110,37 @@ class ARMAConv(nn.Module):
return torch.stack(output).mean(dim=0)
class ARMA4NC(nn.Module):
def __init__(self,
def __init__(
self,
in_dim,
hid_dim,
out_dim,
num_stacks,
num_layers,
activation=None,
dropout=0.0):
dropout=0.0,
):
super(ARMA4NC, self).__init__()
self.conv1 = ARMAConv(in_dim=in_dim,
self.conv1 = ARMAConv(
in_dim=in_dim,
out_dim=hid_dim,
num_stacks=num_stacks,
num_layers=num_layers,
activation=activation,
dropout=dropout)
dropout=dropout,
)
self.conv2 = ARMAConv(in_dim=hid_dim,
self.conv2 = ARMAConv(
in_dim=hid_dim,
out_dim=out_dim,
num_stacks=num_stacks,
num_layers=num_layers,
activation=activation,
dropout=dropout)
dropout=dropout,
)
self.dropout = nn.Dropout(p=dropout)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment