Commit cffa4034 authored by Ziyue Huang, committed by Minjie Wang

[Model] fix GCN (#305)

* mxnet gcn spmv

* update readme

* fix gcn

* pytorch gcn

* update readme
parent e2926544
...@@ -6,10 +6,13 @@ Author's code repo: [https://github.com/tkipf/gcn](https://github.com/tkipf/gcn)
Codes
-----
The folder contains three implementations of GCN. `gcn.py` uses user-defined
message and reduce functions, while `gcn_spmv.py` uses DGL's builtin functions
so the SPMV optimization can be applied. The implementation in `gcn_concat.py`
deviates slightly from the original paper for better performance; credit to
@yifeim and @ZiyueHuang.
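All three scripts share the same command line (dataset selection comes from
DGL's `register_data_args`); a typical invocation is expected to look like
`python gcn_spmv.py --dataset cora --gpu 0 --self-loop`, though the exact
flags should be checked against each script.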

Results
-------
These results are based on single-run training to minimize the cross-entropy
...
"""
Semi-Supervised Classification with Graph Convolutional Networks
Paper: https://arxiv.org/abs/1609.02907
Code: https://github.com/tkipf/gcn
GCN with SPMV optimization
"""
import argparse, time, math
import numpy as np
import mxnet as mx
from mxnet import gluon
import dgl
from dgl import DGLGraph
from dgl.data import register_data_args, load_data
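# Message/reduce pair for the symmetric normalization D^-1/2 A D^-1/2:
# a node u sends h_u * norm_u, and node v rescales the summed messages by
# its own norm_v, where norm = in_degree^(-1/2) is precomputed in main().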
def gcn_msg(edge):
msg = edge.src['h'] * edge.src['norm']
return {'m': msg}
def gcn_reduce(node):
accum = mx.nd.sum(node.mailbox['m'], 1) * node.data['norm']
return {'h': accum}
class NodeUpdate(gluon.Block):
def __init__(self, out_feats, activation=None, bias=True):
super(NodeUpdate, self).__init__()
with self.name_scope():
if bias:
self.bias = self.params.get('bias', shape=(out_feats,),
init=mx.init.Zero())
else:
self.bias = None
self.activation = activation
def forward(self, node):
h = node.data['h']
if self.bias is not None:
h = h + self.bias.data(h.context)
if self.activation:
h = self.activation(h)
return {'h': h}
class GCNLayer(gluon.Block):
def __init__(self,
g,
in_feats,
out_feats,
activation,
dropout,
bias=True):
super(GCNLayer, self).__init__()
self.g = g
self.dropout = dropout
with self.name_scope():
self.weight = self.params.get('weight', shape=(in_feats, out_feats),
init=mx.init.Xavier())
self.node_update = NodeUpdate(out_feats, activation, bias)
def forward(self, h):
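        # order of operations: dropout, dense projection (h @ W), neighbor
        # aggregation on the graph, then bias/activation inside NodeUpdate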
if self.dropout:
h = mx.nd.Dropout(h, p=self.dropout)
h = mx.nd.dot(h, self.weight.data(h.context))
self.g.ndata['h'] = h
self.g.update_all(gcn_msg, gcn_reduce, self.node_update)
h = self.g.ndata.pop('h')
return h
class GCN(gluon.Block):
def __init__(self,
g,
in_feats,
n_hidden,
n_classes,
n_layers,
activation,
dropout,
normalization):
super(GCN, self).__init__()
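        # note: 'normalization' is accepted to match the CLI but unused here;
        # the symmetric norm is precomputed in main() as g.ndata['norm']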
self.layers = gluon.nn.Sequential()
# input layer
self.layers.add(GCNLayer(g, in_feats, n_hidden, activation, 0))
# hidden layers
for i in range(n_layers - 1):
self.layers.add(GCNLayer(g, n_hidden, n_hidden, activation, dropout))
# output layer
self.layers.add(GCNLayer(g, n_hidden, n_classes, None, dropout))
def forward(self, features):
h = features
for layer in self.layers:
h = layer(h)
return h
def evaluate(model, features, labels, mask):
pred = model(features).argmax(axis=1)
accuracy = ((pred == labels) * mask).sum() / mask.sum().asscalar()
return accuracy.asscalar()
def main(args):
# load and preprocess dataset
data = load_data(args)
if args.self_loop:
data.graph.add_edges_from([(i,i) for i in range(len(data.graph))])
features = mx.nd.array(data.features)
labels = mx.nd.array(data.labels)
train_mask = mx.nd.array(data.train_mask)
val_mask = mx.nd.array(data.val_mask)
test_mask = mx.nd.array(data.test_mask)
in_feats = features.shape[1]
n_classes = data.num_labels
n_edges = data.graph.number_of_edges()
print("""----Data statistics------'
#Edges %d
#Classes %d
#Train samples %d
#Val samples %d
#Test samples %d""" %
(n_edges, n_classes,
train_mask.sum().asscalar(),
val_mask.sum().asscalar(),
test_mask.sum().asscalar()))
if args.gpu < 0:
cuda = False
ctx = mx.cpu(0)
else:
cuda = True
ctx = mx.gpu(args.gpu)
features = features.as_in_context(ctx)
labels = labels.as_in_context(ctx)
train_mask = train_mask.as_in_context(ctx)
val_mask = val_mask.as_in_context(ctx)
test_mask = test_mask.as_in_context(ctx)
# create GCN model
g = DGLGraph(data.graph)
# normalization
degs = g.in_degrees().astype('float32')
norm = mx.nd.power(degs, -0.5)
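    # zero in-degree nodes would get an infinite norm here; running with
    # --self-loop ensures every node has at least one in-edge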
if cuda:
norm = norm.as_in_context(ctx)
g.ndata['norm'] = mx.nd.expand_dims(norm, 1)
model = GCN(g,
in_feats,
args.n_hidden,
n_classes,
args.n_layers,
mx.nd.relu,
args.dropout,
args.normalization)
model.initialize(ctx=ctx)
n_train_samples = train_mask.sum().asscalar()
loss_fcn = gluon.loss.SoftmaxCELoss()
# use optimizer
print(model.collect_params())
trainer = gluon.Trainer(model.collect_params(), 'adam',
{'learning_rate': args.lr, 'wd': args.weight_decay})
# initialize graph
dur = []
for epoch in range(args.n_epochs):
if epoch >= 3:
t0 = time.time()
# forward
with mx.autograd.record():
pred = model(features)
loss = loss_fcn(pred, labels, mx.nd.expand_dims(train_mask, 1))
loss = loss.sum() / n_train_samples
loss.backward()
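        # the loss is already averaged over n_train_samples, so step() is
        # called with batch_size=1 and no extra gradient rescaling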
trainer.step(batch_size=1)
if epoch >= 3:
dur.append(time.time() - t0)
acc = evaluate(model, features, labels, val_mask)
print("Epoch {:05d} | Time(s) {:.4f} | Loss {:.4f} | Accuracy {:.4f} | "
"ETputs(KTEPS) {:.2f}". format(
epoch, np.mean(dur), loss.asscalar(), acc, n_edges / np.mean(dur) / 1000))
# test set accuracy
acc = evaluate(model, features, labels, test_mask)
print("Test accuracy {:.2%}".format(acc))
if __name__ == '__main__':
parser = argparse.ArgumentParser(description='GCN')
register_data_args(parser)
parser.add_argument("--dropout", type=float, default=0.5,
help="dropout probability")
parser.add_argument("--gpu", type=int, default=-1,
help="gpu")
parser.add_argument("--lr", type=float, default=1e-2,
help="learning rate")
parser.add_argument("--n-epochs", type=int, default=200,
help="number of training epochs")
parser.add_argument("--n-hidden", type=int, default=16,
help="number of hidden gcn units")
parser.add_argument("--n-layers", type=int, default=1,
help="number of hidden gcn layers")
parser.add_argument("--normalization",
choices=['sym','left'], default=None,
help="graph normalization types (default=None)")
parser.add_argument("--self-loop", action='store_true',
help="graph self-loop (default=False)")
parser.add_argument("--weight-decay", type=float, default=5e-4,
help="Weight for L2 loss")
args = parser.parse_args()
print(args)
main(args)
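
For intuition, the propagation implemented by gcn_msg/gcn_reduce above is a
single multiply by the symmetrically normalized adjacency. A minimal NumPy
sketch of the same arithmetic (the toy graph and sizes are made up for
illustration, not part of the commit):

import numpy as np

# 3-node undirected toy graph, hand-built only for illustration
A = np.array([[0, 1, 1],
              [1, 0, 0],
              [1, 0, 0]], dtype=np.float32)
h = np.random.rand(3, 4).astype(np.float32)      # projected features (h @ W)
norm = A.sum(axis=1) ** -0.5                     # d^(-1/2), like g.ndata['norm']
out = norm[:, None] * (A @ (norm[:, None] * h))  # == update_all(gcn_msg, gcn_reduce)
print(out.shape)                                 # (3, 4)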
...@@ -2,7 +2,6 @@
Semi-Supervised Classification with Graph Convolutional Networks
Paper: https://arxiv.org/abs/1609.02907
Code: https://github.com/tkipf/gcn
GCN with batch processing
"""
import argparse
...@@ -11,79 +10,67 @@ import time
import mxnet as mx
from mxnet import gluon
import dgl
import dgl.function as fn
from dgl import DGLGraph
from dgl.data import register_data_args, load_data

class GCNLayer(gluon.Block):
    def __init__(self,
                 g,
                 out_feats,
                 activation,
                 dropout):
        super(GCNLayer, self).__init__()
        self.g = g
        self.dense = gluon.nn.Dense(out_feats, activation)
        self.dropout = dropout

    def forward(self, h):
        self.g.ndata['h'] = h * self.g.ndata['out_norm']
        self.g.update_all(fn.copy_src(src='h', out='m'),
                          fn.sum(msg='m', out='accum'))
        accum = self.g.ndata.pop('accum')
        accum = self.dense(accum * self.g.ndata['in_norm'])
        if self.dropout:
            accum = mx.nd.Dropout(accum, p=self.dropout)
        h = self.g.ndata.pop('h')
        h = mx.nd.concat(h / self.g.ndata['out_norm'], accum, dim=1)
        return h
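
# Each GCNLayer thus returns mx.nd.concat([input, aggregate], dim=1): the raw
# input (recovered by dividing 'out_norm' back out) plus the normalized
# neighbor sum pushed through the Dense layer, so the feature width grows by
# out_feats per layer. This is the deviation from the original paper that the
# README credits to @yifeim and @ZiyueHuang.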

class GCN(gluon.Block):
    def __init__(self,
                 g,
                 n_hidden,
                 n_classes,
                 n_layers,
                 activation,
                 dropout):
        super(GCN, self).__init__()
        self.inp_layer = gluon.nn.Dense(n_hidden, activation)
        self.dropout = dropout
        self.layers = gluon.nn.Sequential()
        for i in range(n_layers):
            self.layers.add(GCNLayer(g, n_hidden, activation, dropout))
        self.out_layer = gluon.nn.Dense(n_classes)

    def forward(self, features):
        emb_inp = [features, self.inp_layer(features)]
        if self.dropout:
            emb_inp[-1] = mx.nd.Dropout(emb_inp[-1], p=self.dropout)
        h = mx.nd.concat(*emb_inp, dim=1)
        for layer in self.layers:
            h = layer(h)
        h = self.out_layer(h)
        return h

def evaluate(model, features, labels, mask):
    pred = model(features).argmax(axis=1)
    accuracy = ((pred == labels) * mask).sum() / mask.sum().asscalar()
    return accuracy.asscalar()

def main(args):
...@@ -95,47 +82,64 @@ def main(args):
    features = mx.nd.array(data.features)
    labels = mx.nd.array(data.labels)
    train_mask = mx.nd.array(data.train_mask)
    val_mask = mx.nd.array(data.val_mask)
    test_mask = mx.nd.array(data.test_mask)
    in_feats = features.shape[1]
    n_classes = data.num_labels
    n_edges = data.graph.number_of_edges()
    print("""----Data statistics------
      #Edges %d
      #Classes %d
      #Train samples %d
      #Val samples %d
      #Test samples %d""" %
          (n_edges, n_classes,
           train_mask.sum().asscalar(),
           val_mask.sum().asscalar(),
           test_mask.sum().asscalar()))

    if args.gpu < 0:
        cuda = False
        ctx = mx.cpu(0)
    else:
        cuda = True
        ctx = mx.gpu(args.gpu)

    features = features.as_in_context(ctx)
    labels = labels.as_in_context(ctx)
    train_mask = train_mask.as_in_context(ctx)
    val_mask = val_mask.as_in_context(ctx)
    test_mask = test_mask.as_in_context(ctx)

    # create GCN model
    g = DGLGraph(data.graph)
    # normalization
    in_degs = g.in_degrees().astype('float32')
    out_degs = g.out_degrees().astype('float32')
    in_norm = mx.nd.power(in_degs, -0.5)
    out_norm = mx.nd.power(out_degs, -0.5)
    if cuda:
        in_norm = in_norm.as_in_context(ctx)
        out_norm = out_norm.as_in_context(ctx)
    g.ndata['in_norm'] = mx.nd.expand_dims(in_norm, 1)
    g.ndata['out_norm'] = mx.nd.expand_dims(out_norm, 1)

    model = GCN(g,
                args.n_hidden,
                n_classes,
                args.n_layers,
                'relu',
                args.dropout)
    model.initialize(ctx=ctx)
    n_train_samples = train_mask.sum().asscalar()
    loss_fcn = gluon.loss.SoftmaxCELoss()

    # use optimizer
    print(model.collect_params())
    trainer = gluon.Trainer(model.collect_params(), 'adam',
                            {'learning_rate': args.lr, 'wd': args.weight_decay})

    # initialize graph
    dur = []
...@@ -145,23 +149,22 @@ def main(args):
        # forward
        with mx.autograd.record():
            pred = model(features)
            loss = loss_fcn(pred, labels, mx.nd.expand_dims(train_mask, 1))
            loss = loss.sum() / n_train_samples

        loss.backward()
        trainer.step(batch_size=1)

        if epoch >= 3:
            dur.append(time.time() - t0)
        acc = evaluate(model, features, labels, val_mask)
        print("Epoch {:05d} | Time(s) {:.4f} | Loss {:.4f} | Accuracy {:.4f} | "
              "ETputs(KTEPS) {:.2f}".format(
              epoch, np.mean(dur), loss.asscalar(), acc, n_edges / np.mean(dur) / 1000))

    # test set accuracy
    acc = evaluate(model, features, labels, test_mask)
    print("Test accuracy {:.2%}".format(acc))

if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='GCN')
...@@ -170,19 +173,21 @@ if __name__ == '__main__':
                        help="dropout probability")
    parser.add_argument("--gpu", type=int, default=-1,
                        help="gpu")
    parser.add_argument("--lr", type=float, default=1e-2,
                        help="learning rate")
    parser.add_argument("--n-epochs", type=int, default=200,
                        help="number of training epochs")
    parser.add_argument("--n-hidden", type=int, default=16,
                        help="number of hidden gcn units")
    parser.add_argument("--n-layers", type=int, default=1,
                        help="number of hidden gcn layers")
    parser.add_argument("--normalization",
                        choices=['sym','left'], default=None,
                        help="graph normalization types (default=None)")
    parser.add_argument("--self-loop", action='store_true',
                        help="graph self-loop (default=False)")
    parser.add_argument("--weight-decay", type=float, default=5e-4,
                        help="Weight for L2 loss")
    args = parser.parse_args()
    print(args)
...
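
Because every GCNLayer in the concat variant widens its input, the width seen
by out_layer grows linearly with depth. A quick sanity check (sizes below are
assumed, Cora-like; they are not taken from the commit):

in_feats, n_hidden, n_layers = 1433, 16, 1   # assumed Cora-like sizes
width = in_feats + n_hidden                  # concat of [features, inp_layer(features)]
width += n_hidden * n_layers                 # each GCNLayer appends n_hidden channels
print(width)                                 # 1465 with these sizes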
...@@ -2,78 +2,105 @@
Semi-Supervised Classification with Graph Convolutional Networks
Paper: https://arxiv.org/abs/1609.02907
Code: https://github.com/tkipf/gcn
GCN with SPMV specialization.
"""
import argparse, time, math
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from dgl import DGLGraph
from dgl.data import register_data_args, load_data

def gcn_msg(edge):
    msg = edge.src['h'] * edge.src['norm']
    return {'m': msg}

def gcn_reduce(node):
    accum = torch.sum(node.mailbox['m'], 1) * node.data['norm']
    return {'h': accum}

class NodeApplyModule(nn.Module):
    def __init__(self, out_feats, activation=None, bias=True):
        super(NodeApplyModule, self).__init__()
        if bias:
            self.bias = nn.Parameter(torch.Tensor(out_feats))
        else:
            self.bias = None
        self.activation = activation
        self.reset_parameters()

    def reset_parameters(self):
        if self.bias is not None:
            stdv = 1. / math.sqrt(self.bias.size(0))
            self.bias.data.uniform_(-stdv, stdv)

    def forward(self, nodes):
        h = nodes.data['h']
        if self.bias is not None:
            h = h + self.bias
        if self.activation:
            h = self.activation(h)
        return {'h': h}

class GCNLayer(nn.Module):
    def __init__(self,
                 g,
                 in_feats,
                 out_feats,
                 activation,
                 dropout,
                 bias=True):
        super(GCNLayer, self).__init__()
        self.g = g
        self.weight = nn.Parameter(torch.Tensor(in_feats, out_feats))
        if dropout:
            self.dropout = nn.Dropout(p=dropout)
        else:
            self.dropout = 0.
        self.node_update = NodeApplyModule(out_feats, activation, bias)
        self.reset_parameters()

    def reset_parameters(self):
        stdv = 1. / math.sqrt(self.weight.size(1))
        self.weight.data.uniform_(-stdv, stdv)

    def forward(self, h):
        if self.dropout:
            h = self.dropout(h)
        self.g.ndata['h'] = torch.mm(h, self.weight)
        self.g.update_all(gcn_msg, gcn_reduce, self.node_update)
        h = self.g.ndata.pop('h')
        return h

class GCN(nn.Module):
    def __init__(self,
                 g,
                 in_feats,
                 n_hidden,
                 n_classes,
                 n_layers,
                 activation,
                 dropout):
        super(GCN, self).__init__()
        self.layers = nn.ModuleList()
        # input layer
        self.layers.append(GCNLayer(g, in_feats, n_hidden, activation, dropout))
        # hidden layers
        for i in range(n_layers - 1):
            self.layers.append(GCNLayer(g, n_hidden, n_hidden, activation, dropout))
        # output layer
        self.layers.append(GCNLayer(g, n_hidden, n_classes, None, dropout))

    def forward(self, features):
        h = features
        for layer in self.layers:
            h = layer(h)
        return h
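
# Shape walk-through with assumed Cora-like sizes (in_feats=1433, n_hidden=16,
# n_classes=7, n_layers=1): the stack is 1433 -> 16 (relu) -> 7, and each
# GCNLayer runs dropout, h @ W, the SPMV aggregation, then bias/activation
# inside NodeApplyModule.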

def evaluate(model, features, labels, mask):
    model.eval()
...@@ -88,7 +115,6 @@ def evaluate(model, features, labels, mask):

def main(args):
    # load and preprocess dataset
    data = load_data(args)
    features = torch.FloatTensor(data.features)
    labels = torch.LongTensor(data.labels)
    train_mask = torch.ByteTensor(data.train_mask)
...@@ -97,6 +123,16 @@ def main(args):
    in_feats = features.shape[1]
    n_classes = data.num_labels
    n_edges = data.graph.number_of_edges()
    print("""----Data statistics------
      #Edges %d
      #Classes %d
      #Train samples %d
      #Val samples %d
      #Test samples %d""" %
          (n_edges, n_classes,
           train_mask.sum().item(),
           val_mask.sum().item(),
           test_mask.sum().item()))

    if args.gpu < 0:
        cuda = False
...@@ -133,6 +169,7 @@ def main(args):
    if cuda:
        model.cuda()
    loss_fcn = torch.nn.CrossEntropyLoss()
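    # CrossEntropyLoss fuses the former log_softmax + nll_loss pair, so the
    # objective is unchanged; only the call site is simpler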

    # use optimizer
    optimizer = torch.optim.Adam(model.parameters(),
...@@ -147,8 +184,7 @@ def main(args):
        t0 = time.time()
        # forward
        logits = model(features)
        loss = loss_fcn(logits[train_mask], labels[train_mask])

        optimizer.zero_grad()
        loss.backward()
...@@ -159,8 +195,8 @@ def main(args):
        acc = evaluate(model, features, labels, val_mask)
        print("Epoch {:05d} | Time(s) {:.4f} | Loss {:.4f} | Accuracy {:.4f} | "
              "ETputs(KTEPS) {:.2f}".format(epoch, np.mean(dur), loss.item(),
                                            acc, n_edges / np.mean(dur) / 1000))

    print()
    acc = evaluate(model, features, labels, test_mask)
...