"tests/git@developer.sourcefind.cn:OpenDAS/pytorch3d.git" did not exist on "fc156b50c0d6147ca00755059fb1ff96133827df"
Unverified Commit 565f0c88 authored by Minjie Wang's avatar Minjie Wang Committed by GitHub
Browse files

[WIP] [NN] Refactor NN package (#406)

* refactor graph conv

* docs & tests

* fix lint

* fix lint

* fix lint

* fix lint script

* fix lint

* Update

* Style fix

* Fix style

* Fix style

* Fix gpu case

* Fix for gpu case

* Hotfix edgesoftmax docs

* Handle repeated features

* Add docstring

* Set default arguments

* Remove dropout from nn.conv

* Fix

* add util fn for renaming

* revert gcn_spmv.py

* mx folder

* fix weird bug

* fix mx

* fix lint
parent 8c750170
...@@ -14,3 +14,4 @@ API Reference ...@@ -14,3 +14,4 @@ API Reference
sampler sampler
data data
transform transform
nn
.. _apinn-mxnet:
dgl.nn.mxnet
============
dgl.nn.mxnet.conv
-----------------
.. automodule:: dgl.nn.mxnet.conv
.. autoclass:: dgl.nn.mxnet.conv.GraphConv
:members: weight, bias, forward
:show-inheritance:
.. _apinn-pytorch:
dgl.nn.pytorch
==============
dgl.nn.pytorch.conv
-------------------
.. automodule:: dgl.nn.pytorch.conv
.. autoclass:: dgl.nn.pytorch.conv.GraphConv
:members: weight, bias, forward, reset_parameters
:show-inheritance:
dgl.nn.pytorch.softmax
----------------------
.. automodule:: dgl.nn.pytorch.softmax
.. autoclass:: dgl.nn.pytorch.softmax.EdgeSoftmax
:members: forward
:show-inheritance:
.. _apinn:
dgl.nn
======
.. toctree::
nn.pytorch
nn.mxnet
...@@ -16,9 +16,11 @@ pip install requests ...@@ -16,9 +16,11 @@ pip install requests
Codes Codes
----- -----
The folder contains two implementations of GCN. `gcn.py` uses user-defined The folder contains three implementations of GCN:
message and reduce functions. `gcn_spmv.py` uses DGL's builtin functions so - `gcn.py` uses DGL's predefined graph convolution module.
SPMV optimization could be applied. - `gcn_mp.py` uses user-defined message and reduce functions.
- `gcn_spmv.py` improves from `gcn_mp.py` by using DGL's builtin functions
so SPMV optimization could be applied.
The provided implementation in `gcn_concat.py` is a bit different from the The provided implementation in `gcn_concat.py` is a bit different from the
original paper for better performance, credit to @yifeim and @ZiyueHuang. original paper for better performance, credit to @yifeim and @ZiyueHuang.
...@@ -27,15 +29,15 @@ Results ...@@ -27,15 +29,15 @@ Results
------- -------
Run with following (available dataset: "cora", "citeseer", "pubmed") Run with following (available dataset: "cora", "citeseer", "pubmed")
```bash ```bash
DGLBACKEND=mxnet python3 gcn_spmv.py --dataset cora --gpu 0 DGLBACKEND=mxnet python3 train.py --dataset cora --gpu 0
``` ```
* cora: ~0.810 (paper: 0.815) * cora: ~0.810 (paper: 0.815)
* citeseer: ~0.702 (paper: 0.703) * citeseer: ~0.702 (paper: 0.703)
* pubmed: ~0.780 (paper: 0.790) * pubmed: ~0.780 (paper: 0.790)
Results (`gcn_concat.py vs. gcn_spmv.py`) Results (`gcn_concat.py vs. gcn.py`)
------------------------- ------------------------------------
`gcn_concat.py` uses concatenation of hidden units to account for multi-hop `gcn_concat.py` uses concatenation of hidden units to account for multi-hop
skip-connections, while `gcn_spmv.py` uses simple additions (the original paper skip-connections, while `gcn_spmv.py` uses simple additions (the original paper
omitted this detail). We feel concatenation is superior omitted this detail). We feel concatenation is superior
...@@ -90,10 +92,10 @@ DGLBACKEND=mxnet python3 examples/mxnet/gcn/gcn_concat.py --dataset "cora" --n-e ...@@ -90,10 +92,10 @@ DGLBACKEND=mxnet python3 examples/mxnet/gcn/gcn_concat.py --dataset "cora" --n-e
DGLBACKEND=mxnet python3 examples/mxnet/gcn/gcn_concat.py --dataset "pubmed" --n-epochs 200 --n-layers 0 DGLBACKEND=mxnet python3 examples/mxnet/gcn/gcn_concat.py --dataset "pubmed" --n-epochs 200 --n-layers 0
# Final accuracy 77.40% with 2-layer GCN # Final accuracy 77.40% with 2-layer GCN
DGLBACKEND=mxnet python3 examples/mxnet/gcn/gcn_spmv.py --dataset "cora" --n-epochs 200 --n-layers 1 DGLBACKEND=mxnet python3 examples/mxnet/gcn/gcn_spmv.py --dataset "pubmed" --n-epochs 200 --n-layers 1
# Final accuracy 36.20% with 10-layer GCN # Final accuracy 36.20% with 10-layer GCN
DGLBACKEND=mxnet python3 examples/mxnet/gcn/gcn_spmv.py --dataset "cora" --n-epochs 200 --n-layers 9 DGLBACKEND=mxnet python3 examples/mxnet/gcn/gcn_spmv.py --dataset "pubmed" --n-epochs 200 --n-layers 9
# Final accuracy 78.30% with 2-layer GCN with skip connection # Final accuracy 78.30% with 2-layer GCN with skip connection
DGLBACKEND=mxnet python3 examples/mxnet/gcn/gcn_concat.py --dataset "pubmed" --n-epochs 200 --n-layers 2 --normalization 'sym' --self-loop DGLBACKEND=mxnet python3 examples/mxnet/gcn/gcn_concat.py --dataset "pubmed" --n-epochs 200 --n-layers 2 --normalization 'sym' --self-loop
......
"""GCN using DGL nn package
References:
- Semi-Supervised Classification with Graph Convolutional Networks
- Paper: https://arxiv.org/abs/1609.02907
- Code: https://github.com/tkipf/gcn
""" """
Semi-Supervised Classification with Graph Convolutional Networks
Paper: https://arxiv.org/abs/1609.02907
Code: https://github.com/tkipf/gcn
GCN with SPMV optimization
"""
import argparse, time, math
import numpy as np
import mxnet as mx import mxnet as mx
from mxnet import gluon from mxnet import gluon
import dgl import dgl
from dgl import DGLGraph from dgl.nn.mxnet import GraphConv
from dgl.data import register_data_args, load_data
def gcn_msg(edge):
    """Message function: scale each source node's features by its norm."""
    return {'m': edge.src['h'] * edge.src['norm']}
def gcn_reduce(node):
    """Reduce function: sum incoming messages, then rescale by the node norm."""
    summed = mx.nd.sum(node.mailbox['m'], 1)
    return {'h': summed * node.data['norm']}
class NodeUpdate(gluon.Block):
    """Per-node update: add an optional bias and apply an optional activation."""

    def __init__(self, out_feats, activation=None, bias=True):
        super(NodeUpdate, self).__init__()
        with self.name_scope():
            if bias:
                # Bias is a learned Gluon parameter, initialized to zero.
                self.bias = self.params.get('bias', shape=(out_feats,),
                                            init=mx.init.Zero())
            else:
                self.bias = None
        self.activation = activation

    def forward(self, node):
        out = node.data['h']
        if self.bias is not None:
            # Fetch the parameter on the same device as the features.
            out = out + self.bias.data(out.context)
        if self.activation:
            out = self.activation(out)
        return {'h': out}
class GCNLayer(gluon.Block):
    """One graph convolution: dropout -> linear projection -> graph aggregation."""

    def __init__(self,
                 g,
                 in_feats,
                 out_feats,
                 activation,
                 dropout,
                 bias=True):
        super(GCNLayer, self).__init__()
        self.g = g
        self.dropout = dropout
        with self.name_scope():
            self.weight = self.params.get('weight', shape=(in_feats, out_feats),
                                          init=mx.init.Xavier())
            self.node_update = NodeUpdate(out_feats, activation, bias)

    def forward(self, h):
        if self.dropout:
            h = mx.nd.Dropout(h, p=self.dropout)
        # Project features, then aggregate over the graph with message passing.
        projected = mx.nd.dot(h, self.weight.data(h.context))
        self.g.ndata['h'] = projected
        self.g.update_all(gcn_msg, gcn_reduce, self.node_update)
        return self.g.ndata.pop('h')
class GCN(gluon.Block): class GCN(gluon.Block):
def __init__(self, def __init__(self,
...@@ -76,144 +18,23 @@ class GCN(gluon.Block): ...@@ -76,144 +18,23 @@ class GCN(gluon.Block):
n_classes, n_classes,
n_layers, n_layers,
activation, activation,
dropout, dropout):
normalization):
super(GCN, self).__init__() super(GCN, self).__init__()
self.g = g
self.layers = gluon.nn.Sequential() self.layers = gluon.nn.Sequential()
# input layer # input layer
self.layers.add(GCNLayer(g, in_feats, n_hidden, activation, 0)) self.layers.add(GraphConv(in_feats, n_hidden, activation=activation))
# hidden layers # hidden layers
for i in range(n_layers - 1): for i in range(n_layers - 1):
self.layers.add(GCNLayer(g, n_hidden, n_hidden, activation, dropout)) self.layers.add(GraphConv(n_hidden, n_hidden, activation=activation))
# output layer # output layer
self.layers.add(GCNLayer(g, n_hidden, n_classes, None, dropout)) self.layers.add(GraphConv(n_hidden, n_classes))
self.dropout = gluon.nn.Dropout(rate=dropout)
def forward(self, features): def forward(self, features):
h = features h = features
for layer in self.layers: for i, layer in enumerate(self.layers):
h = layer(h) if i != 0:
h = self.dropout(h)
h = layer(h, self.g)
return h return h
def evaluate(model, features, labels, mask):
    """Fraction of mask-selected nodes whose predicted class matches the label."""
    predictions = model(features).argmax(axis=1)
    n_correct = ((predictions == labels) * mask).sum()
    return (n_correct / mask.sum().asscalar()).asscalar()
def main(args):
    """Train and evaluate a GCN on a citation dataset with MXNet."""
    # load and preprocess dataset
    data = load_data(args)
    if args.self_loop:
        # Add a self-edge to every node so each node also sees its own features.
        data.graph.add_edges_from([(i,i) for i in range(len(data.graph))])
    features = mx.nd.array(data.features)
    labels = mx.nd.array(data.labels)
    train_mask = mx.nd.array(data.train_mask)
    val_mask = mx.nd.array(data.val_mask)
    test_mask = mx.nd.array(data.test_mask)
    in_feats = features.shape[1]
    n_classes = data.num_labels
    n_edges = data.graph.number_of_edges()
    print("""----Data statistics------'
      #Edges %d
      #Classes %d
      #Train samples %d
      #Val samples %d
      #Test samples %d""" %
          (n_edges, n_classes,
           train_mask.sum().asscalar(),
           val_mask.sum().asscalar(),
           test_mask.sum().asscalar()))

    # Pick CPU or GPU context and move every tensor onto it.
    if args.gpu < 0:
        cuda = False
        ctx = mx.cpu(0)
    else:
        cuda = True
        ctx = mx.gpu(args.gpu)

    features = features.as_in_context(ctx)
    labels = labels.as_in_context(ctx)
    train_mask = train_mask.as_in_context(ctx)
    val_mask = val_mask.as_in_context(ctx)
    test_mask = test_mask.as_in_context(ctx)

    # create GCN model
    g = DGLGraph(data.graph)
    # normalization: per-node D^{-1/2} factor used by the message/reduce functions.
    degs = g.in_degrees().astype('float32')
    norm = mx.nd.power(degs, -0.5)
    if cuda:
        norm = norm.as_in_context(ctx)
    g.ndata['norm'] = mx.nd.expand_dims(norm, 1)

    model = GCN(g,
                in_feats,
                args.n_hidden,
                n_classes,
                args.n_layers,
                mx.nd.relu,
                args.dropout,
                args.normalization)
    model.initialize(ctx=ctx)
    n_train_samples = train_mask.sum().asscalar()
    loss_fcn = gluon.loss.SoftmaxCELoss()

    # use optimizer
    print(model.collect_params())
    trainer = gluon.Trainer(model.collect_params(), 'adam',
                            {'learning_rate': args.lr, 'wd': args.weight_decay})

    # initialize graph
    dur = []
    for epoch in range(args.n_epochs):
        # Skip the first few epochs when timing to avoid warm-up noise.
        if epoch >= 3:
            t0 = time.time()
        # forward
        with mx.autograd.record():
            pred = model(features)
            # The expanded train_mask zeroes out the loss on non-training nodes.
            loss = loss_fcn(pred, labels, mx.nd.expand_dims(train_mask, 1))
            loss = loss.sum() / n_train_samples

        loss.backward()
        trainer.step(batch_size=1)

        if epoch >= 3:
            dur.append(time.time() - t0)
        acc = evaluate(model, features, labels, val_mask)
        print("Epoch {:05d} | Time(s) {:.4f} | Loss {:.4f} | Accuracy {:.4f} | "
              "ETputs(KTEPS) {:.2f}". format(
            epoch, np.mean(dur), loss.asscalar(), acc, n_edges / np.mean(dur) / 1000))

    # test set accuracy
    acc = evaluate(model, features, labels, test_mask)
    print("Test accuracy {:.2%}".format(acc))
if __name__ == '__main__':
    # Command-line entry point: parse hyperparameters and run training.
    parser = argparse.ArgumentParser(description='GCN')
    register_data_args(parser)
    parser.add_argument("--dropout", type=float, default=0.5,
                        help="dropout probability")
    parser.add_argument("--gpu", type=int, default=-1,
                        help="gpu")
    parser.add_argument("--lr", type=float, default=3e-2,
                        help="learning rate")
    parser.add_argument("--n-epochs", type=int, default=200,
                        help="number of training epochs")
    parser.add_argument("--n-hidden", type=int, default=16,
                        help="number of hidden gcn units")
    parser.add_argument("--n-layers", type=int, default=1,
                        help="number of hidden gcn layers")
    parser.add_argument("--normalization",
                        choices=['sym','left'], default=None,
                        help="graph normalization types (default=None)")
    parser.add_argument("--self-loop", action='store_true',
                        help="graph self-loop (default=False)")
    parser.add_argument("--weight-decay", type=float, default=5e-4,
                        help="Weight for L2 loss")
    args = parser.parse_args()
    print(args)

    main(args)
"""GCN using basic message passing
References:
- Semi-Supervised Classification with Graph Convolutional Networks
- Paper: https://arxiv.org/abs/1609.02907
- Code: https://github.com/tkipf/gcn
"""
import mxnet as mx
from mxnet import gluon
def gcn_msg(edge):
    """Message function: scale each source node's features by its norm."""
    return {'m': edge.src['h'] * edge.src['norm']}
def gcn_reduce(node):
    """Reduce function: sum incoming messages, then rescale by the node norm."""
    summed = mx.nd.sum(node.mailbox['m'], 1)
    return {'h': summed * node.data['norm']}
class NodeUpdate(gluon.Block):
    """Per-node update: add an optional bias and apply an optional activation."""

    def __init__(self, out_feats, activation=None, bias=True):
        super(NodeUpdate, self).__init__()
        with self.name_scope():
            if bias:
                # Bias is a learned Gluon parameter, initialized to zero.
                self.bias = self.params.get('bias', shape=(out_feats,),
                                            init=mx.init.Zero())
            else:
                self.bias = None
        self.activation = activation

    def forward(self, node):
        out = node.data['h']
        if self.bias is not None:
            # Fetch the parameter on the same device as the features.
            out = out + self.bias.data(out.context)
        if self.activation:
            out = self.activation(out)
        return {'h': out}
class GCNLayer(gluon.Block):
    """One graph convolution: dropout -> linear projection -> graph aggregation."""

    def __init__(self,
                 g,
                 in_feats,
                 out_feats,
                 activation,
                 dropout,
                 bias=True):
        super(GCNLayer, self).__init__()
        self.g = g
        self.dropout = dropout
        with self.name_scope():
            self.weight = self.params.get('weight', shape=(in_feats, out_feats),
                                          init=mx.init.Xavier())
            self.node_update = NodeUpdate(out_feats, activation, bias)

    def forward(self, h):
        if self.dropout:
            h = mx.nd.Dropout(h, p=self.dropout)
        # Project features, then aggregate over the graph with message passing.
        projected = mx.nd.dot(h, self.weight.data(h.context))
        self.g.ndata['h'] = projected
        self.g.update_all(gcn_msg, gcn_reduce, self.node_update)
        return self.g.ndata.pop('h')
class GCN(gluon.Block):
    """Multi-layer GCN: input layer, (n_layers - 1) hidden layers, output layer."""

    def __init__(self,
                 g,
                 in_feats,
                 n_hidden,
                 n_classes,
                 n_layers,
                 activation,
                 dropout):
        super(GCN, self).__init__()
        self.layers = gluon.nn.Sequential()
        # Input layer: no dropout on the raw features (dropout argument is 0).
        self.layers.add(GCNLayer(g, in_feats, n_hidden, activation, 0))
        # Hidden layers.
        for _ in range(n_layers - 1):
            self.layers.add(GCNLayer(g, n_hidden, n_hidden, activation, dropout))
        # Output layer: raw class scores, no activation.
        self.layers.add(GCNLayer(g, n_hidden, n_classes, None, dropout))

    def forward(self, features):
        hidden = features
        for layer in self.layers:
            hidden = layer(hidden)
        return hidden
""" """GCN using builtin functions that enables SPMV optimization.
Semi-Supervised Classification with Graph Convolutional Networks
Paper: https://arxiv.org/abs/1609.02907
Code: https://github.com/tkipf/gcn
GCN with SPMV optimization References:
- Semi-Supervised Classification with Graph Convolutional Networks
- Paper: https://arxiv.org/abs/1609.02907
- Code: https://github.com/tkipf/gcn
""" """
import argparse, time, math import math
import numpy as np
import mxnet as mx import mxnet as mx
from mxnet import gluon from mxnet import gluon
import dgl import dgl
from dgl import DGLGraph
import dgl.function as fn import dgl.function as fn
from dgl.data import register_data_args, load_data
class GCNLayer(gluon.Block): class GCNLayer(gluon.Block):
def __init__(self, def __init__(self,
...@@ -63,8 +60,7 @@ class GCN(gluon.Block): ...@@ -63,8 +60,7 @@ class GCN(gluon.Block):
n_classes, n_classes,
n_layers, n_layers,
activation, activation,
dropout, dropout):
normalization):
super(GCN, self).__init__() super(GCN, self).__init__()
self.layers = gluon.nn.Sequential() self.layers = gluon.nn.Sequential()
# input layer # input layer
...@@ -81,126 +77,3 @@ class GCN(gluon.Block): ...@@ -81,126 +77,3 @@ class GCN(gluon.Block):
for layer in self.layers: for layer in self.layers:
h = layer(h) h = layer(h)
return h return h
def evaluate(model, features, labels, mask):
    """Fraction of mask-selected nodes whose predicted class matches the label."""
    predictions = model(features).argmax(axis=1)
    n_correct = ((predictions == labels) * mask).sum()
    return (n_correct / mask.sum().asscalar()).asscalar()
def main(args):
    """Train and evaluate a GCN on a citation dataset with MXNet."""
    # load and preprocess dataset
    data = load_data(args)
    if args.self_loop:
        # Add a self-edge to every node so each node also sees its own features.
        data.graph.add_edges_from([(i,i) for i in range(len(data.graph))])
    features = mx.nd.array(data.features)
    labels = mx.nd.array(data.labels)
    train_mask = mx.nd.array(data.train_mask)
    val_mask = mx.nd.array(data.val_mask)
    test_mask = mx.nd.array(data.test_mask)
    in_feats = features.shape[1]
    n_classes = data.num_labels
    n_edges = data.graph.number_of_edges()
    print("""----Data statistics------'
      #Edges %d
      #Classes %d
      #Train samples %d
      #Val samples %d
      #Test samples %d""" %
          (n_edges, n_classes,
           train_mask.sum().asscalar(),
           val_mask.sum().asscalar(),
           test_mask.sum().asscalar()))

    # Pick CPU or GPU context and move every tensor onto it.
    if args.gpu < 0:
        cuda = False
        ctx = mx.cpu(0)
    else:
        cuda = True
        ctx = mx.gpu(args.gpu)

    features = features.as_in_context(ctx)
    labels = labels.as_in_context(ctx)
    train_mask = train_mask.as_in_context(ctx)
    val_mask = val_mask.as_in_context(ctx)
    test_mask = test_mask.as_in_context(ctx)

    # create GCN model
    g = DGLGraph(data.graph)
    # normalization: per-node D^{-1/2} factor used by the message/reduce functions.
    degs = g.in_degrees().astype('float32')
    norm = mx.nd.power(degs, -0.5)
    if cuda:
        norm = norm.as_in_context(ctx)
    g.ndata['norm'] = mx.nd.expand_dims(norm, 1)

    model = GCN(g,
                in_feats,
                args.n_hidden,
                n_classes,
                args.n_layers,
                mx.nd.relu,
                args.dropout,
                args.normalization)
    model.initialize(ctx=ctx)
    n_train_samples = train_mask.sum().asscalar()
    loss_fcn = gluon.loss.SoftmaxCELoss()

    # use optimizer
    print(model.collect_params())
    trainer = gluon.Trainer(model.collect_params(), 'adam',
                            {'learning_rate': args.lr, 'wd': args.weight_decay})

    # initialize graph
    dur = []
    for epoch in range(args.n_epochs):
        # Skip the first few epochs when timing to avoid warm-up noise.
        if epoch >= 3:
            t0 = time.time()
        # forward
        with mx.autograd.record():
            pred = model(features)
            # The expanded train_mask zeroes out the loss on non-training nodes.
            loss = loss_fcn(pred, labels, mx.nd.expand_dims(train_mask, 1))
            loss = loss.sum() / n_train_samples

        loss.backward()
        trainer.step(batch_size=1)

        if epoch >= 3:
            dur.append(time.time() - t0)
        acc = evaluate(model, features, labels, val_mask)
        print("Epoch {:05d} | Time(s) {:.4f} | Loss {:.4f} | Accuracy {:.4f} | "
              "ETputs(KTEPS) {:.2f}". format(
            epoch, np.mean(dur), loss.asscalar(), acc, n_edges / np.mean(dur) / 1000))

    # test set accuracy
    acc = evaluate(model, features, labels, test_mask)
    print("Test accuracy {:.2%}".format(acc))
if __name__ == '__main__':
    # Command-line entry point: parse hyperparameters and run training.
    parser = argparse.ArgumentParser(description='GCN')
    register_data_args(parser)
    parser.add_argument("--dropout", type=float, default=0.5,
                        help="dropout probability")
    parser.add_argument("--gpu", type=int, default=-1,
                        help="gpu")
    parser.add_argument("--lr", type=float, default=3e-2,
                        help="learning rate")
    parser.add_argument("--n-epochs", type=int, default=200,
                        help="number of training epochs")
    parser.add_argument("--n-hidden", type=int, default=16,
                        help="number of hidden gcn units")
    parser.add_argument("--n-layers", type=int, default=1,
                        help="number of hidden gcn layers")
    parser.add_argument("--normalization",
                        choices=['sym','left'], default=None,
                        help="graph normalization types (default=None)")
    parser.add_argument("--self-loop", action='store_true',
                        help="graph self-loop (default=False)")
    parser.add_argument("--weight-decay", type=float, default=5e-4,
                        help="Weight for L2 loss")
    args = parser.parse_args()
    print(args)

    main(args)
"""Training GCN model on citation graphs."""
import argparse, time
import numpy as np
import mxnet as mx
from mxnet import gluon
from dgl import DGLGraph
from dgl.data import register_data_args, load_data
from gcn import GCN
#from gcn_mp import GCN
#from gcn_spmv import GCN
def evaluate(model, features, labels, mask):
    """Fraction of mask-selected nodes whose predicted class matches the label."""
    predictions = model(features).argmax(axis=1)
    n_correct = ((predictions == labels) * mask).sum()
    return (n_correct / mask.sum().asscalar()).asscalar()
def main(args):
    """Train and evaluate a GCN on a citation dataset with MXNet."""
    # load and preprocess dataset
    data = load_data(args)
    features = mx.nd.array(data.features)
    labels = mx.nd.array(data.labels)
    train_mask = mx.nd.array(data.train_mask)
    val_mask = mx.nd.array(data.val_mask)
    test_mask = mx.nd.array(data.test_mask)
    in_feats = features.shape[1]
    n_classes = data.num_labels
    n_edges = data.graph.number_of_edges()
    print("""----Data statistics------'
      #Edges %d
      #Classes %d
      #Train samples %d
      #Val samples %d
      #Test samples %d""" %
          (n_edges, n_classes,
           train_mask.sum().asscalar(),
           val_mask.sum().asscalar(),
           test_mask.sum().asscalar()))

    # Pick CPU or GPU context and move every tensor onto it.
    if args.gpu < 0:
        cuda = False
        ctx = mx.cpu(0)
    else:
        cuda = True
        ctx = mx.gpu(args.gpu)

    features = features.as_in_context(ctx)
    labels = labels.as_in_context(ctx)
    train_mask = train_mask.as_in_context(ctx)
    val_mask = val_mask.as_in_context(ctx)
    test_mask = test_mask.as_in_context(ctx)

    # create GCN model
    g = DGLGraph(data.graph)
    # Add a self-edge to every node so each node also sees its own features.
    g.add_edges(g.nodes(), g.nodes())
    # normalization: per-node D^{-1/2} factor stored on the graph.
    degs = g.in_degrees().astype('float32')
    norm = mx.nd.power(degs, -0.5)
    if cuda:
        norm = norm.as_in_context(ctx)
    g.ndata['norm'] = mx.nd.expand_dims(norm, 1)

    model = GCN(g,
                in_feats,
                args.n_hidden,
                n_classes,
                args.n_layers,
                mx.nd.relu,
                args.dropout)

    model.initialize(ctx=ctx)
    n_train_samples = train_mask.sum().asscalar()
    loss_fcn = gluon.loss.SoftmaxCELoss()

    # use optimizer
    print(model.collect_params())
    trainer = gluon.Trainer(model.collect_params(), 'adam',
                            {'learning_rate': args.lr, 'wd': args.weight_decay})

    # initialize graph
    dur = []
    for epoch in range(args.n_epochs):
        # Skip the first few epochs when timing to avoid warm-up noise.
        if epoch >= 3:
            t0 = time.time()
        # forward
        with mx.autograd.record():
            pred = model(features)
            # The expanded train_mask zeroes out the loss on non-training nodes.
            loss = loss_fcn(pred, labels, mx.nd.expand_dims(train_mask, 1))
            loss = loss.sum() / n_train_samples

        loss.backward()
        trainer.step(batch_size=1)

        if epoch >= 3:
            # asscalar() waits for the async computation, so the timing is accurate.
            loss.asscalar()
            dur.append(time.time() - t0)
        acc = evaluate(model, features, labels, val_mask)
        print("Epoch {:05d} | Time(s) {:.4f} | Loss {:.4f} | Accuracy {:.4f} | "
              "ETputs(KTEPS) {:.2f}". format(
            epoch, np.mean(dur), loss.asscalar(), acc, n_edges / np.mean(dur) / 1000))

    # test set accuracy
    acc = evaluate(model, features, labels, test_mask)
    print("Test accuracy {:.2%}".format(acc))
if __name__ == '__main__':
    # Command-line entry point: parse hyperparameters and run training.
    parser = argparse.ArgumentParser(description='GCN')
    register_data_args(parser)
    parser.add_argument("--dropout", type=float, default=0.5,
                        help="dropout probability")
    parser.add_argument("--gpu", type=int, default=-1,
                        help="gpu")
    parser.add_argument("--lr", type=float, default=3e-2,
                        help="learning rate")
    parser.add_argument("--n-epochs", type=int, default=200,
                        help="number of training epochs")
    parser.add_argument("--n-hidden", type=int, default=16,
                        help="number of hidden gcn units")
    parser.add_argument("--n-layers", type=int, default=1,
                        help="number of hidden gcn layers")
    parser.add_argument("--weight-decay", type=float, default=5e-4,
                        help="Weight for L2 loss")
    args = parser.parse_args()
    print(args)

    main(args)
...@@ -10,6 +10,7 @@ Pytorch implementation: https://github.com/Diego999/pyGAT ...@@ -10,6 +10,7 @@ Pytorch implementation: https://github.com/Diego999/pyGAT
import torch import torch
import torch.nn as nn import torch.nn as nn
import dgl.function as fn import dgl.function as fn
from dgl.nn.pytorch import EdgeSoftmax
class GraphAttention(nn.Module): class GraphAttention(nn.Module):
def __init__(self, def __init__(self,
...@@ -39,6 +40,7 @@ class GraphAttention(nn.Module): ...@@ -39,6 +40,7 @@ class GraphAttention(nn.Module):
nn.init.xavier_normal_(self.attn_l.data, gain=1.414) nn.init.xavier_normal_(self.attn_l.data, gain=1.414)
nn.init.xavier_normal_(self.attn_r.data, gain=1.414) nn.init.xavier_normal_(self.attn_r.data, gain=1.414)
self.leaky_relu = nn.LeakyReLU(alpha) self.leaky_relu = nn.LeakyReLU(alpha)
self.softmax = EdgeSoftmax()
self.residual = residual self.residual = residual
if residual: if residual:
if in_dim != out_dim: if in_dim != out_dim:
...@@ -79,14 +81,11 @@ class GraphAttention(nn.Module): ...@@ -79,14 +81,11 @@ class GraphAttention(nn.Module):
return {'a' : a} return {'a' : a}
def edge_softmax(self): def edge_softmax(self):
# compute the max scores, normalizer = self.softmax(self.g.edata['a'], self.g)
self.g.update_all(fn.copy_edge('a', 'a'), fn.max('a', 'a_max')) # Save normalizer
# minus the max and exp self.g.ndata['z'] = normalizer
self.g.apply_edges(lambda edges : {'a' : torch.exp(edges.data['a'] - edges.dst['a_max'])}) # Dropout attention scores and save them
# compute dropout self.g.edata['a_drop'] = self.attn_drop(scores)
self.g.apply_edges(lambda edges : {'a_drop' : self.attn_drop(edges.data['a'])})
# compute normalizer
self.g.update_all(fn.copy_edge('a', 'a'), fn.sum('a', 'z'))
class GAT(nn.Module): class GAT(nn.Module):
def __init__(self, def __init__(self,
......
...@@ -16,16 +16,19 @@ pip install torch requests ...@@ -16,16 +16,19 @@ pip install torch requests
Codes Codes
----- -----
The folder contains two implementations of GCN. `gcn.py` uses user-defined The folder contains three implementations of GCN:
message and reduce functions. `gcn_spmv.py` uses DGL's builtin functions so - `gcn.py` uses DGL's predefined graph convolution module.
SPMV optimization could be applied. - `gcn_mp.py` uses user-defined message and reduce functions.
- `gcn_spmv.py` improves from `gcn_mp.py` by using DGL's builtin functions
so SPMV optimization could be applied.
Modify `train.py` to switch between different implementations.
Results Results
------- -------
Run with following (available dataset: "cora", "citeseer", "pubmed") Run with following (available dataset: "cora", "citeseer", "pubmed")
```bash ```bash
python gcn_spmv.py --dataset cora --gpu 0 python train.py --dataset cora --gpu 0
``` ```
* cora: ~0.810 (0.79-0.83) (paper: 0.815) * cora: ~0.810 (0.79-0.83) (paper: 0.815)
......
"""GCN using DGL nn package
References:
- Semi-Supervised Classification with Graph Convolutional Networks
- Paper: https://arxiv.org/abs/1609.02907
- Code: https://github.com/tkipf/gcn
""" """
Semi-Supervised Classification with Graph Convolutional Networks
Paper: https://arxiv.org/abs/1609.02907
Code: https://github.com/tkipf/gcn
GCN with SPMV specialization.
"""
import argparse, time, math
import numpy as np
import torch import torch
import torch.nn as nn import torch.nn as nn
import torch.nn.functional as F from dgl.nn.pytorch import GraphConv
from dgl import DGLGraph
from dgl.data import register_data_args, load_data
def gcn_msg(edge):
    """Message function: scale each source node's features by its norm."""
    return {'m': edge.src['h'] * edge.src['norm']}
def gcn_reduce(node):
    """Reduce function: sum incoming messages, then rescale by the node norm."""
    aggregated = torch.sum(node.mailbox['m'], 1)
    return {'h': aggregated * node.data['norm']}
class NodeApplyModule(nn.Module):
    """Per-node update: add an optional bias and apply an optional activation."""

    def __init__(self, out_feats, activation=None, bias=True):
        super(NodeApplyModule, self).__init__()
        self.bias = nn.Parameter(torch.Tensor(out_feats)) if bias else None
        self.activation = activation
        self.reset_parameters()

    def reset_parameters(self):
        """Initialize the bias uniformly in [-1/sqrt(n), 1/sqrt(n)]."""
        if self.bias is not None:
            bound = 1. / math.sqrt(self.bias.size(0))
            self.bias.data.uniform_(-bound, bound)

    def forward(self, nodes):
        out = nodes.data['h']
        if self.bias is not None:
            out = out + self.bias
        if self.activation:
            out = self.activation(out)
        return {'h': out}
class GCNLayer(nn.Module):
    """One graph convolution: dropout -> linear projection -> graph aggregation."""

    def __init__(self,
                 g,
                 in_feats,
                 out_feats,
                 activation,
                 dropout,
                 bias=True):
        super(GCNLayer, self).__init__()
        self.g = g
        self.weight = nn.Parameter(torch.Tensor(in_feats, out_feats))
        # Falsy sentinel 0. when dropout is disabled, matching the original contract.
        self.dropout = nn.Dropout(p=dropout) if dropout else 0.
        self.node_update = NodeApplyModule(out_feats, activation, bias)
        self.reset_parameters()

    def reset_parameters(self):
        """Uniform init scaled by 1/sqrt(out_feats)."""
        bound = 1. / math.sqrt(self.weight.size(1))
        self.weight.data.uniform_(-bound, bound)

    def forward(self, h):
        if self.dropout:
            h = self.dropout(h)
        # Project features, then aggregate over the graph with message passing.
        self.g.ndata['h'] = torch.mm(h, self.weight)
        self.g.update_all(gcn_msg, gcn_reduce, self.node_update)
        return self.g.ndata.pop('h')
class GCN(nn.Module): class GCN(nn.Module):
def __init__(self, def __init__(self,
...@@ -87,140 +19,21 @@ class GCN(nn.Module): ...@@ -87,140 +19,21 @@ class GCN(nn.Module):
activation, activation,
dropout): dropout):
super(GCN, self).__init__() super(GCN, self).__init__()
self.g = g
self.layers = nn.ModuleList() self.layers = nn.ModuleList()
# input layer # input layer
self.layers.append(GCNLayer(g, in_feats, n_hidden, activation, dropout)) self.layers.append(GraphConv(in_feats, n_hidden, activation=activation))
# hidden layers # hidden layers
for i in range(n_layers - 1): for i in range(n_layers - 1):
self.layers.append(GCNLayer(g, n_hidden, n_hidden, activation, dropout)) self.layers.append(GraphConv(n_hidden, n_hidden, activation=activation))
# output layer # output layer
self.layers.append(GCNLayer(g, n_hidden, n_classes, None, dropout)) self.layers.append(GraphConv(n_hidden, n_classes))
self.dropout = nn.Dropout(p=dropout)
def forward(self, features): def forward(self, features):
h = features h = features
for layer in self.layers: for i, layer in enumerate(self.layers):
h = layer(h) if i != 0:
h = self.dropout(h)
h = layer(h, self.g)
return h return h
def evaluate(model, features, labels, mask):
    """Return classification accuracy of ``model`` on the nodes selected by ``mask``."""
    model.eval()
    with torch.no_grad():
        scores = model(features)[mask]
        target = labels[mask]
        predicted = scores.max(dim=1)[1]
        hits = (predicted == target).sum()
        return hits.item() * 1.0 / len(target)
def main(args):
    """Train and evaluate a GCN on a citation dataset with PyTorch."""
    # load and preprocess dataset
    data = load_data(args)
    features = torch.FloatTensor(data.features)
    labels = torch.LongTensor(data.labels)
    train_mask = torch.ByteTensor(data.train_mask)
    val_mask = torch.ByteTensor(data.val_mask)
    test_mask = torch.ByteTensor(data.test_mask)
    in_feats = features.shape[1]
    n_classes = data.num_labels
    n_edges = data.graph.number_of_edges()
    print("""----Data statistics------'
      #Edges %d
      #Classes %d
      #Train samples %d
      #Val samples %d
      #Test samples %d""" %
          (n_edges, n_classes,
           train_mask.sum().item(),
           val_mask.sum().item(),
           test_mask.sum().item()))

    # Move every tensor to the requested GPU, or stay on CPU when gpu < 0.
    if args.gpu < 0:
        cuda = False
    else:
        cuda = True
        torch.cuda.set_device(args.gpu)
        features = features.cuda()
        labels = labels.cuda()
        train_mask = train_mask.cuda()
        val_mask = val_mask.cuda()
        test_mask = test_mask.cuda()

    # graph preprocess and calculate normalization factor
    g = DGLGraph(data.graph)
    n_edges = g.number_of_edges()
    # add self loop
    g.add_edges(g.nodes(), g.nodes())
    # normalization: D^{-1/2}; isolated nodes get norm 0 instead of inf.
    degs = g.in_degrees().float()
    norm = torch.pow(degs, -0.5)
    norm[torch.isinf(norm)] = 0
    if cuda:
        norm = norm.cuda()
    g.ndata['norm'] = norm.unsqueeze(1)

    # create GCN model
    model = GCN(g,
                in_feats,
                args.n_hidden,
                n_classes,
                args.n_layers,
                F.relu,
                args.dropout)

    if cuda:
        model.cuda()
    loss_fcn = torch.nn.CrossEntropyLoss()

    # use optimizer
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=args.lr,
                                 weight_decay=args.weight_decay)

    # initialize graph
    dur = []
    for epoch in range(args.n_epochs):
        model.train()
        # Skip the first few epochs when timing to avoid warm-up noise.
        if epoch >= 3:
            t0 = time.time()
        # forward
        logits = model(features)
        # Loss is computed only on the training nodes.
        loss = loss_fcn(logits[train_mask], labels[train_mask])

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if epoch >= 3:
            dur.append(time.time() - t0)

        acc = evaluate(model, features, labels, val_mask)
        print("Epoch {:05d} | Time(s) {:.4f} | Loss {:.4f} | Accuracy {:.4f} | "
              "ETputs(KTEPS) {:.2f}". format(epoch, np.mean(dur), loss.item(),
                                             acc, n_edges / np.mean(dur) / 1000))

    print()
    acc = evaluate(model, features, labels, test_mask)
    print("Test Accuracy {:.4f}".format(acc))
if __name__ == '__main__':
    # Command-line entry point: parse hyperparameters and run training.
    parser = argparse.ArgumentParser(description='GCN')
    register_data_args(parser)
    parser.add_argument("--dropout", type=float, default=0.5,
                        help="dropout probability")
    parser.add_argument("--gpu", type=int, default=-1,
                        help="gpu")
    parser.add_argument("--lr", type=float, default=1e-2,
                        help="learning rate")
    parser.add_argument("--n-epochs", type=int, default=200,
                        help="number of training epochs")
    parser.add_argument("--n-hidden", type=int, default=16,
                        help="number of hidden gcn units")
    parser.add_argument("--n-layers", type=int, default=1,
                        help="number of hidden gcn layers")
    parser.add_argument("--weight-decay", type=float, default=5e-4,
                        help="Weight for L2 loss")
    args = parser.parse_args()
    print(args)

    main(args)
"""GCN using basic message passing
References:
- Semi-Supervised Classification with Graph Convolutional Networks
- Paper: https://arxiv.org/abs/1609.02907
- Code: https://github.com/tkipf/gcn
"""
import argparse, time, math
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from dgl import DGLGraph
from dgl.data import register_data_args, load_data
def gcn_msg(edge):
    """Message function: scale each source node's features by its norm."""
    return {'m': edge.src['h'] * edge.src['norm']}
def gcn_reduce(node):
    """Reduce function: sum incoming messages, then rescale by the node norm."""
    aggregated = torch.sum(node.mailbox['m'], 1)
    return {'h': aggregated * node.data['norm']}
class NodeApplyModule(nn.Module):
    """Per-node update: add an optional bias and apply an optional activation."""

    def __init__(self, out_feats, activation=None, bias=True):
        super(NodeApplyModule, self).__init__()
        self.bias = nn.Parameter(torch.Tensor(out_feats)) if bias else None
        self.activation = activation
        self.reset_parameters()

    def reset_parameters(self):
        """Initialize the bias uniformly in [-1/sqrt(n), 1/sqrt(n)]."""
        if self.bias is not None:
            bound = 1. / math.sqrt(self.bias.size(0))
            self.bias.data.uniform_(-bound, bound)

    def forward(self, nodes):
        out = nodes.data['h']
        if self.bias is not None:
            out = out + self.bias
        if self.activation:
            out = self.activation(out)
        return {'h': out}
class GCNLayer(nn.Module):
    """One graph convolution: dropout -> linear projection -> graph aggregation."""

    def __init__(self,
                 g,
                 in_feats,
                 out_feats,
                 activation,
                 dropout,
                 bias=True):
        super(GCNLayer, self).__init__()
        self.g = g
        self.weight = nn.Parameter(torch.Tensor(in_feats, out_feats))
        # Falsy sentinel 0. when dropout is disabled, matching the original contract.
        self.dropout = nn.Dropout(p=dropout) if dropout else 0.
        self.node_update = NodeApplyModule(out_feats, activation, bias)
        self.reset_parameters()

    def reset_parameters(self):
        """Uniform init scaled by 1/sqrt(out_feats)."""
        bound = 1. / math.sqrt(self.weight.size(1))
        self.weight.data.uniform_(-bound, bound)

    def forward(self, h):
        if self.dropout:
            h = self.dropout(h)
        # Project features, then aggregate over the graph with message passing.
        self.g.ndata['h'] = torch.mm(h, self.weight)
        self.g.update_all(gcn_msg, gcn_reduce, self.node_update)
        return self.g.ndata.pop('h')
class GCN(nn.Module):
    """Multi-layer GCN: one input layer, ``n_layers - 1`` hidden layers,
    and an output layer with no activation."""

    def __init__(self,
                 g,
                 in_feats,
                 n_hidden,
                 n_classes,
                 n_layers,
                 activation,
                 dropout):
        super(GCN, self).__init__()
        stack = [GCNLayer(g, in_feats, n_hidden, activation, dropout)]
        stack += [GCNLayer(g, n_hidden, n_hidden, activation, dropout)
                  for _ in range(n_layers - 1)]
        # output layer: project to class scores, no activation
        stack.append(GCNLayer(g, n_hidden, n_classes, None, dropout))
        self.layers = nn.ModuleList(stack)

    def forward(self, features):
        """Propagate ``features`` through every layer in order."""
        h = features
        for layer in self.layers:
            h = layer(h)
        return h
def evaluate(model, features, labels, mask):
    """Return classification accuracy of ``model`` on the masked node subset."""
    model.eval()
    with torch.no_grad():
        preds = model(features)[mask].argmax(dim=1)
        target = labels[mask]
        n_correct = (preds == target).sum().item()
    return n_correct * 1.0 / len(target)
def main(args):
    """Train a GCN end-to-end: load the dataset, build the graph with
    symmetric normalization, train with Adam, print per-epoch validation
    accuracy and the final test accuracy.

    ``args`` carries the fields registered in the ``__main__`` block
    (dataset flags, gpu, lr, n_epochs, n_hidden, n_layers, dropout,
    weight_decay).
    """
    # load and preprocess dataset
    data = load_data(args)
    features = torch.FloatTensor(data.features)
    labels = torch.LongTensor(data.labels)
    # NOTE(review): ByteTensor masks for indexing are deprecated in newer
    # PyTorch in favor of BoolTensor -- confirm the targeted torch version.
    train_mask = torch.ByteTensor(data.train_mask)
    val_mask = torch.ByteTensor(data.val_mask)
    test_mask = torch.ByteTensor(data.test_mask)
    in_feats = features.shape[1]
    n_classes = data.num_labels
    n_edges = data.graph.number_of_edges()
    print("""----Data statistics------'
      #Edges %d
      #Classes %d
      #Train samples %d
      #Val samples %d
      #Test samples %d""" %
          (n_edges, n_classes,
           train_mask.sum().item(),
           val_mask.sum().item(),
           test_mask.sum().item()))
    # gpu < 0 selects CPU; otherwise move all tensors to the chosen device
    if args.gpu < 0:
        cuda = False
    else:
        cuda = True
        torch.cuda.set_device(args.gpu)
        features = features.cuda()
        labels = labels.cuda()
        train_mask = train_mask.cuda()
        val_mask = val_mask.cuda()
        test_mask = test_mask.cuda()
    # graph preprocess and calculate normalization factor
    g = DGLGraph(data.graph)
    n_edges = g.number_of_edges()
    # add self loop
    g.add_edges(g.nodes(), g.nodes())
    # normalization
    degs = g.in_degrees().float()
    norm = torch.pow(degs, -0.5)
    # degree-0 nodes give inf in the inverse-sqrt degree; clamp to 0
    # (self-loops were just added, so this is only a safety net)
    norm[torch.isinf(norm)] = 0
    if cuda:
        norm = norm.cuda()
    g.ndata['norm'] = norm.unsqueeze(1)
    # create GCN model
    model = GCN(g,
                in_feats,
                args.n_hidden,
                n_classes,
                args.n_layers,
                F.relu,
                args.dropout)
    if cuda:
        model.cuda()
    loss_fcn = torch.nn.CrossEntropyLoss()
    # use optimizer
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=args.lr,
                                 weight_decay=args.weight_decay)
    # initialize graph
    dur = []
    for epoch in range(args.n_epochs):
        model.train()
        # skip the first 3 epochs when timing to exclude warm-up cost
        if epoch >= 3:
            t0 = time.time()
        # forward
        logits = model(features)
        # loss on training nodes only; masks select the labeled subsets
        loss = loss_fcn(logits[train_mask], labels[train_mask])
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        if epoch >= 3:
            dur.append(time.time() - t0)
        acc = evaluate(model, features, labels, val_mask)
        print("Epoch {:05d} | Time(s) {:.4f} | Loss {:.4f} | Accuracy {:.4f} | "
              "ETputs(KTEPS) {:.2f}". format(epoch, np.mean(dur), loss.item(),
                                            acc, n_edges / np.mean(dur) / 1000))
    print()
    acc = evaluate(model, features, labels, test_mask)
    print("Test Accuracy {:.4f}".format(acc))
if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='GCN')
    # dataset-selection flags (e.g. --dataset) are added by DGL's helper
    register_data_args(parser)
    parser.add_argument("--dropout", type=float, default=0.5,
            help="dropout probability")
    parser.add_argument("--gpu", type=int, default=-1,
            help="gpu")
    parser.add_argument("--lr", type=float, default=1e-2,
            help="learning rate")
    parser.add_argument("--n-epochs", type=int, default=200,
            help="number of training epochs")
    parser.add_argument("--n-hidden", type=int, default=16,
            help="number of hidden gcn units")
    parser.add_argument("--n-layers", type=int, default=1,
            help="number of hidden gcn layers")
    parser.add_argument("--weight-decay", type=float, default=5e-4,
            help="Weight for L2 loss")
    args = parser.parse_args()
    print(args)
    main(args)
""" """GCN using builtin functions that enables SPMV optimization.
Semi-Supervised Classification with Graph Convolutional Networks
Paper: https://arxiv.org/abs/1609.02907
Code: https://github.com/tkipf/gcn
GCN with SPMV specialization. References:
- Semi-Supervised Classification with Graph Convolutional Networks
- Paper: https://arxiv.org/abs/1609.02907
- Code: https://github.com/tkipf/gcn
""" """
import argparse, time, math import math
import numpy as np
import torch import torch
import torch.nn as nn import torch.nn as nn
import torch.nn.functional as F
import dgl.function as fn import dgl.function as fn
from dgl import DGLGraph
from dgl.data import register_data_args, load_data
class GCNLayer(nn.Module): class GCNLayer(nn.Module):
def __init__(self, def __init__(self,
...@@ -85,126 +81,3 @@ class GCN(nn.Module): ...@@ -85,126 +81,3 @@ class GCN(nn.Module):
for layer in self.layers: for layer in self.layers:
h = layer(h) h = layer(h)
return h return h
def evaluate(model, features, labels, mask):
    """Return classification accuracy of ``model`` on the masked node subset."""
    model.eval()
    with torch.no_grad():
        preds = model(features)[mask].argmax(dim=1)
        target = labels[mask]
        n_correct = (preds == target).sum().item()
    return n_correct * 1.0 / len(target)
def main(args):
    """Train a GCN end-to-end: load the dataset, build the graph with
    symmetric normalization, train with Adam, print per-epoch validation
    accuracy and the final test accuracy.

    ``args`` carries the fields registered in the ``__main__`` block
    (dataset flags, gpu, lr, n_epochs, n_hidden, n_layers, dropout,
    weight_decay).
    """
    # load and preprocess dataset
    data = load_data(args)
    features = torch.FloatTensor(data.features)
    labels = torch.LongTensor(data.labels)
    # NOTE(review): ByteTensor masks for indexing are deprecated in newer
    # PyTorch in favor of BoolTensor -- confirm the targeted torch version.
    train_mask = torch.ByteTensor(data.train_mask)
    val_mask = torch.ByteTensor(data.val_mask)
    test_mask = torch.ByteTensor(data.test_mask)
    in_feats = features.shape[1]
    n_classes = data.num_labels
    n_edges = data.graph.number_of_edges()
    print("""----Data statistics------'
      #Edges %d
      #Classes %d
      #Train samples %d
      #Val samples %d
      #Test samples %d""" %
          (n_edges, n_classes,
           train_mask.sum().item(),
           val_mask.sum().item(),
           test_mask.sum().item()))
    # gpu < 0 selects CPU; otherwise move all tensors to the chosen device
    if args.gpu < 0:
        cuda = False
    else:
        cuda = True
        torch.cuda.set_device(args.gpu)
        features = features.cuda()
        labels = labels.cuda()
        train_mask = train_mask.cuda()
        val_mask = val_mask.cuda()
        test_mask = test_mask.cuda()
    # graph preprocess and calculate normalization factor
    g = DGLGraph(data.graph)
    n_edges = g.number_of_edges()
    # add self loop
    g.add_edges(g.nodes(), g.nodes())
    # normalization
    degs = g.in_degrees().float()
    norm = torch.pow(degs, -0.5)
    # degree-0 nodes give inf in the inverse-sqrt degree; clamp to 0
    # (self-loops were just added, so this is only a safety net)
    norm[torch.isinf(norm)] = 0
    if cuda:
        norm = norm.cuda()
    g.ndata['norm'] = norm.unsqueeze(1)
    # create GCN model
    model = GCN(g,
                in_feats,
                args.n_hidden,
                n_classes,
                args.n_layers,
                F.relu,
                args.dropout)
    if cuda:
        model.cuda()
    loss_fcn = torch.nn.CrossEntropyLoss()
    # use optimizer
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=args.lr,
                                 weight_decay=args.weight_decay)
    # initialize graph
    dur = []
    for epoch in range(args.n_epochs):
        model.train()
        # skip the first 3 epochs when timing to exclude warm-up cost
        if epoch >= 3:
            t0 = time.time()
        # forward
        logits = model(features)
        # loss on training nodes only; masks select the labeled subsets
        loss = loss_fcn(logits[train_mask], labels[train_mask])
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        if epoch >= 3:
            dur.append(time.time() - t0)
        acc = evaluate(model, features, labels, val_mask)
        print("Epoch {:05d} | Time(s) {:.4f} | Loss {:.4f} | Accuracy {:.4f} | "
              "ETputs(KTEPS) {:.2f}". format(epoch, np.mean(dur), loss.item(),
                                            acc, n_edges / np.mean(dur) / 1000))
    print()
    acc = evaluate(model, features, labels, test_mask)
    print("Test Accuracy {:.4f}".format(acc))
if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='GCN')
    # dataset-selection flags (e.g. --dataset) are added by DGL's helper
    register_data_args(parser)
    parser.add_argument("--dropout", type=float, default=0.5,
            help="dropout probability")
    parser.add_argument("--gpu", type=int, default=-1,
            help="gpu")
    parser.add_argument("--lr", type=float, default=1e-2,
            help="learning rate")
    parser.add_argument("--n-epochs", type=int, default=200,
            help="number of training epochs")
    parser.add_argument("--n-hidden", type=int, default=16,
            help="number of hidden gcn units")
    parser.add_argument("--n-layers", type=int, default=1,
            help="number of hidden gcn layers")
    parser.add_argument("--weight-decay", type=float, default=5e-4,
            help="Weight for L2 loss")
    args = parser.parse_args()
    print(args)
    main(args)
import argparse, time
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from dgl import DGLGraph
from dgl.data import register_data_args, load_data
from gcn import GCN
#from gcn_mp import GCN
#from gcn_spmv import GCN
def evaluate(model, features, labels, mask):
    """Return classification accuracy of ``model`` on the masked node subset."""
    model.eval()
    with torch.no_grad():
        preds = model(features)[mask].argmax(dim=1)
        target = labels[mask]
        n_correct = (preds == target).sum().item()
    return n_correct * 1.0 / len(target)
def main(args):
    """Train a GCN end-to-end: load the dataset, build the graph with
    symmetric normalization, train with Adam, print per-epoch validation
    accuracy and the final test accuracy.

    ``args`` carries the fields registered in the ``__main__`` block
    (dataset flags, gpu, lr, n_epochs, n_hidden, n_layers, dropout,
    weight_decay).
    """
    # load and preprocess dataset
    data = load_data(args)
    features = torch.FloatTensor(data.features)
    labels = torch.LongTensor(data.labels)
    # NOTE(review): ByteTensor masks for indexing are deprecated in newer
    # PyTorch in favor of BoolTensor -- confirm the targeted torch version.
    train_mask = torch.ByteTensor(data.train_mask)
    val_mask = torch.ByteTensor(data.val_mask)
    test_mask = torch.ByteTensor(data.test_mask)
    in_feats = features.shape[1]
    n_classes = data.num_labels
    n_edges = data.graph.number_of_edges()
    print("""----Data statistics------'
      #Edges %d
      #Classes %d
      #Train samples %d
      #Val samples %d
      #Test samples %d""" %
          (n_edges, n_classes,
           train_mask.sum().item(),
           val_mask.sum().item(),
           test_mask.sum().item()))
    # gpu < 0 selects CPU; otherwise move all tensors to the chosen device
    if args.gpu < 0:
        cuda = False
    else:
        cuda = True
        torch.cuda.set_device(args.gpu)
        features = features.cuda()
        labels = labels.cuda()
        train_mask = train_mask.cuda()
        val_mask = val_mask.cuda()
        test_mask = test_mask.cuda()
    # graph preprocess and calculate normalization factor
    g = DGLGraph(data.graph)
    n_edges = g.number_of_edges()
    # add self loop
    g.add_edges(g.nodes(), g.nodes())
    # normalization
    degs = g.in_degrees().float()
    norm = torch.pow(degs, -0.5)
    # degree-0 nodes give inf in the inverse-sqrt degree; clamp to 0
    # (self-loops were just added, so this is only a safety net)
    norm[torch.isinf(norm)] = 0
    if cuda:
        norm = norm.cuda()
    g.ndata['norm'] = norm.unsqueeze(1)
    # create GCN model
    model = GCN(g,
                in_feats,
                args.n_hidden,
                n_classes,
                args.n_layers,
                F.relu,
                args.dropout)
    if cuda:
        model.cuda()
    loss_fcn = torch.nn.CrossEntropyLoss()
    # use optimizer
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=args.lr,
                                 weight_decay=args.weight_decay)
    # initialize graph
    dur = []
    for epoch in range(args.n_epochs):
        model.train()
        # skip the first 3 epochs when timing to exclude warm-up cost
        if epoch >= 3:
            t0 = time.time()
        # forward
        logits = model(features)
        # loss on training nodes only; masks select the labeled subsets
        loss = loss_fcn(logits[train_mask], labels[train_mask])
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        if epoch >= 3:
            dur.append(time.time() - t0)
        acc = evaluate(model, features, labels, val_mask)
        print("Epoch {:05d} | Time(s) {:.4f} | Loss {:.4f} | Accuracy {:.4f} | "
              "ETputs(KTEPS) {:.2f}". format(epoch, np.mean(dur), loss.item(),
                                            acc, n_edges / np.mean(dur) / 1000))
    print()
    acc = evaluate(model, features, labels, test_mask)
    print("Test Accuracy {:.4f}".format(acc))
if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='GCN')
    # dataset-selection flags (e.g. --dataset) are added by DGL's helper
    register_data_args(parser)
    parser.add_argument("--dropout", type=float, default=0.5,
            help="dropout probability")
    parser.add_argument("--gpu", type=int, default=-1,
            help="gpu")
    parser.add_argument("--lr", type=float, default=1e-2,
            help="learning rate")
    parser.add_argument("--n-epochs", type=int, default=200,
            help="number of training epochs")
    parser.add_argument("--n-hidden", type=int, default=16,
            help="number of hidden gcn units")
    parser.add_argument("--n-layers", type=int, default=1,
            help="number of hidden gcn layers")
    parser.add_argument("--weight-decay", type=float, default=5e-4,
            help="Weight for L2 loss")
    args = parser.parse_args()
    print(args)
    main(args)
...@@ -6,6 +6,7 @@ ...@@ -6,6 +6,7 @@
#ifndef DGL_SAMPLER_H_ #ifndef DGL_SAMPLER_H_
#define DGL_SAMPLER_H_ #define DGL_SAMPLER_H_
#include <string>
#include "graph_interface.h" #include "graph_interface.h"
namespace dgl { namespace dgl {
...@@ -65,7 +66,6 @@ class SamplerOp { ...@@ -65,7 +66,6 @@ class SamplerOp {
* \param num_hops The number of hops for each trace * \param num_hops The number of hops for each trace
* \return a flat ID array with shape (num_seeds, num_traces, num_hops + 1) * \return a flat ID array with shape (num_seeds, num_traces, num_hops + 1)
*/ */
// TODO: move to sampler.cc
static IdArray RandomWalk(const GraphInterface *gptr, static IdArray RandomWalk(const GraphInterface *gptr,
IdArray seeds, IdArray seeds,
int num_traces, int num_traces,
......
"""Package nn modules""" """Package for neural network common components."""
from __future__ import absolute_import
import os
# Select the backend-specific NN package at import time via the
# DGLBACKEND environment variable (defaults to 'pytorch').
__backend__ = os.environ.get('DGLBACKEND', 'pytorch').lower()
if __backend__ == 'numpy':
    # numpy backend ships no NN modules
    pass
elif __backend__ == 'pytorch':
    from .pytorch import *
elif __backend__ != 'mxnet':
    raise Exception("Unsupported backend %s" % __backend__)
# NOTE(review): when __backend__ == 'mxnet' nothing is imported here --
# confirm whether `from .mxnet import *` was intended for symmetry.
"""Package for mxnet-specific NN modules."""
from .conv import *
"""MXNet modules for graph convolutions."""
# pylint: disable= no-member, arguments-differ
import mxnet as mx
from mxnet import gluon
from ... import function as fn
from ...utils import get_ndata_name
__all__ = ['GraphConv']
class GraphConv(gluon.Block):
    r"""Apply graph convolution over an input signal.

    Graph convolution is introduced in `GCN <https://arxiv.org/abs/1609.02907>`__
    and can be described as below:

    .. math::
      h_i^{(l+1)} = \sigma(b^{(l)} + \sum_{j\in\mathcal{N}(i)}\frac{1}{c_{ij}}W^{(l)}h_j^{(l)})

    where :math:`\mathcal{N}(i)` is the neighbor set of node :math:`i`. :math:`c_{ij}` is equal
    to the product of the square root of node degrees:
    :math:`\sqrt{|\mathcal{N}(i)|}\sqrt{|\mathcal{N}(j)|}`. :math:`\sigma` is an activation
    function.

    The model parameters are initialized as in the
    `original implementation <https://github.com/tkipf/gcn/blob/master/gcn/layers.py>`__ where
    the weight :math:`W^{(l)}` is initialized using Glorot uniform initialization
    and the bias is initialized to be zero.

    Notes
    -----
    Zero in degree nodes could lead to invalid normalizer. A common practice
    to avoid this is to add a self-loop for each node in the graph, which
    can be achieved by:

    >>> g = ... # some DGLGraph
    >>> g.add_edges(g.nodes(), g.nodes())

    Parameters
    ----------
    in_feats : int
        Number of input features.
    out_feats : int
        Number of output features.
    norm : bool, optional
        If True, the normalizer :math:`c_{ij}` is applied. Default: ``True``.
    bias : bool, optional
        If True, adds a learnable bias to the output. Default: ``True``.
    activation: callable activation function/layer or None, optional
        If not None, applies an activation function to the updated node features.
        Default: ``None``.

    Attributes
    ----------
    weight : mxnet.gluon.parameter.Parameter
        The learnable weight tensor.
    bias : mxnet.gluon.parameter.Parameter
        The learnable bias tensor.
    """
    def __init__(self,
                 in_feats,
                 out_feats,
                 norm=True,
                 bias=True,
                 activation=None):
        super(GraphConv, self).__init__()
        self._in_feats = in_feats
        self._out_feats = out_feats
        self._norm = norm
        # Scratch field names used on the graph during forward; renamed at
        # call time (get_ndata_name) so they never clobber caller data.
        self._feat_name = "_gconv_feat"
        self._msg_name = "_gconv_msg"
        with self.name_scope():
            self.weight = self.params.get('weight', shape=(in_feats, out_feats),
                                          init=mx.init.Xavier())
            if bias:
                self.bias = self.params.get('bias', shape=(out_feats,),
                                            init=mx.init.Zero())
            else:
                self.bias = None
        self._activation = activation
    def forward(self, feat, graph):
        r"""Compute graph convolution.

        Notes
        -----
        * Input shape: :math:`(N, *, \text{in_feats})` where * means any number of additional
          dimensions, :math:`N` is the number of nodes.
        * Output shape: :math:`(N, *, \text{out_feats})` where all but the last dimension are
          the same shape as the input.

        Parameters
        ----------
        feat : mxnet.NDArray
            The input feature
        graph : DGLGraph
            The graph.

        Returns
        -------
        mxnet.NDArray
            The output feature
        """
        # Pick a node-data field name that is free on this particular graph.
        self._feat_name = get_ndata_name(graph, self._feat_name)
        if self._norm:
            # D^{-1/2} factor on the source side; zero in-degree gives inf
            # here (see the Notes section of the class docstring).
            degs = graph.in_degrees().astype('float32')
            norm = mx.nd.power(degs, -0.5)
            # reshape to broadcast over any extra feature dimensions
            shp = norm.shape + (1,) * (feat.ndim - 1)
            norm = norm.reshape(shp).as_in_context(feat.context)
            feat = feat * norm
        if self._in_feats > self._out_feats:
            # mult W first to reduce the feature size for aggregation.
            feat = mx.nd.dot(feat, self.weight.data(feat.context))
            graph.ndata[self._feat_name] = feat
            # sum-of-neighbors aggregation via builtin copy_src/sum
            graph.update_all(fn.copy_src(src=self._feat_name, out=self._msg_name),
                             fn.sum(msg=self._msg_name, out=self._feat_name))
            rst = graph.ndata.pop(self._feat_name)
        else:
            # aggregate first then mult W
            graph.ndata[self._feat_name] = feat
            graph.update_all(fn.copy_src(src=self._feat_name, out=self._msg_name),
                             fn.sum(msg=self._msg_name, out=self._feat_name))
            rst = graph.ndata.pop(self._feat_name)
            rst = mx.nd.dot(rst, self.weight.data(feat.context))
        if self._norm:
            # second D^{-1/2} factor, on the destination side
            rst = rst * norm
        if self.bias is not None:
            rst = rst + self.bias.data(rst.context)
        if self._activation is not None:
            rst = self._activation(rst)
        return rst
    def __repr__(self):
        # Compact one-line summary for printing the layer.
        summary = 'GraphConv('
        summary += 'in={:d}, out={:d}, normalization={}, activation={}'.format(
            self._in_feats, self._out_feats,
            self._norm, self._activation)
        summary += '\n)'
        return summary
"""Package for pytorch-specific NN modules."""
from .conv import *
from .softmax import *
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment