"docs/vscode:/vscode.git/clone" did not exist on "63f7796a72f9afbcba64f9bf0df11753ecc4558d"
Commit 9a0511c8 authored by Zihao Ye's avatar Zihao Ye Committed by Minjie Wang
Browse files

[NN] nn modules & examples update (#890)

* upd

* damn it

* fuck

* fuck pylint

* fudge

* remove some comments about MXNet

* upd

* upd

* damn it

* damn it

* fuck

* fuck

* upd

* upd

* pylint bastard

* upd

* upd

* upd

* upd

* upd

* upd

* upd

* upd

* upd
parent 7f65199a
......@@ -38,6 +38,106 @@ TAGConv
:members: forward
:show-inheritance:
GATConv
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: dgl.nn.mxnet.conv.GATConv
:members: forward
:show-inheritance:
EdgeConv
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: dgl.nn.mxnet.conv.EdgeConv
:members: forward
:show-inheritance:
SAGEConv
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: dgl.nn.mxnet.conv.SAGEConv
:members: forward
:show-inheritance:
SGConv
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: dgl.nn.mxnet.conv.SGConv
:members: forward
:show-inheritance:
APPNPConv
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: dgl.nn.mxnet.conv.APPNPConv
:members: forward
:show-inheritance:
GINConv
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: dgl.nn.mxnet.conv.GINConv
:members: forward
:show-inheritance:
GatedGraphConv
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: dgl.nn.mxnet.conv.GatedGraphConv
:members: forward
:show-inheritance:
GMMConv
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: dgl.nn.mxnet.conv.GMMConv
:members: forward
:show-inheritance:
ChebConv
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: dgl.nn.mxnet.conv.ChebConv
:members: forward
:show-inheritance:
AGNNConv
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: dgl.nn.mxnet.conv.AGNNConv
:members: forward
:show-inheritance:
NNConv
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: dgl.nn.mxnet.conv.NNConv
:members: forward
    :show-inheritance:
Dense Conv Layers
----------------------------------------
DenseGraphConv
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: dgl.nn.mxnet.conv.DenseGraphConv
:members: forward
:show-inheritance:
DenseSAGEConv
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: dgl.nn.mxnet.conv.DenseSAGEConv
:members: forward
    :show-inheritance:
DenseChebConv
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: dgl.nn.mxnet.conv.DenseChebConv
:members: forward
:show-inheritance:
Global Pooling Layers
----------------------------------------
......
Predict then Propagate: Graph Neural Networks meet Personalized PageRank (APPNP)
============
- Paper link: [Predict then Propagate: Graph Neural Networks meet Personalized PageRank](https://arxiv.org/abs/1810.05997)
- Author's code repo: [https://github.com/klicperajo/ppnp](https://github.com/klicperajo/ppnp).
Dependencies
------------
- MXNet 1.5+
- requests
```bash
pip install mxnet requests
```
Code
-----
The folder contains an implementation of APPNP (`appnp.py`).
Results
-------
Run with following (available dataset: "cora", "citeseer", "pubmed")
```bash
DGLBACKEND=mxnet python3 train.py --dataset cora --gpu 0
```
* cora: 0.8370 (paper: 0.850)
* citeseer: 0.713 (paper: 0.757)
* pubmed: 0.798 (paper: 0.797)
Experiments were done on dgl datasets (GCN settings) which are different from those used in the original implementation. (discrepancies are detailed in experimental section of the original paper)
\ No newline at end of file
import argparse, time
import numpy as np
import dgl
import mxnet as mx
from mxnet import nd, gluon
from mxnet.gluon import nn
from dgl import DGLGraph
from dgl.data import register_data_args, load_data
from dgl.nn.mxnet.conv import APPNPConv
class APPNP(nn.Block):
    """APPNP node classifier: an MLP predictor followed by approximate
    personalized-PageRank propagation over a fixed graph ``g``."""
    def __init__(self,
                 g,
                 in_feats,
                 hiddens,
                 n_classes,
                 activation,
                 feat_drop,
                 edge_drop,
                 alpha,
                 k):
        super(APPNP, self).__init__()
        self.g = g
        with self.name_scope():
            self.layers = nn.Sequential()
            # Dense stack: in_feats -> hiddens[0] -> ... -> hiddens[-1] -> n_classes.
            self.layers.add(nn.Dense(hiddens[0], in_units=in_feats))
            for prev_dim, cur_dim in zip(hiddens[:-1], hiddens[1:]):
                self.layers.add(nn.Dense(cur_dim, in_units=prev_dim))
            self.layers.add(nn.Dense(n_classes, in_units=hiddens[-1]))
            self.activation = activation
            # Identity when no dropout is requested.
            self.feat_drop = nn.Dropout(feat_drop) if feat_drop else (lambda x: x)
            self.propagate = APPNPConv(k, alpha, edge_drop)

    def forward(self, features):
        # Prediction step: MLP with dropout on the input and before the last layer.
        h = self.activation(self.layers[0](self.feat_drop(features)))
        for dense in self.layers[1:-1]:
            h = self.activation(dense(h))
        h = self.layers[-1](self.feat_drop(h))
        # Propagation step: personalized-PageRank smoothing of the logits.
        return self.propagate(self.g, h)
def evaluate(model, features, labels, mask):
    """Return the masked classification accuracy of ``model`` on ``features``."""
    predictions = model(features).argmax(axis=1)
    hits = ((predictions == labels) * mask).sum()
    return (hits / mask.sum().asscalar()).asscalar()
def main(args):
    """Train and evaluate an APPNP model on the citation dataset named in ``args``."""
    # load and preprocess dataset
    data = load_data(args)
    features = nd.array(data.features)
    labels = nd.array(data.labels)
    train_mask = nd.array(data.train_mask)
    val_mask = nd.array(data.val_mask)
    test_mask = nd.array(data.test_mask)
    in_feats = features.shape[1]
    n_classes = data.num_labels
    n_edges = data.graph.number_of_edges()
    print("""----Data statistics------'
      #Edges %d
      #Classes %d
      #Train samples %d
      #Val samples %d
      #Test samples %d""" %
          (n_edges, n_classes,
           train_mask.sum().asscalar(),
           val_mask.sum().asscalar(),
           test_mask.sum().asscalar()))
    # Pick the execution context: CPU when --gpu < 0, else the given GPU.
    if args.gpu < 0:
        ctx = mx.cpu()
    else:
        ctx = mx.gpu(args.gpu)
    # Move all tensors to the chosen context.
    features = features.as_in_context(ctx)
    labels = labels.as_in_context(ctx)
    train_mask = train_mask.as_in_context(ctx)
    val_mask = val_mask.as_in_context(ctx)
    test_mask = test_mask.as_in_context(ctx)
    # graph preprocess and calculate normalization factor
    g = DGLGraph(data.graph)
    n_edges = g.number_of_edges()
    # add self loop
    g.add_edges(g.nodes(), g.nodes())
    g.set_n_initializer(dgl.init.zero_initializer)
    g.set_e_initializer(dgl.init.zero_initializer)
    # create APPNP model
    model = APPNP(g,
                  in_feats,
                  args.hidden_sizes,
                  n_classes,
                  nd.relu,
                  args.in_drop,
                  args.edge_drop,
                  args.alpha,
                  args.k)
    model.initialize(ctx=ctx)
    n_train_samples = train_mask.sum().asscalar()
    loss_fcn = gluon.loss.SoftmaxCELoss()
    # use optimizer
    print(model.collect_params())
    trainer = gluon.Trainer(model.collect_params(), 'adam',
                            {'learning_rate': args.lr, 'wd': args.weight_decay})
    # initialize graph
    dur = []
    for epoch in range(args.n_epochs):
        # Only time epochs after warm-up (epoch >= 3) so startup cost is excluded.
        if epoch >= 3:
            t0 = time.time()
        # forward
        with mx.autograd.record():
            pred = model(features)
            # Mask selects training nodes; loss is averaged over them only.
            loss = loss_fcn(pred, labels, mx.nd.expand_dims(train_mask, 1))
            loss = loss.sum() / n_train_samples
        loss.backward()
        trainer.step(batch_size=1)
        if epoch >= 3:
            # asscalar() forces a sync so the timing below is accurate.
            loss.asscalar()
            dur.append(time.time() - t0)
        acc = evaluate(model, features, labels, val_mask)
        print("Epoch {:05d} | Time(s) {:.4f} | Loss {:.4f} | Accuracy {:.4f} | "
              "ETputs(KTEPS) {:.2f}". format(
            epoch, np.mean(dur), loss.asscalar(), acc, n_edges / np.mean(dur) / 1000))
    # test set accuracy
    acc = evaluate(model, features, labels, test_mask)
    print("Test accuracy {:.2%}".format(acc))
if __name__ == '__main__':
    # CLI for the APPNP citation-network example.
    parser = argparse.ArgumentParser(description='APPNP')
    register_data_args(parser)
    parser.add_argument("--in-drop", type=float, default=0.5,
                        help="input feature dropout")
    parser.add_argument("--edge-drop", type=float, default=0.5,
                        help="edge propagation dropout")
    parser.add_argument("--gpu", type=int, default=-1,
                        help="gpu")
    parser.add_argument("--lr", type=float, default=1e-2,
                        help="learning rate")
    parser.add_argument("--n-epochs", type=int, default=200,
                        help="number of training epochs")
    # NOTE(review): this flag uses an underscore while the others use hyphens;
    # kept as-is because scripts may depend on the exact spelling.
    parser.add_argument("--hidden_sizes", type=int, nargs='+', default=[64],
                        help="hidden unit sizes for appnp")
    parser.add_argument("--k", type=int, default=10,
                        help="Number of propagation steps")
    parser.add_argument("--alpha", type=float, default=0.1,
                        help="Teleport Probability")
    parser.add_argument("--weight-decay", type=float, default=5e-4,
                        help="Weight for L2 loss")
    args = parser.parse_args()
    print(args)
    main(args)
\ No newline at end of file
......@@ -7,87 +7,8 @@ Author's code: https://github.com/PetarV-/GAT
Pytorch implementation: https://github.com/Diego999/pyGAT
"""
import mxnet as mx
from mxnet import gluon
import mxnet.ndarray as nd
import mxnet.gluon.nn as nn
import dgl.function as fn
from dgl.nn.mxnet import edge_softmax
class GraphAttention(gluon.Block):
    """One multi-head graph-attention layer over a fixed ``DGLGraph``.

    Projects node features, computes per-edge attention logits from the
    source/destination halves of the attention vector, normalizes them with
    an edge softmax, and aggregates neighbor features weighted by attention.
    """
    def __init__(self,
                 g,
                 in_dim,
                 out_dim,
                 num_heads,
                 feat_drop,
                 attn_drop,
                 alpha,
                 residual=False):
        super(GraphAttention, self).__init__()
        self.g = g
        self.num_heads = num_heads
        # Single projection shared by all heads; reshaped to (N, H, D') in forward.
        self.fc = nn.Dense(num_heads * out_dim, use_bias=False,
                           weight_initializer=mx.init.Xavier())
        # Dropouts degrade to identity functions when the rate is 0.
        if feat_drop:
            self.feat_drop = nn.Dropout(feat_drop)
        else:
            self.feat_drop = lambda x : x
        if attn_drop:
            self.attn_drop = nn.Dropout(attn_drop)
        else:
            self.attn_drop = lambda x : x
        # Per-head attention parameters, split into "left" (source) and "right"
        # (destination) parts; grad_req="add" accumulates gradients across uses.
        self.attn_l = self.params.get("left_att", grad_req="add",
                                      shape=(1, num_heads, out_dim),
                                      init=mx.init.Xavier())
        self.attn_r = self.params.get("right_att", grad_req="add",
                                      shape=(1, num_heads, out_dim),
                                      init=mx.init.Xavier())
        self.alpha = alpha  # negative slope of the LeakyReLU on attention logits
        self.softmax = edge_softmax
        self.residual = residual
        if residual:
            if in_dim != out_dim:
                # Skip connection needs a projection when dimensions differ.
                self.res_fc = nn.Dense(num_heads * out_dim, use_bias=False,
                                       weight_initializer=mx.init.Xavier())
            else:
                self.res_fc = None

    def forward(self, inputs):
        # prepare
        h = self.feat_drop(inputs)  # NxD
        ft = self.fc(h).reshape((h.shape[0], self.num_heads, -1))  # NxHxD'
        a1 = (ft * self.attn_l.data(ft.context)).sum(axis=-1).expand_dims(-1) # N x H x 1
        a2 = (ft * self.attn_r.data(ft.context)).sum(axis=-1).expand_dims(-1) # N x H x 1
        self.g.ndata.update({'ft' : ft, 'a1' : a1, 'a2' : a2})
        # 1. compute edge attention
        self.g.apply_edges(self.edge_attention)
        # 2. compute softmax
        self.edge_softmax()
        # 3. compute the aggregated node features
        self.g.update_all(fn.src_mul_edge('ft', 'a_drop', 'ft'),
                          fn.sum('ft', 'ft'))
        ret = self.g.ndata['ft']
        # 4. residual
        if self.residual:
            if self.res_fc is not None:
                resval = self.res_fc(h).reshape(
                    (h.shape[0], self.num_heads, -1))  # NxHxD'
            else:
                # Same dimension: broadcast the input across heads.
                resval = nd.expand_dims(h, axis=1)  # Nx1xD'
            ret = resval + ret
        return ret

    def edge_attention(self, edges):
        # an edge UDF to compute unnormalized attention values from src and dst
        a = nd.LeakyReLU(edges.src['a1'] + edges.dst['a2'], slope=self.alpha)
        return {'a' : a}

    def edge_softmax(self):
        # Normalize attention logits over each node's incoming edges.
        attention = self.softmax(self.g, self.g.edata.pop('a'))
        # Dropout attention scores and save them
        self.g.edata['a_drop'] = self.attn_drop(attention)
from dgl.nn.mxnet.conv import GATConv
class GAT(nn.Block):
......@@ -109,18 +30,18 @@ class GAT(nn.Block):
self.gat_layers = []
self.activation = activation
# input projection (no residual)
self.gat_layers.append(GraphAttention(
g, in_dim, num_hidden, heads[0],
self.gat_layers.append(GATConv(
in_dim, num_hidden, heads[0],
feat_drop, attn_drop, alpha, False))
# hidden layers
for l in range(1, num_layers):
# due to multi-head, the in_dim = num_hidden * num_heads
self.gat_layers.append(GraphAttention(
g, num_hidden * heads[l-1], num_hidden, heads[l],
self.gat_layers.append(GATConv(
num_hidden * heads[l-1], num_hidden, heads[l],
feat_drop, attn_drop, alpha, residual))
# output projection
self.gat_layers.append(GraphAttention(
g, num_hidden * heads[-2], num_classes, heads[-1],
self.gat_layers.append(GATConv(
num_hidden * heads[-2], num_classes, heads[-1],
feat_drop, attn_drop, alpha, residual))
for i, layer in enumerate(self.gat_layers):
self.register_child(layer, "gat_layer_{}".format(i))
......@@ -128,8 +49,8 @@ class GAT(nn.Block):
def forward(self, inputs):
h = inputs
for l in range(self.num_layers):
h = self.gat_layers[l](h).flatten()
h = self.gat_layers[l](self.g, h).flatten()
h = self.activation(h)
# output projection
logits = self.gat_layers[-1](h).mean(1)
logits = self.gat_layers[-1](self.g, h).mean(1)
return logits
......@@ -25,6 +25,7 @@ def main(args):
train_mask = mx.nd.array(data.train_mask)
val_mask = mx.nd.array(data.val_mask)
test_mask = mx.nd.array(data.test_mask)
in_feats = features.shape[1]
n_classes = data.num_labels
n_edges = data.graph.number_of_edges()
......
Graph Isomorphism Network (GIN)
============
- Paper link: [arXiv](https://arxiv.org/abs/1810.00826) [OpenReview](https://openreview.net/forum?id=ryGs6iA5Km)
- Author's code repo: [https://github.com/weihua916/powerful-gnns](https://github.com/weihua916/powerful-gnns).
Dependencies
------------
- MXNet 1.5+
- sklearn
- tqdm
```bash
pip install mxnet sklearn tqdm
```
How to run
----------
An experiment on the GIN in default settings can be run with
```bash
DGLBACKEND=mxnet python main.py
```
An experiment on the GIN in customized settings can be run with
```bash
DGLBACKEND=mxnet python main.py [--device 0 | --disable-cuda] --dataset COLLAB \
--graph_pooling_type max --neighbor_pooling_type sum
```
Results
-------
Run with following with the double SUM pooling way:
(tested dataset: "MUTAG"(default), "COLLAB", "IMDBBINARY", "IMDBMULTI")
```bash
DGLBACKEND=mxnet python main.py --dataset MUTAG --device 0 \
--graph_pooling_type sum --neighbor_pooling_type sum
```
"""
MxNet compatible dataloader
"""
from mxnet.gluon.data import DataLoader, Sampler
import math
import numpy as np
from mxnet import nd
from sklearn.model_selection import StratifiedKFold
import dgl
class SubsetRandomSampler(Sampler):
    """Yield the elements of ``indices`` in a fresh random order on every pass."""
    def __init__(self, indices):
        self.indices = indices

    def __iter__(self):
        order = np.random.permutation(len(self.indices))
        return iter([self.indices[j] for j in order])

    def __len__(self):
        return len(self.indices)
# default collate function
def collate(samples):
    """Batch a list of (graph, label) pairs into one DGL graph and a label array."""
    graphs, labels = map(list, zip(*samples))
    # Convert node features to MXNet arrays; these graphs carry no edge features.
    for graph in graphs:
        for feat_name in graph.node_attr_schemes().keys():
            graph.ndata[feat_name] = nd.array(graph.ndata[feat_name])
    return dgl.batch(graphs), nd.array(labels)
class GraphDataLoader():
    """Wrap a graph-classification dataset into train/validation DataLoaders.

    The split is either a stratified 10-fold split ('fold10', where one fold
    is held out for validation) or a random ratio split ('rand').
    """
    def __init__(self,
                 dataset,
                 batch_size,
                 collate_fn=collate,
                 seed=0,
                 shuffle=True,
                 split_name='fold10',
                 fold_idx=0,
                 split_ratio=0.7):
        self.shuffle = shuffle
        self.seed = seed
        labels = [l for _, l in dataset]
        if split_name == 'fold10':
            train_idx, valid_idx = self._split_fold10(
                labels, fold_idx, seed, shuffle)
        elif split_name == 'rand':
            train_idx, valid_idx = self._split_rand(
                labels, split_ratio, seed, shuffle)
        else:
            raise NotImplementedError()
        train_sampler = SubsetRandomSampler(train_idx)
        valid_sampler = SubsetRandomSampler(valid_idx)
        self.train_loader = DataLoader(
            dataset, sampler=train_sampler,
            batch_size=batch_size, batchify_fn=collate_fn)
        self.valid_loader = DataLoader(
            dataset, sampler=valid_sampler,
            batch_size=batch_size, batchify_fn=collate_fn)

    def train_valid_loader(self):
        """Return the (train, validation) DataLoader pair."""
        return self.train_loader, self.valid_loader

    def _split_fold10(self, labels, fold_idx=0, seed=0, shuffle=True):
        """Stratified 10-fold split; returns (train_idx, valid_idx) for `fold_idx`."""
        # FIX: this used `assert cond, print(...)` — the check vanished under
        # `python -O` and its message was always None. Raise with a message instead.
        if not 0 <= fold_idx < 10:
            raise ValueError("fold_idx must be from 0 to 9.")
        skf = StratifiedKFold(n_splits=10, shuffle=shuffle, random_state=seed)
        idx_list = []
        for idx in skf.split(np.zeros(len(labels)), labels):  # split(x, y)
            idx_list.append(idx)
        train_idx, valid_idx = idx_list[fold_idx]
        # FIX: the format string was previously passed to print() alongside the
        # lengths as extra positional arguments, so %d was never substituted.
        print("train_set : test_set = %d : %d" %
              (len(train_idx), len(valid_idx)))
        return train_idx, valid_idx

    def _split_rand(self, labels, split_ratio=0.7, seed=0, shuffle=True):
        """Random ratio split; returns (train_idx, valid_idx) index lists."""
        num_entries = len(labels)
        indices = list(range(num_entries))
        np.random.seed(seed)
        np.random.shuffle(indices)
        split = int(math.floor(split_ratio * num_entries))
        train_idx, valid_idx = indices[:split], indices[split:]
        # FIX: same print-formatting bug as in _split_fold10.
        print("train_set : test_set = %d : %d" %
              (len(train_idx), len(valid_idx)))
        return train_idx, valid_idx
"""
How Powerful are Graph Neural Networks
https://arxiv.org/abs/1810.00826
https://openreview.net/forum?id=ryGs6iA5Km
Author's implementation: https://github.com/weihua916/powerful-gnns
"""
import mxnet as mx
from mxnet import nd, gluon
from mxnet.gluon import nn
from dgl.nn.mxnet.conv import GINConv
from dgl.nn.mxnet.glob import SumPooling, AvgPooling, MaxPooling
class ApplyNodeFunc(nn.Block):
    """Node-update function for GIN: MLP followed by BatchNorm and ReLU."""
    def __init__(self, mlp):
        super(ApplyNodeFunc, self).__init__()
        with self.name_scope():
            self.mlp = mlp
            self.bn = nn.BatchNorm(in_channels=self.mlp.output_dim)

    def forward(self, h):
        return nd.relu(self.bn(self.mlp(h)))
class MLP(nn.Block):
    """Multi-layer perceptron with a linear (non-activated) output layer.

    Parameters
    ----------
    num_layers: int
        Number of linear layers; 1 means a plain linear model.
    input_dim: int
        Dimensionality of the input features.
    hidden_dim: int
        Dimensionality of every hidden layer.
    output_dim: int
        Number of output units (classes).
    """
    def __init__(self, num_layers, input_dim, hidden_dim, output_dim):
        super(MLP, self).__init__()
        self.linear_or_not = True  # True iff the model is one dense layer
        self.num_layers = num_layers
        self.output_dim = output_dim
        with self.name_scope():
            if num_layers < 1:
                raise ValueError("number of layers should be positive!")
            if num_layers == 1:
                # Degenerate case: a single dense layer, no hidden activations.
                self.linear = nn.Dense(output_dim, in_units=input_dim)
            else:
                self.linear_or_not = False
                self.linears = nn.Sequential()
                self.batch_norms = nn.Sequential()
                self.linears.add(nn.Dense(hidden_dim, in_units=input_dim))
                for _ in range(num_layers - 2):
                    self.linears.add(nn.Dense(hidden_dim, in_units=hidden_dim))
                self.linears.add(nn.Dense(output_dim, in_units=hidden_dim))
                # One BatchNorm per hidden activation.
                for _ in range(num_layers - 1):
                    self.batch_norms.add(nn.BatchNorm(in_channels=hidden_dim))

    def forward(self, x):
        if self.linear_or_not:
            return self.linear(x)
        h = x
        # ReLU(BatchNorm(Dense(.))) for every layer except the last, linear one.
        for dense, bn in zip(self.linears[:-1], self.batch_norms):
            h = nd.relu(bn(dense(h)))
        return self.linears[-1](h)
class GIN(nn.Block):
    """GIN model: stacked GINConv layers with per-layer linear read-outs."""
    def __init__(self, num_layers, num_mlp_layers, input_dim, hidden_dim,
                 output_dim, final_dropout, learn_eps, graph_pooling_type,
                 neighbor_pooling_type):
        """model parameters setting

        Parameters
        ----------
        num_layers: int
            The number of linear layers in the neural network
        num_mlp_layers: int
            The number of linear layers in mlps
        input_dim: int
            The dimensionality of input features
        hidden_dim: int
            The dimensionality of hidden units at ALL layers
        output_dim: int
            The number of classes for prediction
        final_dropout: float
            dropout ratio on the final linear layer
        learn_eps: boolean
            If True, learn epsilon to distinguish center nodes from neighbors
            If False, aggregate neighbors and center nodes altogether.
        neighbor_pooling_type: str
            how to aggregate neighbors (sum, mean, or max)
        graph_pooling_type: str
            how to aggregate entire nodes in a graph (sum, mean or max)
        """
        super(GIN, self).__init__()
        self.num_layers = num_layers
        self.learn_eps = learn_eps
        with self.name_scope():
            # One GINConv (wrapping its own MLP) per message-passing step.
            self.ginlayers = nn.Sequential()
            self.batch_norms = nn.Sequential()
            for i in range(self.num_layers - 1):
                if i == 0:
                    mlp = MLP(num_mlp_layers, input_dim, hidden_dim, hidden_dim)
                else:
                    mlp = MLP(num_mlp_layers, hidden_dim, hidden_dim, hidden_dim)
                self.ginlayers.add(
                    GINConv(ApplyNodeFunc(mlp), neighbor_pooling_type, 0, self.learn_eps))
                self.batch_norms.add(nn.BatchNorm(in_channels=hidden_dim))
            # One linear read-out per layer (including the raw input representation).
            self.linears_prediction = nn.Sequential()
            for i in range(num_layers):
                if i == 0:
                    self.linears_prediction.add(nn.Dense(output_dim, in_units=input_dim))
                else:
                    self.linears_prediction.add(nn.Dense(output_dim, in_units=hidden_dim))
            self.drop = nn.Dropout(final_dropout)
            if graph_pooling_type == 'sum':
                self.pool = SumPooling()
            elif graph_pooling_type == 'mean':
                self.pool = AvgPooling()
            elif graph_pooling_type == 'max':
                self.pool = MaxPooling()
            else:
                raise NotImplementedError

    def forward(self, g, h):
        # Keep the node representation produced after every layer (incl. input).
        hidden_rep = [h]
        for i in range(self.num_layers - 1):
            h = self.ginlayers[i](g, h)
            h = self.batch_norms[i](h)
            h = nd.relu(h)
            hidden_rep.append(h)
        score_over_layer = 0
        # perform pooling over all nodes in each graph in every layer
        for i, h in enumerate(hidden_rep):
            pooled_h = self.pool(g, h)
            score_over_layer = score_over_layer + self.drop(self.linears_prediction[i](pooled_h))
        return score_over_layer
import sys
import numpy as np
from tqdm import tqdm
import mxnet as mx
from mxnet import gluon, nd
from mxnet.gluon import nn
from dgl.data.gindt import GINDataset
from dataloader import GraphDataLoader, collate
from parser import Parser
from gin import GIN
def train(args, net, trainloader, trainer, criterion, epoch):
    """Train ``net`` for one epoch over ``trainloader``; return the mean batch loss."""
    num_batches = len(trainloader)
    loss_sum = 0
    # setup the offset to avoid the overlap with mouse cursor
    bar = tqdm(range(num_batches), unit='batch', position=2, file=sys.stdout)
    for _, (graphs, labels) in zip(bar, trainloader):
        # batch graphs will be shipped to device in forward part of model
        labels = labels.as_in_context(args.device)
        feat = graphs.ndata['attr'].astype('float32').as_in_context(args.device)
        with mx.autograd.record():
            outputs = net(graphs, feat)
            loss = criterion(outputs, labels)
            loss = loss.sum() / len(labels)
        loss_sum += loss.asscalar()
        # backprop
        loss.backward()
        trainer.step(batch_size=1)
        # report
        bar.set_description('epoch-{}'.format(epoch))
    bar.close()
    # the final batch will be aligned
    return loss_sum / num_batches
def eval_net(args, net, dataloader, criterion):
    """Evaluate ``net`` over ``dataloader``; return (mean loss, accuracy)."""
    seen = 0
    loss_total = 0
    correct = 0
    for graphs, labels in dataloader:
        labels = labels.as_in_context(args.device)
        feat = graphs.ndata['attr'].astype('float32').as_in_context(args.device)
        seen += len(labels)
        outputs = net(graphs, feat)
        correct += (nd.argmax(outputs, axis=1) == labels).sum().asscalar()
        # crossentropy(reduce=True) for default
        loss_total += criterion(outputs, labels).sum().asscalar()
    return 1.0 * loss_total / seen, 1.0 * correct / seen
def main(args):
    """Train and evaluate GIN on one 10-fold split, per the parsed CLI arguments."""
    # set up seeds, args.seed supported
    mx.random.seed(0)
    np.random.seed(seed=0)
    if args.device >= 0:
        args.device = mx.gpu(args.device)
    else:
        args.device = mx.cpu()
    # degree-as-tag features are used when epsilon is NOT being learned
    dataset = GINDataset(args.dataset, not args.learn_eps)
    trainloader, validloader = GraphDataLoader(
        dataset, batch_size=args.batch_size,
        collate_fn=collate, seed=args.seed, shuffle=True,
        split_name='fold10', fold_idx=args.fold_idx).train_valid_loader()
    # or split_name='rand', split_ratio=0.7
    model = GIN(
        args.num_layers, args.num_mlp_layers,
        dataset.dim_nfeats, args.hidden_dim, dataset.gclasses,
        args.final_dropout, args.learn_eps,
        args.graph_pooling_type, args.neighbor_pooling_type)
    model.initialize(ctx=args.device)
    criterion = gluon.loss.SoftmaxCELoss()
    print(model.collect_params())
    # halve the learning rate every 50 updates
    lr_scheduler = mx.lr_scheduler.FactorScheduler(50, 0.5)
    trainer = gluon.Trainer(model.collect_params(), 'adam',
                            {'lr_scheduler': lr_scheduler})
    # it's not cost-effective to handle the cursor and init 0
    # https://stackoverflow.com/a/23121189
    tbar = tqdm(range(args.epochs), unit="epoch", position=3, ncols=0, file=sys.stdout)
    vbar = tqdm(range(args.epochs), unit="epoch", position=4, ncols=0, file=sys.stdout)
    lrbar = tqdm(range(args.epochs), unit="epoch", position=5, ncols=0, file=sys.stdout)
    for epoch, _, _ in zip(tbar, vbar, lrbar):
        train(args, model, trainloader, trainer, criterion, epoch)
        train_loss, train_acc = eval_net(
            args, model, trainloader, criterion)
        tbar.set_description(
            'train set - average loss: {:.4f}, accuracy: {:.0f}%'
            .format(train_loss, 100. * train_acc))
        valid_loss, valid_acc = eval_net(
            args, model, validloader, criterion)
        vbar.set_description(
            'valid set - average loss: {:.4f}, accuracy: {:.0f}%'
            .format(valid_loss, 100. * valid_acc))
        # Append per-epoch metrics to the results file when one was requested.
        if not args.filename == "":
            with open(args.filename, 'a') as f:
                f.write('%s %s %s %s' % (
                    args.dataset,
                    args.learn_eps,
                    args.neighbor_pooling_type,
                    args.graph_pooling_type
                ))
                f.write("\n")
                f.write("%f %f %f %f" % (
                    train_loss,
                    train_acc,
                    valid_loss,
                    valid_acc
                ))
                f.write("\n")
        lrbar.set_description(
            "Learning eps with learn_eps={}: {}".format(
                args.learn_eps, [layer.eps.data(args.device).asscalar() for layer in model.ginlayers]))
    tbar.close()
    vbar.close()
    lrbar.close()
if __name__ == '__main__':
    # Parse command-line arguments (declared in parser.py) and start training.
    args = Parser(description='GIN').args
    print('show all arguments configuration...')
    print(args)
    main(args)
\ No newline at end of file
"""Parser for arguments
Put all arguments in one file and group similar arguments
"""
import argparse
class Parser():
    """Groups all GIN command-line arguments behind one object.

    After construction the parsed namespace is available as ``self.args``.
    """
    def __init__(self, description):
        """Build the argument parser, parse ``sys.argv`` and store the result."""
        self.parser = argparse.ArgumentParser(description=description)
        self.args = None
        self._parse()

    def _parse(self):
        add = self.parser.add_argument
        # dataset
        add('--dataset', type=str, default="MUTAG",
            help='name of dataset (default: MUTAG)')
        add('--batch_size', type=int, default=32,
            help='batch size for training and validation (default: 32)')
        add('--fold_idx', type=int, default=0,
            help='the index(<10) of fold in 10-fold validation.')
        add('--filename', type=str, default="",
            help='output file')
        # device
        add('--disable-cuda', action='store_true',
            help='Disable CUDA')
        add('--device', type=int, default=0,
            help='which gpu device to use (default: 0)')
        # net
        add('--net', type=str, default="gin",
            help='gnn net (default: gin)')
        add('--num_layers', type=int, default=5,
            help='number of layers (default: 5)')
        add('--num_mlp_layers', type=int, default=2,
            help='number of MLP layers(default: 2). 1 means linear model.')
        add('--hidden_dim', type=int, default=64,
            help='number of hidden units (default: 64)')
        # graph
        add('--graph_pooling_type', type=str,
            default="sum", choices=["sum", "mean", "max"],
            help='type of graph pooling: sum, mean or max')
        add('--neighbor_pooling_type', type=str,
            default="sum", choices=["sum", "mean", "max"],
            help='type of neighboring pooling: sum, mean or max')
        add('--learn_eps', action="store_true",
            help='learn the epsilon weighting')
        add('--degree_as_tag', action="store_true",
            help='take the degree of nodes as input feature')
        # learning
        add('--seed', type=int, default=0,
            help='random seed (default: 0)')
        add('--epochs', type=int, default=350,
            help='number of epochs to train (default: 350)')
        add('--lr', type=float, default=0.01,
            help='learning rate (default: 0.01)')
        add('--final_dropout', type=float, default=0.5,
            help='final layer dropout (default: 0.5)')
        # done
        self.args = self.parser.parse_args()
\ No newline at end of file
Inductive Representation Learning on Large Graphs (GraphSAGE)
============
- Paper link: [http://papers.nips.cc/paper/6703-inductive-representation-learning-on-large-graphs.pdf](http://papers.nips.cc/paper/6703-inductive-representation-learning-on-large-graphs.pdf)
- Author's code repo: [https://github.com/williamleif/graphsage-simple](https://github.com/williamleif/graphsage-simple). Note that the original code is
simple reference implementation of GraphSAGE.
Requirements
------------
- requests
```bash
pip install requests
```
Results
-------
Run with following (available dataset: "cora", "citeseer", "pubmed")
```bash
python3 main.py --dataset cora --gpu 0
```
* cora: ~0.817
* citeseer: ~0.699
* pubmed: ~0.790
\ No newline at end of file
"""
Inductive Representation Learning on Large Graphs
Paper: http://papers.nips.cc/paper/6703-inductive-representation-learning-on-large-graphs.pdf
Code: https://github.com/williamleif/graphsage-simple
Simple reference implementation of GraphSAGE.
"""
import argparse
import time
import numpy as np
import networkx as nx
import mxnet as mx
from mxnet import nd, gluon
from mxnet.gluon import nn
from dgl import DGLGraph
from dgl.data import register_data_args, load_data
from dgl.nn.mxnet.conv import SAGEConv
class GraphSAGE(nn.Block):
    """GraphSAGE node classifier: a stack of SAGEConv layers over a fixed graph."""
    def __init__(self,
                 g,
                 in_feats,
                 n_hidden,
                 n_classes,
                 n_layers,
                 activation,
                 dropout,
                 aggregator_type):
        super(GraphSAGE, self).__init__()
        self.g = g
        with self.name_scope():
            self.layers = nn.Sequential()
            # Input layer: in_feats -> n_hidden.
            self.layers.add(SAGEConv(in_feats, n_hidden, aggregator_type,
                                     feat_drop=dropout, activation=activation))
            # Hidden layers: n_hidden -> n_hidden.
            for _ in range(n_layers - 1):
                self.layers.add(SAGEConv(n_hidden, n_hidden, aggregator_type,
                                         feat_drop=dropout, activation=activation))
            # Output layer: no activation so the logits stay raw.
            self.layers.add(SAGEConv(n_hidden, n_classes, aggregator_type,
                                     feat_drop=dropout, activation=None))

    def forward(self, features):
        h = features
        for conv in self.layers:
            h = conv(self.g, h)
        return h
def evaluate(model, features, labels, mask):
    """Return the masked classification accuracy of ``model`` on ``features``."""
    predicted = model(features).argmax(axis=1)
    num_correct = ((predicted == labels) * mask).sum()
    return (num_correct / mask.sum().asscalar()).asscalar()
def main(args):
    """Train and evaluate GraphSAGE on the citation dataset named in ``args``."""
    # load and preprocess dataset
    data = load_data(args)
    features = nd.array(data.features)
    labels = nd.array(data.labels)
    train_mask = nd.array(data.train_mask)
    val_mask = nd.array(data.val_mask)
    test_mask = nd.array(data.test_mask)
    in_feats = features.shape[1]
    n_classes = data.num_labels
    n_edges = data.graph.number_of_edges()
    print("""----Data statistics------'
      #Edges %d
      #Classes %d
      #Train samples %d
      #Val samples %d
      #Test samples %d""" %
          (n_edges, n_classes,
           train_mask.sum().asscalar(),
           val_mask.sum().asscalar(),
           test_mask.sum().asscalar()))
    # Pick the execution context: CPU when --gpu < 0, else the given GPU.
    if args.gpu < 0:
        ctx = mx.cpu(0)
    else:
        ctx = mx.gpu(args.gpu)
        print("use cuda:", args.gpu)
    features = features.as_in_context(ctx)
    labels = labels.as_in_context(ctx)
    train_mask = train_mask.as_in_context(ctx)
    val_mask = val_mask.as_in_context(ctx)
    test_mask = test_mask.as_in_context(ctx)
    # graph preprocess and calculate normalization factor
    g = data.graph
    # Self-loops are removed before building the DGLGraph.
    g.remove_edges_from(nx.selfloop_edges(g))
    g = DGLGraph(g)
    n_edges = g.number_of_edges()
    # create GraphSAGE model
    model = GraphSAGE(g,
                      in_feats,
                      args.n_hidden,
                      n_classes,
                      args.n_layers,
                      nd.relu,
                      args.dropout,
                      args.aggregator_type
                      )
    model.initialize(ctx=ctx)
    n_train_samples = train_mask.sum().asscalar()
    loss_fcn = gluon.loss.SoftmaxCELoss()
    print(model.collect_params())
    trainer = gluon.Trainer(model.collect_params(), 'adam',
                            {'learning_rate': args.lr, 'wd': args.weight_decay})
    # initialize graph
    dur = []
    for epoch in range(args.n_epochs):
        # Only time epochs after warm-up (epoch >= 3) so startup cost is excluded.
        if epoch >= 3:
            t0 = time.time()
        # forward
        with mx.autograd.record():
            pred = model(features)
            # Mask selects training nodes; loss is averaged over them only.
            loss = loss_fcn(pred, labels, mx.nd.expand_dims(train_mask, 1))
            loss = loss.sum() / n_train_samples
        loss.backward()
        trainer.step(batch_size=1)
        if epoch >= 3:
            # asscalar() forces a sync so the timing below is accurate.
            loss.asscalar()
            dur.append(time.time() - t0)
        acc = evaluate(model, features, labels, val_mask)
        print("Epoch {:05d} | Time(s) {:.4f} | Loss {:.4f} | Accuracy {:.4f} | "
              "ETputs(KTEPS) {:.2f}". format(
            epoch, np.mean(dur), loss.asscalar(), acc, n_edges / np.mean(dur) / 1000))
    # test set accuracy
    acc = evaluate(model, features, labels, test_mask)
    print("Test accuracy {:.2%}".format(acc))
if __name__ == '__main__':
    # CLI for the GraphSAGE citation-network example.
    parser = argparse.ArgumentParser(description='GraphSAGE')
    register_data_args(parser)
    parser.add_argument("--dropout", type=float, default=0.5,
                        help="dropout probability")
    parser.add_argument("--gpu", type=int, default=-1,
                        help="gpu")
    parser.add_argument("--lr", type=float, default=1e-2,
                        help="learning rate")
    parser.add_argument("--n-epochs", type=int, default=200,
                        help="number of training epochs")
    parser.add_argument("--n-hidden", type=int, default=16,
                        help="number of hidden gcn units")
    parser.add_argument("--n-layers", type=int, default=1,
                        help="number of hidden gcn layers")
    parser.add_argument("--weight-decay", type=float, default=5e-4,
                        help="Weight for L2 loss")
    parser.add_argument("--aggregator-type", type=str, default="gcn",
                        help="Aggregator type: mean/gcn/pool/lstm")
    args = parser.parse_args()
    print(args)
    main(args)
\ No newline at end of file
MoNet
=====
- paper link: [Geometric deep learning on graphs and manifolds using mixture model CNNs](https://arxiv.org/pdf/1611.08402.pdf)
Dependencies
============
- MXNet 1.5+
Results
=======
Node classification on citation networks:
- Cora: ~0.814
- Pubmed: ~0.748
import argparse
import time
import numpy as np
import networkx as nx
import mxnet as mx
from mxnet import gluon, nd
from mxnet.gluon import nn
from dgl import DGLGraph
from dgl.data import register_data_args, load_data
from dgl.nn.mxnet.conv import GMMConv
class MoNet(nn.Block):
    """MoNet node classifier: GMMConv layers fed with learned projections of
    2-D pseudo-coordinates."""
    def __init__(self,
                 g,
                 in_feats,
                 n_hidden,
                 out_feats,
                 n_layers,
                 dim,
                 n_kernels):
        super(MoNet, self).__init__()
        self.g = g
        with self.name_scope():
            self.layers = nn.Sequential()
            self.pseudo_proj = nn.Sequential()
            # Each conv layer is paired with its own tanh projection of the
            # 2-D pseudo-coordinates into `dim` dimensions.
            # Input projection.
            self.layers.add(GMMConv(in_feats, n_hidden, dim, n_kernels))
            self.pseudo_proj.add(nn.Dense(dim, in_units=2, activation='tanh'))
            # Hidden projections.
            for _ in range(n_layers - 1):
                self.layers.add(GMMConv(n_hidden, n_hidden, dim, n_kernels))
                self.pseudo_proj.add(nn.Dense(dim, in_units=2, activation='tanh'))
            # Output projection.
            self.layers.add(GMMConv(n_hidden, out_feats, dim, n_kernels))
            self.pseudo_proj.add(nn.Dense(dim, in_units=2, activation='tanh'))

    def forward(self, feat, pseudo):
        h = feat
        for conv, proj in zip(self.layers, self.pseudo_proj):
            h = conv(self.g, h, proj(pseudo))
        return h
def evaluate(model, features, pseudo, labels, mask):
    """Masked classification accuracy of `model`, returned as a Python float."""
    predictions = model(features, pseudo).argmax(axis=1)
    hits = (predictions == labels) * mask
    return (hits.sum() / mask.sum().asscalar()).asscalar()
def main(args):
    """Train and evaluate a MoNet model on the citation dataset selected
    by ``args`` (see the argparse flags registered under ``__main__``).
    """
    # load and preprocess dataset
    data = load_data(args)
    features = nd.array(data.features)
    labels = nd.array(data.labels)
    train_mask = nd.array(data.train_mask)
    val_mask = nd.array(data.val_mask)
    test_mask = nd.array(data.test_mask)
    in_feats = features.shape[1]
    n_classes = data.num_labels
    n_edges = data.graph.number_of_edges()
    print("""----Data statistics------'
      #Edges %d
      #Classes %d
      #Train samples %d
      #Val samples %d
      #Test samples %d""" %
          (n_edges, n_classes,
           train_mask.sum().asscalar(),
           val_mask.sum().asscalar(),
           test_mask.sum().asscalar()))

    # Pick the device: CPU when --gpu < 0, otherwise the given GPU id.
    if args.gpu < 0:
        ctx = mx.cpu(0)
    else:
        ctx = mx.gpu(args.gpu)
        print("use cuda:", args.gpu)

    # Move all tensors to the chosen device.
    features = features.as_in_context(ctx)
    labels = labels.as_in_context(ctx)
    train_mask = train_mask.as_in_context(ctx)
    val_mask = val_mask.as_in_context(ctx)
    test_mask = test_mask.as_in_context(ctx)

    # graph preprocess and calculate normalization factor
    g = data.graph
    # Strip pre-existing self-loops before wrapping in a DGLGraph.
    g.remove_edges_from(nx.selfloop_edges(g))
    g = DGLGraph(g)
    n_edges = g.number_of_edges()
    us, vs = g.edges()
    # Pseudo-coordinate for each edge (u, v): [1/sqrt(in_deg(u)), 1/sqrt(in_deg(v))].
    pseudo = []
    for i in range(g.number_of_edges()):
        pseudo.append([
            1 / np.sqrt(g.in_degree(us[i].asscalar())),
            1 / np.sqrt(g.in_degree(vs[i].asscalar()))
        ])
    pseudo = nd.array(pseudo, ctx=ctx)

    # create MoNet model
    model = MoNet(g,
                  in_feats,
                  args.n_hidden,
                  n_classes,
                  args.n_layers,
                  args.pseudo_dim,
                  args.n_kernels,
                  )
    model.initialize(ctx=ctx)
    # Mean the loss over the number of training nodes, not all nodes.
    n_train_samples = train_mask.sum().asscalar()
    loss_fcn = gluon.loss.SoftmaxCELoss()

    print(model.collect_params())
    trainer = gluon.Trainer(model.collect_params(), 'adam',
                            {'learning_rate': args.lr, 'wd': args.weight_decay})

    # initialize graph
    dur = []
    for epoch in range(args.n_epochs):
        # Skip the first 3 epochs when timing (warm-up).
        if epoch >= 3:
            t0 = time.time()
        # forward
        with mx.autograd.record():
            pred = model(features, pseudo)
            # train_mask acts as a per-sample weight: only training nodes
            # contribute to the loss.
            loss = loss_fcn(pred, labels, mx.nd.expand_dims(train_mask, 1))
            loss = loss.sum() / n_train_samples

        loss.backward()
        trainer.step(batch_size=1)

        if epoch >= 3:
            # asscalar() blocks until the async computation finishes, so the
            # timing below measures the real work done this epoch.
            loss.asscalar()
            dur.append(time.time() - t0)
            acc = evaluate(model, features, pseudo, labels, val_mask)
            print("Epoch {:05d} | Time(s) {:.4f} | Loss {:.4f} | Accuracy {:.4f} | "
                  "ETputs(KTEPS) {:.2f}". format(
                epoch, np.mean(dur), loss.asscalar(), acc, n_edges / np.mean(dur) / 1000))

    # test set accuracy
    acc = evaluate(model, features, pseudo, labels, test_mask)
    print("Test accuracy {:.2%}".format(acc))
if __name__ == '__main__':
    # Command-line entry point: declare hyper-parameters, then train.
    cli = argparse.ArgumentParser(description='MoNet on citation network')
    register_data_args(cli)
    # (flag, add_argument kwargs) pairs, registered in declaration order.
    hyper_params = [
        ("--dropout", dict(type=float, default=0.5,
                           help="dropout probability")),
        ("--gpu", dict(type=int, default=-1,
                       help="gpu")),
        ("--lr", dict(type=float, default=1e-2,
                      help="learning rate")),
        ("--n-epochs", dict(type=int, default=200,
                            help="number of training epochs")),
        ("--n-hidden", dict(type=int, default=16,
                            help="number of hidden gcn units")),
        ("--n-layers", dict(type=int, default=1,
                            help="number of hidden gcn layers")),
        ("--pseudo-dim", dict(type=int, default=2,
                              help="Pseudo coordinate dimensions in GMMConv, 2 for cora and 3 for pubmed")),
        ("--n-kernels", dict(type=int, default=3,
                             help="Number of kernels in GMMConv layer")),
        ("--weight-decay", dict(type=float, default=5e-5,
                                help="Weight for L2 loss")),
    ]
    for flag, options in hyper_params:
        cli.add_argument(flag, **options)
    args = cli.parse_args()
    print(args)
    main(args)
\ No newline at end of file
Simple Graph Convolution (SGC)
============
- Paper link: [Simplifying Graph Convolutional Networks](https://arxiv.org/abs/1902.07153)
- Author's code repo: [https://github.com/Tiiiger/SGC](https://github.com/Tiiiger/SGC).
Dependencies
------------
- MXNET 1.5+
- requests
```bash
pip install mxnet requests
```
Codes
-----
The folder contains an implementation of SGC (`sgc.py`).
Results
-------
Run with following (available dataset: "cora", "citeseer", "pubmed")
```bash
DGLBACKEND=mxnet python3 sgc.py --dataset cora --gpu 0
DGLBACKEND=mxnet python3 sgc.py --dataset citeseer --weight-decay 5e-5 --n-epochs 150 --bias --gpu 0
DGLBACKEND=mxnet python3 sgc.py --dataset pubmed --weight-decay 5e-5 --bias --gpu 0
```
On NVIDIA V100
* cora: 0.818 (paper: 0.810)
* citeseer: 0.725 (paper: 0.719)
* pubmed: 0.788 (paper: 0.789)
"""
This code was modified from the GCN implementation in DGL examples.
Simplifying Graph Convolutional Networks
Paper: https://arxiv.org/abs/1902.07153
Code: https://github.com/Tiiiger/SGC
SGC implementation in DGL.
"""
import argparse, time, math
import numpy as np
import mxnet as mx
from mxnet import nd, gluon
from mxnet.gluon import nn
from dgl import DGLGraph
from dgl.data import register_data_args, load_data
from dgl.nn.mxnet.conv import SGConv
def evaluate(model, g, features, labels, mask):
    """Masked classification accuracy of `model` on `g`, as a Python float."""
    predictions = model(g, features).argmax(axis=1)
    hits = (predictions == labels) * mask
    return (hits.sum() / mask.sum().asscalar()).asscalar()
def main(args):
    """Train and evaluate an SGC model on the citation dataset selected
    by ``args`` (see the argparse flags registered under ``__main__``).
    """
    # load and preprocess dataset
    data = load_data(args)
    features = nd.array(data.features)
    labels = nd.array(data.labels)
    train_mask = nd.array(data.train_mask)
    val_mask = nd.array(data.val_mask)
    test_mask = nd.array(data.test_mask)
    in_feats = features.shape[1]
    n_classes = data.num_labels
    n_edges = data.graph.number_of_edges()
    print("""----Data statistics------'
      #Edges %d
      #Classes %d
      #Train samples %d
      #Val samples %d
      #Test samples %d""" %
          (n_edges, n_classes,
           train_mask.sum().asscalar(),
           val_mask.sum().asscalar(),
           test_mask.sum().asscalar()))

    # Pick the device: CPU when --gpu < 0, otherwise the given GPU id.
    if args.gpu < 0:
        ctx = mx.cpu(0)
    else:
        ctx = mx.gpu(args.gpu)

    # Move all tensors to the chosen device.
    features = features.as_in_context(ctx)
    labels = labels.as_in_context(ctx)
    train_mask = train_mask.as_in_context(ctx)
    val_mask = val_mask.as_in_context(ctx)
    test_mask = test_mask.as_in_context(ctx)

    # graph preprocess and calculate normalization factor
    g = DGLGraph(data.graph)
    n_edges = g.number_of_edges()
    # add self loop
    g.add_edges(g.nodes(), g.nodes())

    # create SGC model (single SGConv layer; k=2 propagation steps,
    # cached=True reuses the precomputed propagation across epochs)
    model = SGConv(in_feats,
                   n_classes,
                   k=2,
                   cached=True,
                   bias=args.bias)

    model.initialize(ctx=ctx)
    # Mean the loss over the number of training nodes, not all nodes.
    n_train_samples = train_mask.sum().asscalar()
    loss_fcn = gluon.loss.SoftmaxCELoss()

    # use optimizer
    print(model.collect_params())
    trainer = gluon.Trainer(model.collect_params(), 'adam',
                            {'learning_rate': args.lr, 'wd': args.weight_decay})

    # initialize graph
    dur = []
    for epoch in range(args.n_epochs):
        # Skip the first 3 epochs when timing (warm-up).
        if epoch >= 3:
            t0 = time.time()
        # forward
        with mx.autograd.record():
            pred = model(g, features)
            # train_mask acts as a per-sample weight: only training nodes
            # contribute to the loss.
            loss = loss_fcn(pred, labels, mx.nd.expand_dims(train_mask, 1))
            loss = loss.sum() / n_train_samples

        loss.backward()
        trainer.step(batch_size=1)

        if epoch >= 3:
            # asscalar() blocks until the async computation finishes, so the
            # timing below measures the real work done this epoch.
            loss.asscalar()
            dur.append(time.time() - t0)
            acc = evaluate(model, g, features, labels, val_mask)
            print("Epoch {:05d} | Time(s) {:.4f} | Loss {:.4f} | Accuracy {:.4f} | "
                  "ETputs(KTEPS) {:.2f}". format(
                epoch, np.mean(dur), loss.asscalar(), acc, n_edges / np.mean(dur) / 1000))

    # test set accuracy
    acc = evaluate(model, g, features, labels, test_mask)
    print("Test accuracy {:.2%}".format(acc))
if __name__ == '__main__':
    # Command-line entry point: declare hyper-parameters, then train.
    cli = argparse.ArgumentParser(description='SGC')
    register_data_args(cli)
    # (flag, add_argument kwargs) pairs, registered in declaration order.
    hyper_params = [
        ("--gpu", dict(type=int, default=-1,
                       help="gpu")),
        ("--lr", dict(type=float, default=0.2,
                      help="learning rate")),
        ("--bias", dict(action='store_true', default=False,
                        help="flag to use bias")),
        ("--n-epochs", dict(type=int, default=100,
                            help="number of training epochs")),
        ("--weight-decay", dict(type=float, default=5e-6,
                                help="Weight for L2 loss")),
    ]
    for flag, options in hyper_params:
        cli.add_argument(flag, **options)
    args = cli.parse_args()
    print(args)
    main(args)
\ No newline at end of file
......@@ -5,9 +5,7 @@ References
Paper: https://arxiv.org/abs/1810.05997
Author's code: https://github.com/klicperajo/ppnp
"""
import torch
import torch.nn as nn
import dgl.function as fn
from dgl.nn.pytorch.conv import APPNPConv
......
......@@ -65,13 +65,6 @@ def main(args):
g.add_edges(g.nodes(), g.nodes())
g.set_n_initializer(dgl.init.zero_initializer)
g.set_e_initializer(dgl.init.zero_initializer)
# normalization
degs = g.in_degrees().float()
norm = torch.pow(degs, -0.5)
norm[torch.isinf(norm)] = 0
if cuda:
norm = norm.cuda()
g.ndata['norm'] = norm.unsqueeze(1)
# create APPNP model
model = APPNP(g,
......
"""
PyTorch compatible dataloader
"""
......@@ -19,12 +18,8 @@ def collate(samples):
graphs, labels = map(list, zip(*samples))
for g in graphs:
# deal with node feats
for feat in g.node_attr_schemes().keys():
# TODO torch.Tensor is not recommended
# torch.DoubleTensor and torch.tensor
# will meet error in executor.py@runtime line 472, tensor.py@backend line 147
# RuntimeError: expected type torch.cuda.DoubleTensor but got torch.cuda.FloatTensor
g.ndata[feat] = torch.Tensor(g.ndata[feat])
for key in g.node_attr_schemes().keys():
g.ndata[key] = torch.from_numpy(g.ndata[key]).float()
# no edge feats
batched_graph = dgl.batch(graphs)
labels = torch.tensor(labels)
......@@ -63,10 +58,10 @@ class GraphDataLoader():
self.train_loader = DataLoader(
dataset, sampler=train_sampler,
batch_size=batch_size, collate_fn=collate, **self.kwargs)
batch_size=batch_size, collate_fn=collate_fn, **self.kwargs)
self.valid_loader = DataLoader(
dataset, sampler=valid_sampler,
batch_size=batch_size, collate_fn=collate, **self.kwargs)
batch_size=batch_size, collate_fn=collate_fn, **self.kwargs)
def train_valid_loader(self):
return self.train_loader, self.valid_loader
......@@ -76,7 +71,6 @@ class GraphDataLoader():
assert 0 <= fold_idx and fold_idx < 10, print(
"fold_idx must be from 0 to 9.")
idx_list = []
skf = StratifiedKFold(n_splits=10, shuffle=shuffle, random_state=seed)
idx_list = []
for idx in skf.split(np.zeros(len(labels)), labels): # split(x, y)
......
......@@ -10,9 +10,7 @@ import torch
import torch.nn as nn
import torch.nn.functional as F
from dgl.nn.pytorch.conv import GINConv
import dgl
import dgl.function as fn
from dgl.nn.pytorch.glob import SumPooling, AvgPooling, MaxPooling
class ApplyNodeFunc(nn.Module):
......@@ -77,16 +75,16 @@ class MLP(nn.Module):
else:
# If MLP
h = x
for layer in range(self.num_layers - 1):
h = F.relu(self.batch_norms[layer](self.linears[layer](h)))
return self.linears[self.num_layers - 1](h)
for i in range(self.num_layers - 1):
h = F.relu(self.batch_norms[i](self.linears[i](h)))
return self.linears[-1](h)
class GIN(nn.Module):
"""GIN model"""
def __init__(self, num_layers, num_mlp_layers, input_dim, hidden_dim,
output_dim, final_dropout, learn_eps, graph_pooling_type,
neighbor_pooling_type, device):
neighbor_pooling_type):
"""model parameters setting
Paramters
......@@ -110,15 +108,10 @@ class GIN(nn.Module):
how to aggregate neighbors (sum, mean, or max)
graph_pooling_type: str
how to aggregate entire nodes in a graph (sum, mean or max)
device: str
which device to use
"""
super(GIN, self).__init__()
self.final_dropout = final_dropout
self.device = device
self.num_layers = num_layers
self.graph_pooling_type = graph_pooling_type
self.learn_eps = learn_eps
# List of MLPs
......@@ -147,36 +140,32 @@ class GIN(nn.Module):
self.linears_prediction.append(
nn.Linear(hidden_dim, output_dim))
def forward(self, g):
h = g.ndata['attr']
h = h.to(self.device)
self.drop = nn.Dropout(final_dropout)
if graph_pooling_type == 'sum':
self.pool = SumPooling()
elif graph_pooling_type == 'mean':
self.pool = AvgPooling()
elif graph_pooling_type == 'max':
self.pool = MaxPooling()
else:
raise NotImplementedError
def forward(self, g, h):
# list of hidden representation at each layer (including input)
hidden_rep = [h]
for layer in range(self.num_layers - 1):
h = self.ginlayers[layer](g, h)
h = self.batch_norms[layer](h)
for i in range(self.num_layers - 1):
h = self.ginlayers[i](g, h)
h = self.batch_norms[i](h)
h = F.relu(h)
hidden_rep.append(h)
score_over_layer = 0
# perform pooling over all nodes in each graph in every layer
for layer, h in enumerate(hidden_rep):
g.ndata['h'] = h
if self.graph_pooling_type == 'sum':
pooled_h = dgl.sum_nodes(g, 'h')
elif self.graph_pooling_type == 'mean':
pooled_h = dgl.mean_nodes(g, 'h')
elif self.graph_pooling_type == 'max':
pooled_h = dgl.max_nodes(g, 'h')
else:
raise NotImplementedError()
score_over_layer += F.dropout(
self.linears_prediction[layer](pooled_h),
self.final_dropout,
training=self.training)
for i, h in enumerate(hidden_rep):
pooled_h = self.pool(g, h)
score_over_layer += self.drop(self.linears_prediction[i](pooled_h))
return score_over_layer
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment