"examples/pytorch/argo/ogb_example_ARGO.py" did not exist on "704bcaf6ddc77a6e1e8ecea0beec76eed73fe826"
Unverified Commit 650f6ee1 authored by Zihao Ye, committed by GitHub

[NN] Add commonly used GNN models from examples to dgl.nn modules. (#748)

* gat

* upd

* upd sage

* upd

* upd

* upd

* upd

* upd

* add gmmconv

* upd ggnn

* upd

* upd

* upd

* upd

* add citation examples

* add README

* fix cheb

* improve doc

* formula

* upd

* trigger

* lint

* lint

* upd

* add test for transform

* add test

* check

* upd

* improve doc

* shape check

* upd

* densechebconv, currently not correct (?)

* fix cheb

* fix

* upd

* upd sgc-reddit

* upd

* trigger
parent 8079d986
......@@ -14,6 +14,32 @@ Message functions
copy_src
copy_edge
src_mul_edge
copy_u
copy_e
u_add_v
u_sub_v
u_mul_v
u_div_v
u_add_e
u_sub_e
u_mul_e
u_div_e
v_add_u
v_sub_u
v_mul_u
v_div_u
v_add_e
v_sub_e
v_mul_e
v_div_e
e_add_u
e_sub_u
e_mul_u
e_div_u
e_add_v
e_sub_v
e_mul_v
e_div_v
Reduce functions
----------------
......@@ -23,3 +49,6 @@ Reduce functions
sum
max
min
prod
mean
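
As an editorial aside (not part of this diff), a minimal sketch of the newly listed built-ins in use on a toy graph; `u_mul_e` and `mean` come from the message and reduce tables above:

```
import torch
import dgl
import dgl.function as fn

g = dgl.DGLGraph()
g.add_nodes(3)
g.add_edges([0, 1, 2], [1, 2, 0])   # a directed 3-cycle
g.ndata['h'] = torch.ones(3, 4)     # node features
g.edata['w'] = torch.rand(3, 4)     # per-edge weights (same shape as the features for simplicity)

# message: u_mul_e multiplies source features by edge weights;
# reduce: mean averages the incoming messages.
g.update_all(fn.u_mul_e('h', 'w', 'm'), fn.mean('m', 'h_new'))
print(g.ndata['h_new'].shape)       # torch.Size([3, 4])
```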
......@@ -16,6 +16,62 @@ dgl.nn.pytorch.conv
:members: forward
:show-inheritance:
.. autoclass:: dgl.nn.pytorch.conv.TAGConv
:members: forward
:show-inheritance:
.. autoclass:: dgl.nn.pytorch.conv.GATConv
:members: forward
:show-inheritance:
.. autoclass:: dgl.nn.pytorch.conv.SAGEConv
:members: forward
:show-inheritance:
.. autoclass:: dgl.nn.pytorch.conv.SGConv
:members: forward
:show-inheritance:
.. autoclass:: dgl.nn.pytorch.conv.APPNPConv
:members: forward
:show-inheritance:
.. autoclass:: dgl.nn.pytorch.conv.GINConv
:members: forward
:show-inheritance:
.. autoclass:: dgl.nn.pytorch.conv.GatedGraphConv
:members: forward
:show-inheritance:
.. autoclass:: dgl.nn.pytorch.conv.GMMConv
:members: forward
:show-inheritance:
.. autoclass:: dgl.nn.pytorch.conv.ChebConv
:members: forward
:show-inheritance:
.. autoclass:: dgl.nn.pytorch.conv.AGNNConv
:members: forward
:show-inheritance:
.. autoclass:: dgl.nn.pytorch.conv.NNConv
:members: forward
:show-inheritance:
.. autoclass:: dgl.nn.pytorch.conv.DenseGraphConv
:members: forward
:show-inheritance:
.. autoclass:: dgl.nn.pytorch.conv.DenseSAGEConv
:members: forward
:show-inheritance:
.. autoclass:: dgl.nn.pytorch.conv.DenseChebConv
:members: forward
:show-inheritance:
dgl.nn.pytorch.glob
-------------------
.. automodule:: dgl.nn.pytorch.glob
......
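
As an editorial aside (not part of this diff), a minimal sketch of instantiating one of the conv modules documented above, assuming the `forward(feat, graph)` argument order used by this commit's examples (later DGL releases reverse it):

```
import torch
import dgl
from dgl.nn.pytorch.conv import GATConv

g = dgl.DGLGraph()
g.add_nodes(4)
g.add_edges([0, 1, 2, 3], [1, 2, 3, 0])
feat = torch.randn(4, 10)

conv = GATConv(10, 8, 2)      # in_feats=10, out_feats=8, 2 attention heads
out = conv(feat, g)           # shape (4, 2, 8): one slice per head
print(out.shape)
```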
......@@ -12,3 +12,6 @@ Transform -- Graph Transformation
reverse
to_simple_graph
to_bidirected
khop_adj
khop_graph
laplacian_lambda_max
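
A hedged sketch of the three transforms added above, assuming they live in `dgl.transform` as this page's module path suggests:

```
import dgl
from dgl.transform import khop_adj, khop_graph, laplacian_lambda_max

g = dgl.DGLGraph()
g.add_nodes(4)
g.add_edges([0, 1, 2, 3], [1, 2, 3, 0])

A2 = khop_adj(g, 2)             # dense adjacency matrix of the 2-hop graph
g2 = khop_graph(g, 2)           # DGLGraph whose edges are the 2-hop paths
lmax = laplacian_lambda_max(g)  # largest normalized-Laplacian eigenvalue(s), one entry per graph
```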
......@@ -8,44 +8,7 @@ Author's code: https://github.com/klicperajo/ppnp
import torch
import torch.nn as nn
import dgl.function as fn
class GraphPropagation(nn.Module):
def __init__(self,
g,
edge_drop,
alpha,
k):
super(GraphPropagation, self).__init__()
self.g = g
self.alpha = alpha
self.k = k
if edge_drop:
self.edge_drop = nn.Dropout(edge_drop)
else:
self.edge_drop = 0.
def forward(self, h):
self.cached_h = h
for _ in range(self.k):
# normalization by square root of src degree
h = h * self.g.ndata['norm']
self.g.ndata['h'] = h
if self.edge_drop:
# performing edge dropout
ed = self.edge_drop(torch.ones((self.g.number_of_edges(), 1), device=h.device))
self.g.edata['d'] = ed
self.g.update_all(fn.src_mul_edge(src='h', edge='d', out='m'),
fn.sum(msg='m', out='h'))
else:
self.g.update_all(fn.copy_src(src='h', out='m'),
fn.sum(msg='m', out='h'))
h = self.g.ndata.pop('h')
# normalization by square root of dst degree
h = h * self.g.ndata['norm']
# update h using teleport probability alpha
h = h * (1 - self.alpha) + self.cached_h * self.alpha
return h
from dgl.nn.pytorch.conv import APPNPConv
class APPNP(nn.Module):
......@@ -60,6 +23,7 @@ class APPNP(nn.Module):
alpha,
k):
super(APPNP, self).__init__()
self.g = g
self.layers = nn.ModuleList()
# input layer
self.layers.append(nn.Linear(in_feats, hiddens[0]))
......@@ -73,7 +37,7 @@ class APPNP(nn.Module):
self.feat_drop = nn.Dropout(feat_drop)
else:
self.feat_drop = lambda x: x
self.propagate = GraphPropagation(g, edge_drop, alpha, k)
self.propagate = APPNPConv(k, alpha, edge_drop)
self.reset_parameters()
def reset_parameters(self):
......@@ -89,5 +53,5 @@ class APPNP(nn.Module):
h = self.activation(layer(h))
h = self.layers[-1](self.feat_drop(h))
# propagation step
h = self.propagate(h)
h = self.propagate(h, self.g)
return h
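
An editorial, standalone sketch of what the `APPNPConv` replacement above does: `k` rounds of personalized-PageRank propagation with teleport probability `alpha`, applied to the MLP's predictions:

```
import torch
import dgl
from dgl.nn.pytorch.conv import APPNPConv

g = dgl.DGLGraph()
g.add_nodes(4)
g.add_edges([0, 1, 2, 3], [1, 2, 3, 0])
h0 = torch.randn(4, 16)          # stand-in for the MLP output

prop = APPNPConv(10, 0.1, 0.5)   # k=10 steps, alpha=0.1, edge_drop=0.5
h = prop(h0, g)                  # feature size is unchanged; only the graph mixes features
```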
......@@ -10,78 +10,8 @@ Pytorch implementation: https://github.com/Diego999/pyGAT
import torch
import torch.nn as nn
import dgl.function as fn
from dgl.nn.pytorch import edge_softmax
from dgl.nn.pytorch import edge_softmax, GATConv
class GraphAttention(nn.Module):
def __init__(self,
g,
in_dim,
out_dim,
num_heads,
feat_drop,
attn_drop,
alpha,
residual=False):
super(GraphAttention, self).__init__()
self.g = g
self.num_heads = num_heads
self.fc = nn.Linear(in_dim, num_heads * out_dim, bias=False)
if feat_drop:
self.feat_drop = nn.Dropout(feat_drop)
else:
self.feat_drop = lambda x : x
if attn_drop:
self.attn_drop = nn.Dropout(attn_drop)
else:
self.attn_drop = lambda x : x
self.attn_l = nn.Parameter(torch.Tensor(size=(1, num_heads, out_dim)))
self.attn_r = nn.Parameter(torch.Tensor(size=(1, num_heads, out_dim)))
nn.init.xavier_normal_(self.fc.weight.data, gain=1.414)
nn.init.xavier_normal_(self.attn_l.data, gain=1.414)
nn.init.xavier_normal_(self.attn_r.data, gain=1.414)
self.leaky_relu = nn.LeakyReLU(alpha)
self.softmax = edge_softmax
self.residual = residual
if residual:
if in_dim != out_dim:
self.res_fc = nn.Linear(in_dim, num_heads * out_dim, bias=False)
nn.init.xavier_normal_(self.res_fc.weight.data, gain=1.414)
else:
self.res_fc = None
def forward(self, inputs):
# prepare
h = self.feat_drop(inputs) # NxD
ft = self.fc(h).reshape((h.shape[0], self.num_heads, -1)) # NxHxD'
a1 = (ft * self.attn_l).sum(dim=-1).unsqueeze(-1) # N x H x 1
a2 = (ft * self.attn_r).sum(dim=-1).unsqueeze(-1) # N x H x 1
self.g.ndata.update({'ft' : ft, 'a1' : a1, 'a2' : a2})
# 1. compute edge attention
self.g.apply_edges(self.edge_attention)
# 2. compute softmax
self.edge_softmax()
# 3. compute the aggregated node features scaled by the dropped,
# unnormalized attention values.
self.g.update_all(fn.src_mul_edge('ft', 'a_drop', 'ft'), fn.sum('ft', 'ft'))
ret = self.g.ndata['ft']
# 4. residual
if self.residual:
if self.res_fc is not None:
resval = self.res_fc(h).reshape((h.shape[0], self.num_heads, -1)) # NxHxD'
else:
resval = torch.unsqueeze(h, 1) # Nx1xD'
ret = resval + ret
return ret
def edge_attention(self, edges):
# an edge UDF to compute unnormalized attention values from src and dst
a = self.leaky_relu(edges.src['a1'] + edges.dst['a2'])
return {'a' : a}
def edge_softmax(self):
attention = self.softmax(self.g, self.g.edata.pop('a'))
# Dropout attention scores and save them
self.g.edata['a_drop'] = self.attn_drop(attention)
class GAT(nn.Module):
def __init__(self,
......@@ -94,7 +24,7 @@ class GAT(nn.Module):
activation,
feat_drop,
attn_drop,
alpha,
negative_slope,
residual):
super(GAT, self).__init__()
self.g = g
......@@ -102,24 +32,24 @@ class GAT(nn.Module):
self.gat_layers = nn.ModuleList()
self.activation = activation
# input projection (no residual)
self.gat_layers.append(GraphAttention(
g, in_dim, num_hidden, heads[0], feat_drop, attn_drop, alpha, False))
self.gat_layers.append(GATConv(
in_dim, num_hidden, heads[0],
feat_drop, attn_drop, negative_slope, False, self.activation))
# hidden layers
for l in range(1, num_layers):
# due to multi-head, the in_dim = num_hidden * num_heads
self.gat_layers.append(GraphAttention(
g, num_hidden * heads[l-1], num_hidden, heads[l],
feat_drop, attn_drop, alpha, residual))
self.gat_layers.append(GATConv(
num_hidden * heads[l-1], num_hidden, heads[l],
feat_drop, attn_drop, negative_slope, residual, self.activation))
# output projection
self.gat_layers.append(GraphAttention(
g, num_hidden * heads[-2], num_classes, heads[-1],
feat_drop, attn_drop, alpha, residual))
self.gat_layers.append(GATConv(
num_hidden * heads[-2], num_classes, heads[-1],
feat_drop, attn_drop, negative_slope, residual, None))
def forward(self, inputs):
h = inputs
for l in range(self.num_layers):
h = self.gat_layers[l](h).flatten(1)
h = self.activation(h)
h = self.gat_layers[l](h, self.g).flatten(1)
# output projection
logits = self.gat_layers[-1](h).mean(1)
logits = self.gat_layers[-1](h, self.g).mean(1)
return logits
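
Editorial note: `GATConv` here returns a `(N, num_heads, out_dim)` tensor, which is why the code above calls `.flatten(1)` between layers and `.mean(1)` at the output. A tiny torch-only sketch of the two head-combination strategies:

```
import torch

N, H, D = 5, 3, 8
head_out = torch.randn(N, H, D)   # stand-in for a GATConv output

hidden = head_out.flatten(1)      # concatenate heads -> (N, H * D), used between layers
logits = head_out.mean(1)         # average heads     -> (N, D), used for the output layer
```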
......@@ -86,7 +86,7 @@ def main(args):
F.elu,
args.in_drop,
args.attn_drop,
args.alpha,
args.negative_slope,
args.residual)
print(model)
stopper = EarlyStopping(patience=100)
......@@ -161,8 +161,8 @@ if __name__ == '__main__':
help="learning rate")
parser.add_argument('--weight-decay', type=float, default=5e-4,
help="weight decay")
parser.add_argument('--alpha', type=float, default=0.2,
help="the negative slop of leaky relu")
parser.add_argument('--negative-slope', type=float, default=0.2,
help="the negative slope of leaky relu")
parser.add_argument('--fastmode', action="store_true", default=False,
help="skip re-evaluate the validation set")
args = parser.parse_args()
......
......@@ -9,72 +9,24 @@ Author's implementation: https://github.com/weihua916/powerful-gnns
import torch
import torch.nn as nn
import torch.nn.functional as F
from dgl.nn.pytorch.conv import GINConv
import dgl
import dgl.function as fn
# Sends a message of node feature h.
msg = fn.copy_src(src='h', out='m')
reduce_sum = fn.sum(msg='m', out='h')
reduce_max = fn.max(msg='m', out='h')
def reduce_mean(nodes):
return {'h': torch.mean(nodes.mailbox['m'], dim=1)[0]}
class ApplyNodes(nn.Module):
class ApplyNodeFunc(nn.Module):
"""Update the node feature hv with MLP, BN and ReLU."""
def __init__(self, mlp, layer):
super(ApplyNodes, self).__init__()
def __init__(self, mlp):
super(ApplyNodeFunc, self).__init__()
self.mlp = mlp
self.bn = nn.BatchNorm1d(self.mlp.output_dim)
self.layer = layer
def forward(self, nodes):
h = self.mlp(nodes.data['h'])
def forward(self, h):
h = self.mlp(h)
h = self.bn(h)
h = F.relu(h)
return {'h': h}
class GINLayer(nn.Module):
"""Neighbor pooling and reweight nodes before send graph into MLP"""
def __init__(self, eps, layer, mlp, neighbor_pooling_type, learn_eps):
super(GINLayer, self).__init__()
self.bn = nn.BatchNorm1d(mlp.output_dim)
self.neighbor_pooling_type = neighbor_pooling_type
self.eps = eps
self.learn_eps = learn_eps
self.layer = layer
self.apply_mod = ApplyNodes(mlp, layer)
def forward(self, g, feature):
g.ndata['h'] = feature
if self.neighbor_pooling_type == 'sum':
reduce_func = reduce_sum
elif self.neighbor_pooling_type == 'mean':
reduce_func = reduce_mean
elif self.neighbor_pooling_type == 'max':
reduce_func = reduce_max
else:
raise NotImplementedError()
h = feature # h0
g.update_all(msg, reduce_func)
pooled = g.ndata['h']
# reweight the center node when aggregating it with its neighbors
if self.learn_eps:
pooled = pooled + (1 + self.eps[self.layer])*h
g.ndata['h'] = pooled
g.apply_nodes(func=self.apply_mod)
return g.ndata.pop('h')
return h
class MLP(nn.Module):
......@@ -168,7 +120,6 @@ class GIN(nn.Module):
self.num_layers = num_layers
self.graph_pooling_type = graph_pooling_type
self.learn_eps = learn_eps
self.eps = nn.Parameter(torch.zeros(self.num_layers - 1))
# List of MLPs
self.ginlayers = torch.nn.ModuleList()
......@@ -180,8 +131,8 @@ class GIN(nn.Module):
else:
mlp = MLP(num_mlp_layers, hidden_dim, hidden_dim, hidden_dim)
self.ginlayers.append(GINLayer(
self.eps, layer, mlp, neighbor_pooling_type, self.learn_eps))
self.ginlayers.append(
GINConv(ApplyNodeFunc(mlp), neighbor_pooling_type, 0, self.learn_eps))
self.batch_norms.append(nn.BatchNorm1d(hidden_dim))
# Linear function for graph poolings of output of each layer
......@@ -204,7 +155,7 @@ class GIN(nn.Module):
hidden_rep = [h]
for layer in range(self.num_layers - 1):
h = self.ginlayers[layer](g, h)
h = self.ginlayers[layer](h, g)
hidden_rep.append(h)
score_over_layer = 0
......
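
A hedged standalone sketch of the `GINConv` call that replaces `GINLayer` above; the apply function can be any `nn.Module` (here a plain linear layer stands in for `ApplyNodeFunc(mlp)`), and the arguments follow the order used in the diff: apply function, aggregator type, initial eps, learn_eps:

```
import torch
import torch.nn as nn
import dgl
from dgl.nn.pytorch.conv import GINConv

g = dgl.DGLGraph()
g.add_nodes(4)
g.add_edges([0, 1, 2, 3], [1, 2, 3, 0])
h = torch.randn(4, 16)

conv = GINConv(nn.Linear(16, 16), 'sum', 0, False)
h = conv(h, g)   # forward takes (feat, graph) in this commit's examples
```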
......@@ -148,7 +148,7 @@ def main(args):
lrbar.set_description(
"the learning eps with learn_eps={} is: {}".format(
args.learn_eps, model.eps.data))
args.learn_eps, [layer.eps.data for layer in model.ginlayers]))
tbar.close()
vbar.close()
......
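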
......@@ -22,6 +22,6 @@ Run with following (available dataset: "cora", "citeseer", "pubmed")
python3 graphsage.py --dataset cora --gpu 0
```
* cora: ~0.8470
* citeseer: ~0.6870
* pubmed: ~0.7730
* cora: ~0.8330
* citeseer: ~0.7110
* pubmed: ~0.7830
......@@ -13,100 +13,7 @@ import torch.nn as nn
import torch.nn.functional as F
from dgl import DGLGraph
from dgl.data import register_data_args, load_data
import dgl.function as fn
class Aggregator(nn.Module):
def __init__(self, g, in_feats, out_feats, activation=None, bias=True):
super(Aggregator, self).__init__()
self.g = g
self.linear = nn.Linear(in_feats, out_feats, bias=bias) # (F, EF) or (2F, EF)
self.activation = activation
nn.init.xavier_uniform_(self.linear.weight, gain=nn.init.calculate_gain('relu'))
def forward(self, node):
nei = node.mailbox['m'] # (B, N, F)
h = node.data['h'] # (B, F)
h = self.concat(h, nei, node) # (B, F) or (B, 2F)
h = self.linear(h) # (B, EF)
if self.activation:
h = self.activation(h)
norm = torch.pow(h, 2)
norm = torch.sum(norm, 1, keepdim=True)
norm = torch.pow(norm, -0.5)
norm[torch.isinf(norm)] = 0
# h = h * norm
return {'h': h}
@abc.abstractmethod
def concat(self, h, nei, nodes):
raise NotImplementedError
class MeanAggregator(Aggregator):
def __init__(self, g, in_feats, out_feats, activation, bias):
super(MeanAggregator, self).__init__(g, in_feats, out_feats, activation, bias)
def concat(self, h, nei, nodes):
degs = self.g.in_degrees(nodes.nodes()).float()
if h.is_cuda:
degs = degs.cuda(h.device)
concatenate = torch.cat((nei, h.unsqueeze(1)), 1)
concatenate = torch.sum(concatenate, 1) / degs.unsqueeze(1)
return concatenate # (B, F)
class PoolingAggregator(Aggregator):
def __init__(self, g, in_feats, out_feats, activation, bias): # (2F, F)
super(PoolingAggregator, self).__init__(g, in_feats*2, out_feats, activation, bias)
self.mlp = PoolingAggregator.MLP(in_feats, in_feats, F.relu, False, True)
def concat(self, h, nei, nodes):
nei = self.mlp(nei) # (B, F)
concatenate = torch.cat((nei, h), 1) # (B, 2F)
return concatenate
class MLP(nn.Module):
def __init__(self, in_feats, out_feats, activation, dropout, bias): # (F, F)
super(PoolingAggregator.MLP, self).__init__()
self.linear = nn.Linear(in_feats, out_feats, bias=bias) # (F, F)
self.dropout = nn.Dropout(p=dropout)
self.activation = activation
nn.init.xavier_uniform_(self.linear.weight, gain=nn.init.calculate_gain('relu'))
def forward(self, nei):
nei = self.dropout(nei) # (B, N, F)
nei = self.linear(nei)
if self.activation:
nei = self.activation(nei)
max_value = torch.max(nei, dim=1)[0] # (B, F)
return max_value
class GraphSAGELayer(nn.Module):
def __init__(self,
g,
in_feats,
out_feats,
activation,
dropout,
aggregator_type,
bias=True,
):
super(GraphSAGELayer, self).__init__()
self.g = g
self.dropout = nn.Dropout(p=dropout)
if aggregator_type == "pooling":
self.aggregator = PoolingAggregator(g, in_feats, out_feats, activation, bias)
else:
self.aggregator = MeanAggregator(g, in_feats, out_feats, activation, bias)
def forward(self, h):
h = self.dropout(h)
self.g.ndata['h'] = h
self.g.update_all(fn.copy_src(src='h', out='m'), self.aggregator)
h = self.g.ndata.pop('h')
return h
from dgl.nn.pytorch.conv import SAGEConv
class GraphSAGE(nn.Module):
......@@ -121,19 +28,20 @@ class GraphSAGE(nn.Module):
aggregator_type):
super(GraphSAGE, self).__init__()
self.layers = nn.ModuleList()
self.g = g
# input layer
self.layers.append(GraphSAGELayer(g, in_feats, n_hidden, activation, dropout, aggregator_type))
self.layers.append(SAGEConv(in_feats, n_hidden, aggregator_type, feat_drop=dropout, activation=activation))
# hidden layers
for i in range(n_layers - 1):
self.layers.append(GraphSAGELayer(g, n_hidden, n_hidden, activation, dropout, aggregator_type))
self.layers.append(SAGEConv(n_hidden, n_hidden, aggregator_type, feat_drop=dropout, activation=activation))
# output layer
self.layers.append(GraphSAGELayer(g, n_hidden, n_classes, None, dropout, aggregator_type))
self.layers.append(SAGEConv(n_hidden, n_classes, aggregator_type, feat_drop=dropout, activation=None)) # activation None
def forward(self, features):
h = features
for layer in self.layers:
h = layer(h)
h = layer(h, self.g)
return h
......@@ -182,7 +90,9 @@ def main(args):
print("use cuda:", args.gpu)
# graph preprocess and calculate normalization factor
g = DGLGraph(data.graph)
g = data.graph
g.remove_edges_from(g.selfloop_edges())
g = DGLGraph(g)
n_edges = g.number_of_edges()
# create GraphSAGE model
......@@ -231,7 +141,7 @@ def main(args):
if __name__ == '__main__':
parser = argparse.ArgumentParser(description='GCN')
parser = argparse.ArgumentParser(description='GraphSAGE')
register_data_args(parser)
parser.add_argument("--dropout", type=float, default=0.5,
help="dropout probability")
......@@ -247,8 +157,8 @@ if __name__ == '__main__':
help="number of hidden gcn layers")
parser.add_argument("--weight-decay", type=float, default=5e-4,
help="Weight for L2 loss")
parser.add_argument("--aggregator-type", type=str, default="mean",
help="Weight for L2 loss")
parser.add_argument("--aggregator-type", type=str, default="gcn",
help="Aggregator type: mean/gcn/pool/lstm")
args = parser.parse_args()
print(args)
......
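
A hedged sketch of the `SAGEConv` layer the example now uses; the updated help string lists the available aggregators (mean/gcn/pool/lstm), and the training script strips self-loops before using the default 'gcn' aggregator:

```
import torch
import dgl
from dgl.nn.pytorch.conv import SAGEConv

g = dgl.DGLGraph()
g.add_nodes(4)
g.add_edges([0, 1, 2, 3], [1, 2, 3, 0])   # no self-loops, matching the 'gcn' aggregator setup
feat = torch.randn(4, 10)

conv = SAGEConv(10, 8, 'gcn', feat_drop=0.5, activation=torch.relu)
out = conv(feat, g)   # shape (4, 8)
```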
# Node Classification on Citation Networks
This example shows how to use modules defined in `dgl.nn.pytorch.conv` to do node classification on
citation network datasets.
## Datasets
- Cora
- Citeseer
- Pubmed
## Models
- GCN: [Semi-Supervised Classification with Graph Convolutional Networks](https://arxiv.org/pdf/1609.02907)
- GAT: [Graph Attention Networks](https://arxiv.org/abs/1710.10903)
- GraphSAGE: [Inductive Representation Learning on Large Graphs](https://cs.stanford.edu/people/jure/pubs/graphsage-nips17.pdf)
- APPNP: [Predict then Propagate: Graph Neural Networks meet Personalized PageRank](https://arxiv.org/pdf/1810.05997)
- GIN: [How Powerful are Graph Neural Networks?](https://arxiv.org/abs/1810.00826)
- TAGCN: [Topology Adaptive Graph Convolutional Networks](https://arxiv.org/abs/1710.10370)
- SGC: [Simplifying Graph Convolutional Networks](https://arxiv.org/abs/1902.07153)
- AGNN: [Attention-based Graph Neural Network for Semi-supervised Learning](https://arxiv.org/pdf/1803.03735.pdf)
- ChebNet: [Convolutional Neural Networks on Graphs with Fast Localized Spectral Filtering](https://arxiv.org/abs/1606.09375)
## Usage
```
python run.py [--gpu] --model MODEL_NAME --dataset DATASET_NAME [--self-loop]
```
The hyperparameters may not be optimal; you can specify them manually in `conf.py`.
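For example (an illustrative invocation; the flags match `run.py` below):
```
python run.py --gpu 0 --model gcn --dataset cora --self-loop
```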
import torch as th
import torch.nn.functional as F
GCN_CONFIG = {
'extra_args': [16, 1, F.relu, 0.5],
'lr': 1e-2,
'weight_decay': 5e-4,
}
GAT_CONFIG = {
'extra_args': [8, 1, [8] * 1 + [1], F.elu, 0.6, 0.6, 0.2, False],
'lr': 0.005,
'weight_decay': 5e-4,
}
GRAPHSAGE_CONFIG = {
'extra_args': [16, 1, F.relu, 0.5, 'gcn'],
'lr': 1e-2,
'weight_decay': 5e-4,
}
APPNP_CONFIG = {
'extra_args': [64, 1, F.relu, 0.5, 0.5, 0.1, 10],
'lr': 1e-2,
'weight_decay': 5e-4,
}
TAGCN_CONFIG = {
'extra_args': [16, 1, F.relu, 0.5],
'lr': 1e-2,
'weight_decay': 5e-4,
}
AGNN_CONFIG = {
'extra_args': [32, 2, 1.0, True, 0.5],
'lr': 1e-2,
'weight_decay': 5e-4,
}
SGC_CONFIG = {
'extra_args': [None, 2, False],
'lr': 0.2,
'weight_decay': 5e-6,
}
GIN_CONFIG = {
'extra_args': [16, 1, 0, True],
'lr': 1e-2,
'weight_decay': 5e-6,
}
CHEBNET_CONFIG = {
'extra_args': [16, 1, 3, True],
'lr': 1e-2,
'weight_decay': 5e-4,
}
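
Editorial note: `run.py` (further below) builds each model as `GNN(g, in_feats, n_classes, *config['extra_args'])`, so each `extra_args` list simply supplies the remaining constructor arguments; a small illustration:

```
def build_model(model_cls, g, in_feats, n_classes, config):
    # mirrors run.py: GCN's extra_args [16, 1, F.relu, 0.5] unpack as
    # n_hidden=16, n_layers=1, activation=F.relu, dropout=0.5
    return model_cls(g, in_feats, n_classes, *config['extra_args'])
```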
import torch
import torch.nn as nn
from dgl.nn.pytorch import GraphConv, GATConv, SAGEConv, GINConv,\
APPNPConv, TAGConv, SGConv, AGNNConv, ChebConv
class GCN(nn.Module):
def __init__(self,
g,
in_feats,
n_classes,
n_hidden,
n_layers,
activation,
dropout):
super(GCN, self).__init__()
self.g = g
self.layers = nn.ModuleList()
# input layer
self.layers.append(GraphConv(in_feats, n_hidden, activation=activation))
# hidden layers
for i in range(n_layers - 1):
self.layers.append(GraphConv(n_hidden, n_hidden, activation=activation))
# output layer
self.layers.append(GraphConv(n_hidden, n_classes))
self.dropout = nn.Dropout(p=dropout)
def forward(self, features):
h = features
for i, layer in enumerate(self.layers):
if i != 0:
h = self.dropout(h)
h = layer(h, self.g)
return h
class GAT(nn.Module):
def __init__(self,
g,
in_dim,
num_classes,
num_hidden,
num_layers,
heads,
activation,
feat_drop,
attn_drop,
negative_slope,
residual):
super(GAT, self).__init__()
self.g = g
self.num_layers = num_layers
self.gat_layers = nn.ModuleList()
self.activation = activation
# input projection (no residual)
self.gat_layers.append(GATConv(
in_dim, num_hidden, heads[0],
feat_drop, attn_drop, negative_slope, False, self.activation))
# hidden layers
for l in range(1, num_layers):
# due to multi-head, the in_dim = num_hidden * num_heads
self.gat_layers.append(GATConv(
num_hidden * heads[l-1], num_hidden, heads[l],
feat_drop, attn_drop, negative_slope, residual, self.activation))
# output projection
self.gat_layers.append(GATConv(
num_hidden * heads[-2], num_classes, heads[-1],
feat_drop, attn_drop, negative_slope, residual, None))
def forward(self, inputs):
h = inputs
for l in range(self.num_layers):
h = self.gat_layers[l](h, self.g).flatten(1)
# output projection
logits = self.gat_layers[-1](h, self.g).mean(1)
return logits
class GraphSAGE(nn.Module):
def __init__(self,
g,
in_feats,
n_classes,
n_hidden,
n_layers,
activation,
dropout,
aggregator_type):
super(GraphSAGE, self).__init__()
self.layers = nn.ModuleList()
self.g = g
# input layer
self.layers.append(SAGEConv(in_feats, n_hidden, aggregator_type, feat_drop=dropout, activation=activation))
# hidden layers
for i in range(n_layers - 1):
self.layers.append(SAGEConv(n_hidden, n_hidden, aggregator_type, feat_drop=dropout, activation=activation))
# output layer
self.layers.append(SAGEConv(n_hidden, n_classes, aggregator_type, feat_drop=dropout, activation=None)) # activation None
def forward(self, features):
h = features
for layer in self.layers:
h = layer(h, self.g)
return h
class APPNP(nn.Module):
def __init__(self,
g,
in_feats,
n_classes,
n_hidden,
n_layers,
activation,
feat_drop,
edge_drop,
alpha,
k):
super(APPNP, self).__init__()
self.g = g
self.layers = nn.ModuleList()
# input layer
self.layers.append(nn.Linear(in_feats, n_hidden))
# hidden layers
for i in range(1, n_layers):
self.layers.append(nn.Linear(n_hidden, n_hidden))
# output layer
self.layers.append(nn.Linear(n_hidden, n_classes))
self.activation = activation
if feat_drop:
self.feat_drop = nn.Dropout(feat_drop)
else:
self.feat_drop = lambda x: x
self.propagate = APPNPConv(k, alpha, edge_drop)
self.reset_parameters()
def reset_parameters(self):
for layer in self.layers:
layer.reset_parameters()
def forward(self, features):
# prediction step
h = features
h = self.feat_drop(h)
h = self.activation(self.layers[0](h))
for layer in self.layers[1:-1]:
h = self.activation(layer(h))
h = self.layers[-1](self.feat_drop(h))
# propagation step
h = self.propagate(h, self.g)
return h
class TAGCN(nn.Module):
def __init__(self,
g,
in_feats,
n_classes,
n_hidden,
n_layers,
activation,
dropout):
super(TAGCN, self).__init__()
self.g = g
self.layers = nn.ModuleList()
# input layer
self.layers.append(TAGConv(in_feats, n_hidden, activation=activation))
# hidden layers
for i in range(n_layers - 1):
self.layers.append(TAGConv(n_hidden, n_hidden, activation=activation))
# output layer
self.layers.append(TAGConv(n_hidden, n_classes)) #activation=None
self.dropout = nn.Dropout(p=dropout)
def forward(self, features):
h = features
for i, layer in enumerate(self.layers):
if i != 0:
h = self.dropout(h)
h = layer(h, self.g)
return h
class AGNN(nn.Module):
def __init__(self,
g,
in_feats,
n_classes,
n_hidden,
n_layers,
init_beta,
learn_beta,
dropout):
super(AGNN, self).__init__()
self.g = g
self.layers = nn.ModuleList(
[AGNNConv(init_beta, learn_beta) for _ in range(n_layers)]
)
self.proj = nn.Sequential(
nn.Dropout(dropout),
nn.Linear(in_feats, n_hidden),
nn.ReLU()
)
self.cls = nn.Sequential(
nn.Dropout(dropout),
nn.Linear(n_hidden, n_classes)
)
def forward(self, features):
h = self.proj(features)
for layer in self.layers:
h = layer(h, self.g)
return self.cls(h)
class SGC(nn.Module):
def __init__(self,
g,
in_feats,
n_classes,
n_hidden,
k,
bias):
super(SGC, self).__init__()
self.g = g
self.net = SGConv(in_feats,
n_classes,
k=k,
cached=True,
bias=bias)
def forward(self, features):
return self.net(features, self.g)
class GIN(nn.Module):
def __init__(self,
g,
in_feats,
n_classes,
n_hidden,
n_layers,
init_eps,
learn_eps):
super(GIN, self).__init__()
self.g = g
self.layers = nn.ModuleList()
self.layers.append(
GINConv(
nn.Sequential(
nn.Dropout(0.6),
nn.Linear(in_feats, n_hidden),
nn.ReLU(),
),
'mean',
init_eps,
learn_eps
)
)
for i in range(n_layers - 1):
self.layers.append(
GINConv(
nn.Sequential(
nn.Dropout(0.6),
nn.Linear(n_hidden, n_hidden),
nn.ReLU()
),
'mean',
init_eps,
learn_eps
)
)
self.layers.append(
GINConv(
nn.Sequential(
nn.Dropout(0.6),
nn.Linear(n_hidden, n_classes),
),
'mean',
init_eps,
learn_eps
)
)
def forward(self, features):
h = features
for layer in self.layers:
h = layer(h, self.g)
return h
class ChebNet(nn.Module):
def __init__(self,
g,
in_feats,
n_classes,
n_hidden,
n_layers,
k,
bias):
super(ChebNet, self).__init__()
self.g = g
self.layers = nn.ModuleList()
self.layers.append(
ChebConv(in_feats, n_hidden, k, bias)
)
for _ in range(n_layers - 1):
self.layers.append(
ChebConv(n_hidden, n_hidden, k, bias)
)
self.layers.append(
ChebConv(n_hidden, n_classes, k, bias)
)
def forward(self, features):
h = features
for layer in self.layers:
h = layer(h, self.g)
return h
\ No newline at end of file
import argparse, time
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from dgl import DGLGraph
from dgl.data import register_data_args, load_data
from models import *
from conf import *
def get_model_and_config(name):
name = name.lower()
if name == 'gcn':
return GCN, GCN_CONFIG
elif name == 'gat':
return GAT, GAT_CONFIG
elif name == 'graphsage':
return GraphSAGE, GRAPHSAGE_CONFIG
elif name == 'appnp':
return APPNP, APPNP_CONFIG
elif name == 'tagcn':
return TAGCN, TAGCN_CONFIG
elif name == 'agnn':
return AGNN, AGNN_CONFIG
elif name == 'sgc':
return SGC, SGC_CONFIG
elif name == 'gin':
return GIN, GIN_CONFIG
elif name == 'chebnet':
return ChebNet, CHEBNET_CONFIG
def evaluate(model, features, labels, mask):
model.eval()
with torch.no_grad():
logits = model(features)
logits = logits[mask]
labels = labels[mask]
_, indices = torch.max(logits, dim=1)
correct = torch.sum(indices == labels)
return correct.item() * 1.0 / len(labels)
def main(args):
# load and preprocess dataset
data = load_data(args)
features = torch.FloatTensor(data.features)
labels = torch.LongTensor(data.labels)
train_mask = torch.ByteTensor(data.train_mask)
val_mask = torch.ByteTensor(data.val_mask)
test_mask = torch.ByteTensor(data.test_mask)
in_feats = features.shape[1]
n_classes = data.num_labels
n_edges = data.graph.number_of_edges()
print("""----Data statistics------'
#Edges %d
#Classes %d
#Train samples %d
#Val samples %d
#Test samples %d""" %
(n_edges, n_classes,
train_mask.sum().item(),
val_mask.sum().item(),
test_mask.sum().item()))
if args.gpu < 0:
cuda = False
else:
cuda = True
torch.cuda.set_device(args.gpu)
features = features.cuda()
labels = labels.cuda()
train_mask = train_mask.cuda()
val_mask = val_mask.cuda()
test_mask = test_mask.cuda()
# graph preprocess and calculate normalization factor
g = data.graph
# add self loop
if args.self_loop:
g.remove_edges_from(g.selfloop_edges())
g.add_edges_from(zip(g.nodes(), g.nodes()))
g = DGLGraph(g)
n_edges = g.number_of_edges()
# normalization
degs = g.in_degrees().float()
norm = torch.pow(degs, -0.5)
norm[torch.isinf(norm)] = 0
if cuda:
norm = norm.cuda()
g.ndata['norm'] = norm.unsqueeze(1)
# create GCN model
GNN, config = get_model_and_config(args.model)
model = GNN(g,
in_feats,
n_classes,
*config['extra_args'])
if cuda:
model.cuda()
print(model)
loss_fcn = torch.nn.CrossEntropyLoss()
# use optimizer
optimizer = torch.optim.Adam(model.parameters(),
lr=config['lr'],
weight_decay=config['weight_decay'])
# initialize graph
dur = []
for epoch in range(200):
model.train()
if epoch >= 3:
t0 = time.time()
# forward
logits = model(features)
loss = loss_fcn(logits[train_mask], labels[train_mask])
optimizer.zero_grad()
loss.backward()
optimizer.step()
if epoch >= 3:
dur.append(time.time() - t0)
acc = evaluate(model, features, labels, val_mask)
print("Epoch {:05d} | Time(s) {:.4f} | Loss {:.4f} | Accuracy {:.4f} | "
"ETputs(KTEPS) {:.2f}". format(epoch, np.mean(dur), loss.item(),
acc, n_edges / np.mean(dur) / 1000))
print()
acc = evaluate(model, features, labels, test_mask)
print("Test Accuracy {:.4f}".format(acc))
if __name__ == '__main__':
parser = argparse.ArgumentParser(description='Node classification on citation networks.')
register_data_args(parser)
parser.add_argument("--model", type=str, default='gcn',
help='model to use; available models are gcn, gat, graphsage, gin, '
'appnp, tagcn, sgc, agnn, chebnet')
parser.add_argument("--gpu", type=int, default=-1,
help="gpu")
parser.add_argument("--self-loop", action='store_true',
help="graph self-loop (default=False)")
args = parser.parse_args()
print(args)
main(args)
\ No newline at end of file
......@@ -13,40 +13,13 @@ import torch.nn.functional as F
import dgl.function as fn
from dgl import DGLGraph
from dgl.data import register_data_args, load_data
from dgl.nn.pytorch.conv import SGConv
class SGCLayer(nn.Module):
def __init__(self,
g,
h,
in_feats,
out_feats,
bias=False,
K=2):
super(SGCLayer, self).__init__()
self.g = g
self.weight = nn.Linear(in_feats, out_feats, bias=bias)
self.K = K
# precomputing message passing
for _ in range(self.K):
# normalization by square root of src degree
h = h * self.g.ndata['norm']
self.g.ndata['h'] = h
self.g.update_all(fn.copy_src(src='h', out='m'),
fn.sum(msg='m', out='h'))
h = self.g.ndata.pop('h')
# normalization by square root of dst degree
h = h * self.g.ndata['norm']
# store precomputed result into a cached variable
self.cached_h = h
def forward(self, mask):
h = self.weight(self.cached_h[mask])
return h
def evaluate(model, features, labels, mask):
def evaluate(model, g, features, labels, mask):
model.eval()
with torch.no_grad():
logits = model(mask) # only compute the evaluation set
logits = model(features, g)[mask] # compute on the full graph, then select the evaluation set
labels = labels[mask]
_, indices = torch.max(logits, dim=1)
correct = torch.sum(indices == labels)
......@@ -90,21 +63,13 @@ def main(args):
n_edges = g.number_of_edges()
# add self loop
g.add_edges(g.nodes(), g.nodes())
# normalization
degs = g.in_degrees().float()
norm = torch.pow(degs, -0.5)
norm[torch.isinf(norm)] = 0
if cuda:
norm = norm.cuda()
g.ndata['norm'] = norm.unsqueeze(1)
# create SGC model
model = SGCLayer(g,
features,
in_feats,
model = SGConv(in_feats,
n_classes,
args.bias,
K=2)
k=2,
cached=True,
bias=args.bias)
if cuda: model.cuda()
loss_fcn = torch.nn.CrossEntropyLoss()
......@@ -121,8 +86,8 @@ def main(args):
if epoch >= 3:
t0 = time.time()
# forward
logits = model(train_mask) # only compute the train set
loss = loss_fcn(logits, labels[train_mask])
logits = model(features, g) # compute on the full graph; the loss below selects the train set
loss = loss_fcn(logits[train_mask], labels[train_mask])
optimizer.zero_grad()
loss.backward()
......@@ -131,13 +96,13 @@ def main(args):
if epoch >= 3:
dur.append(time.time() - t0)
acc = evaluate(model, features, labels, val_mask)
acc = evaluate(model, g, features, labels, val_mask)
print("Epoch {:05d} | Time(s) {:.4f} | Loss {:.4f} | Accuracy {:.4f} | "
"ETputs(KTEPS) {:.2f}". format(epoch, np.mean(dur), loss.item(),
acc, n_edges / np.mean(dur) / 1000))
print()
acc = evaluate(model, features, labels, test_mask)
acc = evaluate(model, g, features, labels, test_mask)
print("Test Accuracy {:.4f}".format(acc))
......
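
A hedged standalone sketch of the cached `SGConv` used above: with `cached=True`, the k-step feature propagation is computed on the first forward pass and reused afterwards, so only the final linear layer is trained:

```
import torch
import dgl
from dgl.nn.pytorch.conv import SGConv

g = dgl.DGLGraph()
g.add_nodes(4)
g.add_edges([0, 1, 2, 3], [1, 2, 3, 0])
g.add_edges(g.nodes(), g.nodes())   # self-loops, as in the script above
feat = torch.randn(4, 10)

model = SGConv(10, 3, k=2, cached=True, bias=True)
logits = model(feat, g)             # first call precomputes and caches the propagated features
logits = model(feat, g)             # later calls reuse the cache
```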
......@@ -13,38 +13,15 @@ import torch.nn.functional as F
import dgl.function as fn
from dgl import DGLGraph
from dgl.data import register_data_args, load_data
from dgl.nn.pytorch.conv import SGConv
class SGCLayer(nn.Module):
def __init__(self,g,h,in_feats,out_feats,K=2):
super(SGCLayer, self).__init__()
self.g = g
self.weight = nn.Linear(in_feats, out_feats, bias=True)
self.K = K
# precomputing message passing
start = time.perf_counter()
for _ in range(self.K):
# normalization by square root of src degree
h = h * self.g.ndata['norm']
self.g.ndata['h'] = h
self.g.update_all(fn.copy_src(src='h', out='m'),
fn.sum(msg='m', out='h'))
h = self.g.ndata.pop('h')
# normalization by square root of dst degree
h = h * self.g.ndata['norm']
h = (h-h.mean(0))/h.std(0)
precompute_elapse = time.perf_counter()-start
print("Precompute Time(s): {:.4f}".format(precompute_elapse))
# store precomputed result into a cached variable
self.cached_h = h
def normalize(h):
return (h-h.mean(0))/h.std(0)
def forward(self, mask):
h = self.weight(self.cached_h[mask])
return h
def evaluate(model, features, labels, mask):
def evaluate(model, features, graph, labels, mask):
model.eval()
with torch.no_grad():
logits = model(mask) # only compute the evaluation set
logits = model(features, graph)[mask] # compute on the full graph, then select the evaluation set
labels = labels[mask]
_, indices = torch.max(logits, dim=1)
correct = torch.sum(indices == labels)
......@@ -85,7 +62,6 @@ def main(args):
test_mask = test_mask.cuda()
# graph preprocess and calculate normalization factor
start = time.perf_counter()
g = DGLGraph(data.graph)
n_edges = g.number_of_edges()
# normalization
......@@ -94,14 +70,11 @@ def main(args):
norm[torch.isinf(norm)] = 0
if cuda: norm = norm.cuda()
g.ndata['norm'] = norm.unsqueeze(1)
preprocess_elapse = time.perf_counter()-start
print("Preprocessing Time: {:.4f}".format(preprocess_elapse))
# create SGC model
model = SGCLayer(g,features,in_feats,n_classes,K=2)
if cuda: model.cuda()
loss_fcn = torch.nn.CrossEntropyLoss()
model = SGConv(in_feats, n_classes, k=2, cached=True, bias=True, norm=normalize)
if args.gpu >= 0:
model = model.cuda()
# use optimizer
optimizer = torch.optim.LBFGS(model.parameters())
......@@ -109,22 +82,17 @@ def main(args):
# define loss closure
def closure():
optimizer.zero_grad()
output = model(train_mask)
output = model(features, g)[train_mask]
loss_train = F.cross_entropy(output, labels[train_mask])
loss_train.backward()
return loss_train
# initialize graph
dur = []
start = time.perf_counter()
for epoch in range(args.n_epochs):
model.train()
logits = model(train_mask) # only compute the train set
loss = optimizer.step(closure)
optimizer.step(closure)
train_elapse = time.perf_counter()-start
print("Train epoch {} | Train Time(s) {:.4f}".format(epoch, train_elapse))
acc = evaluate(model, features, labels, test_mask)
acc = evaluate(model, features, g, labels, test_mask)
print("Test Accuracy {:.4f}".format(acc))
......
......@@ -7,7 +7,7 @@ References:
"""
import torch
import torch.nn as nn
from dgl.nn.pytorch.conv import TGConv
from dgl.nn.pytorch.conv import TAGConv
class TAGCN(nn.Module):
def __init__(self,
......@@ -22,12 +22,12 @@ class TAGCN(nn.Module):
self.g = g
self.layers = nn.ModuleList()
# input layer
self.layers.append(TGConv(in_feats, n_hidden, activation=activation))
self.layers.append(TAGConv(in_feats, n_hidden, activation=activation))
# hidden layers
for i in range(n_layers - 1):
self.layers.append(TGConv(n_hidden, n_hidden, activation=activation))
self.layers.append(TAGConv(n_hidden, n_hidden, activation=activation))
# output layer
self.layers.append(TGConv(n_hidden, n_classes)) #activation=None
self.layers.append(TAGConv(n_hidden, n_classes)) #activation=None
self.dropout = nn.Dropout(p=dropout)
def forward(self, features):
......
......@@ -479,7 +479,7 @@ class GraphIndex(ObjectBase):
Returns
-------
int
tensor
The in degree array.
"""
v_array = v.todgltensor()
......@@ -510,7 +510,7 @@ class GraphIndex(ObjectBase):
Returns
-------
int
tensor
The out degree array.
"""
v_array = v.todgltensor()
......
"""MXNet modules for graph convolutions."""
# pylint: disable= no-member, arguments-differ
# pylint: disable= no-member, arguments-differ, invalid-name
import math
import mxnet as mx
from mxnet import gluon, nd
......