Commit 3f464591 authored by Aymen Waheb, committed by Mufei Li

[Model] Add edge dropout to APPNP (#493)

* [Model] Add edge dropout to APPNP

[Model] Add edge dropout to APPNP

* [Model] Refactor the sampling examples (#498)

* reorganize sampling code.

* speedup gcn_ns.

* speed up gcn_cv

* fix graphsage_cv.

* undo the modification.

* accel training.

* update readme.

* [Model] Add edge dropout to APPNP

[Model] Add edge dropout to APPNP

update
parent 6124667f
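
What the change does, in brief: APPNP's propagation loop is factored out into a `GraphPropagation` module, and at every propagation step a dropout mask is sampled over the edges, so messages along dropped edges are zeroed (and survivors rescaled by `1/(1 - p)`, as `nn.Dropout` does). Below is a minimal, self-contained sketch of that mechanism, not part of the commit; it uses the same DGL primitives as the diff, but the toy graph, feature size, and variable names are made up for illustration.

```python
import torch
import torch.nn as nn
import dgl
import dgl.function as fn

# Hypothetical toy graph: 4 nodes with a handful of directed edges.
g = dgl.DGLGraph()
g.add_nodes(4)
g.add_edges([0, 1, 2, 3, 0], [1, 2, 3, 0, 2])

h = torch.randn(4, 8)        # node features
edge_drop = nn.Dropout(0.5)  # zeroes each edge weight with prob. 0.5, scales survivors by 2

# One propagation step with edge dropout: build a per-edge weight vector from ones,
# drop some entries, and use it to scale the messages during aggregation.
g.ndata['h'] = h
g.edata['d'] = edge_drop(torch.ones((g.number_of_edges(), 1)))
g.update_all(fn.src_mul_edge(src='h', edge='d', out='m'),
             fn.sum(msg='m', out='h'))
h_new = g.ndata.pop('h')
print(h_new.shape)  # torch.Size([4, 8])
```

The committed code additionally applies symmetric degree normalization and the teleport term; see the `GraphPropagation` class in the diff below.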
@@ -16,10 +16,11 @@ Contributors
 * [@hbsun2113](https://github.com/hbsun2113): GraphSAGE in Pytorch
 * [Tianyi Zhang](https://github.com/Tiiiger): SGC in Pytorch
 * [Jun Chen](https://github.com/kitaev-chen): GIN in Pytorch
+* [Aymen Waheb](https://github.com/aymenwah): APPNP in Pytorch

 Other improvement
 * [Brett Koonce](https://github.com/brettkoonce)
 * [@giuseppefutia](https://github.com/giuseppefutia)
 * [@mori97](https://github.com/mori97)
 * Hao Jin
 * [@aymenwah](https://github.com/aymenwah)
@@ -29,8 +29,4 @@ python train.py --dataset cora --gpu 0
 * citeseer: 0.715 (paper: 0.757)
 * pubmed: 0.793 (paper: 0.797)

-Differences from the original implementation
----------
-- This implementation does not perform dropout on adjacency matrices during propagation step.
-- Experiments were done on dgl datasets (GCN settings) which are different from those used in the original implementation. (discrepancies are detailed in experimental section of the original paper)
+Experiments were done on dgl datasets (GCN settings) which are different from those used in the original implementation. (discrepancies are detailed in experimental section of the original paper)
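
For example, with the new flags added in `train.py` below (both default to 0.5), the run shown in this README becomes:

python train.py --dataset cora --gpu 0 --in-drop 0.5 --edge-drop 0.5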
@@ -5,11 +5,49 @@ References
 Paper: https://arxiv.org/abs/1810.05997
 Author's code: https://github.com/klicperajo/ppnp
 """
+import torch
 import torch.nn as nn
 import dgl.function as fn
+
+
+class GraphPropagation(nn.Module):
+    def __init__(self,
+                 g,
+                 edge_drop,
+                 alpha,
+                 k):
+        super(GraphPropagation, self).__init__()
+        self.g = g
+        self.alpha = alpha
+        self.k = k
+        if edge_drop:
+            self.edge_drop = nn.Dropout(edge_drop)
+        else:
+            self.edge_drop = 0.
+
+    def forward(self, h):
+        self.cached_h = h
+        for _ in range(self.k):
+            # normalization by square root of src degree
+            h = h * self.g.ndata['norm']
+            self.g.ndata['h'] = h
+            if self.edge_drop:
+                # performing edge dropout
+                ed = self.edge_drop(torch.ones((self.g.number_of_edges(), 1)))
+                self.g.edata['d'] = ed
+                self.g.update_all(fn.src_mul_edge(src='h', edge='d', out='m'),
+                                  fn.sum(msg='m', out='h'))
+            else:
+                self.g.update_all(fn.copy_src(src='h', out='m'),
+                                  fn.sum(msg='m', out='h'))
+            h = self.g.ndata.pop('h')
+            # normalization by square root of dst degree
+            h = h * self.g.ndata['norm']
+            # update h using teleport probability alpha
+            h = h * (1 - self.alpha) + self.cached_h * self.alpha
+        return h
+
+
 class APPNP(nn.Module):
     def __init__(self,
                  g,
@@ -17,12 +55,12 @@ class APPNP(nn.Module):
                  hiddens,
                  n_classes,
                  activation,
-                 dropout,
+                 feat_drop,
+                 edge_drop,
                  alpha,
                  k):
         super(APPNP, self).__init__()
         self.layers = nn.ModuleList()
-        self.g = g
         # input layer
         self.layers.append(nn.Linear(in_feats, hiddens[0]))
         # hidden layers
@@ -31,12 +69,12 @@ class APPNP(nn.Module):
         # output layer
         self.layers.append(nn.Linear(hiddens[-1], n_classes))
         self.activation = activation
-        if dropout:
-            self.dropout = nn.Dropout(p=dropout)
+        if feat_drop:
+            self.feat_drop = nn.Dropout(feat_drop)
         else:
-            self.dropout = 0.
-        self.K = k
-        self.alpha = alpha
+            self.feat_drop = lambda x: x
+        self.propagate = GraphPropagation(g, edge_drop, alpha, k)
+        self.reset_parameters()

     def reset_parameters(self):
         for layer in self.layers:
@@ -45,26 +83,11 @@ class APPNP(nn.Module):
     def forward(self, features):
         # prediction step
         h = features
-        if self.dropout:
-            h = self.dropout(h)
+        h = self.feat_drop(h)
         h = self.activation(self.layers[0](h))
         for layer in self.layers[1:-1]:
             h = self.activation(layer(h))
-        if self.dropout:
-            h = self.layers[-1](self.dropout(h))
-        # propagation step without dropout on adjacency matrices
-        self.cached_h = h
-        for _ in range(self.K):
-            # normalization by square root of src degree
-            h = h * self.g.ndata['norm']
-            self.g.ndata['h'] = h
-            # message-passing without performing adjacency dropout
-            self.g.update_all(fn.copy_src(src='h', out='m'),
-                              fn.sum(msg='m', out='h'))
-            h = self.g.ndata.pop('h')
-            # normalization by square root of dst degree
-            h = h * self.g.ndata['norm']
-            # update h using teleport probability alpha
-            h = h * (1 - self.alpha) + self.cached_h * self.alpha
+        h = self.layers[-1](self.feat_drop(h))
+        # propagation step
+        h = self.propagate(h)
         return h
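
For reference (a restatement of what the loop above computes, not part of the commit): each iteration of `GraphPropagation.forward` is one APPNP power-iteration step, $H^{(t+1)} = (1 - \alpha)\,\hat{A}\,H^{(t)} + \alpha\,H^{(0)}$, where $H^{(0)}$ is the MLP output cached in `self.cached_h` and $\hat{A}$ is the adjacency matrix normalized on both sides by the `norm` node field (inverse square root of the node degrees). With edge dropout enabled, $\hat{A}$ is re-sampled at every step: each edge weight is zeroed with probability `edge_drop` and the survivors are scaled by $1/(1 - p)$, which is exactly what multiplying the messages by the `nn.Dropout`-processed ones vector achieves.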
@@ -8,6 +8,7 @@ from dgl.data import register_data_args, load_data
 import dgl
 from appnp import APPNP

+
 def evaluate(model, features, labels, mask):
     model.eval()
     with torch.no_grad():
@@ -18,6 +19,7 @@ def evaluate(model, features, labels, mask):
         correct = torch.sum(indices == labels)
         return correct.item() * 1.0 / len(labels)

+
 def main(args):
     # load and preprocess dataset
     data = load_data(args)
@@ -36,9 +38,9 @@ def main(args):
       #Val samples %d
       #Test samples %d""" %
          (n_edges, n_classes,
           train_mask.sum().item(),
           val_mask.sum().item(),
           test_mask.sum().item()))

     if args.gpu < 0:
         cuda = False
@@ -72,13 +74,13 @@ def main(args):
                   args.hidden_sizes,
                   n_classes,
                   F.relu,
-                  args.dropout,
+                  args.in_drop,
+                  args.edge_drop,
                   args.alpha,
                   args.k)

     if cuda:
         model.cuda()
-    model.reset_parameters()

     loss_fcn = torch.nn.CrossEntropyLoss()
     # use optimizer
@@ -105,8 +107,8 @@ def main(args):
             acc = evaluate(model, features, labels, val_mask)
             print("Epoch {:05d} | Time(s) {:.4f} | Loss {:.4f} | Accuracy {:.4f} | "
-                  "ETputs(KTEPS) {:.2f}". format(epoch, np.mean(dur), loss.item(),
-                                                 acc, n_edges / np.mean(dur) / 1000))
+                  "ETputs(KTEPS) {:.2f}".format(epoch, np.mean(dur), loss.item(),
+                                                acc, n_edges / np.mean(dur) / 1000))

     print()
     acc = evaluate(model, features, labels, test_mask)
@@ -116,22 +118,24 @@
 if __name__ == '__main__':
     parser = argparse.ArgumentParser(description='APPNP')
     register_data_args(parser)
-    parser.add_argument("--dropout", type=float, default=0.5,
-                        help="dropout probability")
+    parser.add_argument("--in-drop", type=float, default=0.5,
+                        help="input feature dropout")
+    parser.add_argument("--edge-drop", type=float, default=0.5,
+                        help="edge propagation dropout")
     parser.add_argument("--gpu", type=int, default=-1,
                         help="gpu")
     parser.add_argument("--lr", type=float, default=1e-2,
                         help="learning rate")
     parser.add_argument("--n-epochs", type=int, default=200,
                         help="number of training epochs")
     parser.add_argument("--hidden_sizes", type=int, nargs='+', default=[64],
                         help="hidden unit sizes for appnp")
     parser.add_argument("--k", type=int, default=10,
                         help="Number of propagation steps")
     parser.add_argument("--alpha", type=float, default=0.1,
                         help="Teleport Probability")
     parser.add_argument("--weight-decay", type=float, default=5e-4,
                         help="Weight for L2 loss")
     args = parser.parse_args()
     print(args)