Unverified Commit 2daba976 authored by Zheng Zhang, committed by GitHub

Merge pull request #12 from ylfdq1118/master

Example GCN and GAT model
parents 174c1d55 20d5d532
# DGL example models
## Dataset
gat.py and gcn.py use real datasets; please download the citeseer/cora/pubmed data from:
https://github.com/tkipf/gcn/tree/master/gcn/data

dataset.py (adapted from tkipf/gcn) assumes the downloaded files sit in a "data" folder under the current directory, as in the snippet below.
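
A minimal sketch of driving the helpers in dataset.py once the files are in place (assuming the cora files have been copied into `./data`):

```python
from dataset import load_data, preprocess_features

# expects files such as data/ind.cora.x, data/ind.cora.graph, data/ind.cora.test.index
adj, features, y_train, y_val, y_test, train_mask, val_mask, test_mask = load_data("cora")
features = preprocess_features(features)  # row-normalized scipy sparse matrix
print(adj.shape, features.shape, y_train.shape)
```
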
import numpy as np
import pickle as pkl
import networkx as nx
import scipy.sparse as sp
import sys

# (lingfan): following dataset loading and preprocessing code from tkipf/gcn
# https://github.com/tkipf/gcn/blob/master/gcn/utils.py

def parse_index_file(filename):
    """Parse index file."""
    index = []
    for line in open(filename):
        index.append(int(line.strip()))
    return index


def sample_mask(idx, l):
    """Create mask."""
    mask = np.zeros(l)
    mask[idx] = 1
    # use the built-in bool: np.bool is a deprecated alias, removed in newer numpy
    return np.array(mask, dtype=bool)

def load_data(dataset_str):
    """
    Loads input data from the gcn/data directory:

    ind.dataset_str.x => the feature vectors of the training instances as scipy.sparse.csr.csr_matrix object;
    ind.dataset_str.tx => the feature vectors of the test instances as scipy.sparse.csr.csr_matrix object;
    ind.dataset_str.allx => the feature vectors of both labeled and unlabeled training instances
        (a superset of ind.dataset_str.x) as scipy.sparse.csr.csr_matrix object;
    ind.dataset_str.y => the one-hot labels of the labeled training instances as numpy.ndarray object;
    ind.dataset_str.ty => the one-hot labels of the test instances as numpy.ndarray object;
    ind.dataset_str.ally => the labels for instances in ind.dataset_str.allx as numpy.ndarray object;
    ind.dataset_str.graph => a dict in the format {index: [index_of_neighbor_nodes]} as collections.defaultdict object;
    ind.dataset_str.test.index => the indices of test instances in graph, for the inductive setting, as list object.

    All objects above must be saved using the python pickle module.

    :param dataset_str: Dataset name
    :return: All data input files loaded (as well as the training/test data).
    """
    names = ['x', 'y', 'tx', 'ty', 'allx', 'ally', 'graph']
    objects = []
    for i in range(len(names)):
        with open("data/ind.{}.{}".format(dataset_str, names[i]), 'rb') as f:
            if sys.version_info > (3, 0):
                objects.append(pkl.load(f, encoding='latin1'))
            else:
                objects.append(pkl.load(f))
    x, y, tx, ty, allx, ally, graph = tuple(objects)
    test_idx_reorder = parse_index_file("data/ind.{}.test.index".format(dataset_str))
    test_idx_range = np.sort(test_idx_reorder)

    if dataset_str == 'citeseer':
        # Fix citeseer dataset (there are some isolated nodes in the graph)
        # Find isolated nodes, add them as zero-vecs into the right position
        test_idx_range_full = range(min(test_idx_reorder), max(test_idx_reorder)+1)
        tx_extended = sp.lil_matrix((len(test_idx_range_full), x.shape[1]))
        tx_extended[test_idx_range-min(test_idx_range), :] = tx
        tx = tx_extended
        ty_extended = np.zeros((len(test_idx_range_full), y.shape[1]))
        ty_extended[test_idx_range-min(test_idx_range), :] = ty
        ty = ty_extended

    features = sp.vstack((allx, tx)).tolil()
    features[test_idx_reorder, :] = features[test_idx_range, :]
    adj = nx.adjacency_matrix(nx.from_dict_of_lists(graph))

    labels = np.vstack((ally, ty))
    labels[test_idx_reorder, :] = labels[test_idx_range, :]

    idx_test = test_idx_range.tolist()
    idx_train = range(len(y))
    idx_val = range(len(y), len(y)+500)

    train_mask = sample_mask(idx_train, labels.shape[0])
    val_mask = sample_mask(idx_val, labels.shape[0])
    test_mask = sample_mask(idx_test, labels.shape[0])

    y_train = np.zeros(labels.shape)
    y_val = np.zeros(labels.shape)
    y_test = np.zeros(labels.shape)
    y_train[train_mask, :] = labels[train_mask, :]
    y_val[val_mask, :] = labels[val_mask, :]
    y_test[test_mask, :] = labels[test_mask, :]

    return adj, features, y_train, y_val, y_test, train_mask, val_mask, test_mask

def preprocess_features(features):
    """Row-normalize the feature matrix (all-zero rows are left untouched)."""
    rowsum = np.array(features.sum(1))
    r_inv = np.power(rowsum, -1).flatten()
    r_inv[np.isinf(r_inv)] = 0.  # guard against divide-by-zero on empty rows
    r_mat_inv = sp.diags(r_inv)
    features = r_mat_inv.dot(features)
    return features
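
To make the row normalization in `preprocess_features` concrete, here is a small worked example on a toy matrix (the values are illustrative, not from any dataset); each nonzero row is scaled to sum to 1, and all-zero rows stay zero thanks to the isinf guard:

```python
import numpy as np
import scipy.sparse as sp

X = sp.csr_matrix(np.array([[1., 3.],
                            [0., 0.],
                            [2., 2.]]))
rowsum = np.array(X.sum(1))             # [[4.], [0.], [4.]]
r_inv = np.power(rowsum, -1).flatten()  # [0.25, inf, 0.25]
r_inv[np.isinf(r_inv)] = 0.             # empty rows stay all-zero
X_norm = sp.diags(r_inv).dot(X)
print(X_norm.toarray())
# [[0.25 0.75]
#  [0.   0.  ]
#  [0.5  0.5 ]]
```
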
import networkx as nx
from dgl.graph import DGLGraph
import torch
import torch.nn as nn
import torch.nn.functional as F
import argparse
from dataset import load_data, preprocess_features
import numpy as np

class NodeUpdateModule(nn.Module):
    def __init__(self, input_dim, num_hidden, aggregator, num_heads=3, act=None,
                 attention_dropout=None, input_dropout=None, residual=False):
        super(NodeUpdateModule, self).__init__()
        self.num_hidden = num_hidden
        self.num_heads = num_heads
        self.fc = nn.ModuleList(
            [nn.Linear(input_dim, num_hidden, bias=False)
             for _ in range(num_heads)])
        self.attention = nn.ModuleList(
            [nn.Linear(num_hidden * 2, 1, bias=False) for _ in range(num_heads)])
        self.act = act
        self.attention_dropout = attention_dropout
        self.input_dropout = input_dropout
        self.aggregator = aggregator
        self.residual = residual

    def forward(self, node, msgs):
        hv = node['h']
        hu = torch.cat(msgs, dim=0)
        # number of neighbors, including the node itself
        n = len(msgs) + 1
        out = []
        for i in range(self.num_heads):
            hvv = hv
            huu = hu
            if self.input_dropout is not None:
                hvv = F.dropout(hvv, self.input_dropout)
                huu = F.dropout(huu, self.input_dropout)
            # calc W*hself and W*hneigh on the dropped-out inputs
            hvv = self.fc[i](hvv)
            huu = self.fc[i](huu)
            # concat the node itself with its neighbors for self-attention
            huu = torch.cat((hvv, huu), dim=0)
            # calculate W*hself || W*hneigh
            h = torch.cat((hvv.expand(n, -1), huu), dim=1)
            a = F.leaky_relu(self.attention[i](h))
            a = F.softmax(a, dim=0)
            if self.attention_dropout is not None:
                a = F.dropout(a, self.attention_dropout)
            if self.input_dropout is not None:
                huu = F.dropout(huu, self.input_dropout)
            # attention-weighted sum over the node itself and its neighbors
            h = torch.sum(a * huu, 0, keepdim=True)
            # add residual connection
            if self.residual:
                h = h + hvv
            if self.act is not None:
                h = self.act(h)
            out.append(h)
        # aggregate multi-head results
        h = self.aggregator(out)
        return {'h': h}

class GAT(nn.Module):
    def __init__(self, num_layers, in_dim, num_hidden, num_classes, num_heads,
                 activation, attention_dropout, input_dropout, use_residual=False):
        super(GAT, self).__init__()
        self.layers = nn.ModuleList()
        # update layers: concatenate the outputs of all heads
        aggregator = lambda x: torch.cat(x, 1)
        for i in range(num_layers):
            if i == 0:
                last_dim = in_dim
                residual = False
            else:
                last_dim = num_hidden * num_heads  # because heads are concatenated
                residual = use_residual
            self.layers.append(
                NodeUpdateModule(last_dim, num_hidden, aggregator, num_heads,
                                 activation, attention_dropout, input_dropout, residual))
        # projection layer: python's built-in sum over a list of tensors
        # plays the role of tf.add_n
        aggregator = lambda x: sum(x)
        self.layers.append(NodeUpdateModule(num_hidden * num_heads, num_classes, aggregator,
                                            1, None, attention_dropout, input_dropout, False))

    def forward(self, g):
        g.register_message_func(lambda src, dst, edge: src['h'])
        for layer in self.layers:
            g.register_update_func(layer)
            g.update_all()
        logits = [g.node[n]['h'] for n in g.nodes()]
        logits = torch.cat(logits, dim=0)
        return logits

def main(args):
    # dropout parameters
    input_dropout = 0.2
    attention_dropout = 0.2

    # load and preprocess dataset
    adj, features, y_train, y_val, y_test, train_mask, val_mask, test_mask = load_data(args.dataset)
    features = preprocess_features(features)

    # initialize graph
    g = DGLGraph(adj)

    # create model
    model = GAT(args.num_layers,
                features.shape[1],
                args.num_hidden,
                y_train.shape[1],
                args.num_heads,
                F.elu,
                attention_dropout,
                input_dropout,
                args.residual)

    # use optimizer
    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)

    # convert labels and masks to tensor
    labels = torch.FloatTensor(y_train)
    mask = torch.FloatTensor(train_mask.astype(np.float32))

    for epoch in range(args.epochs):
        # reset grad
        optimizer.zero_grad()
        # reset graph states
        for n in g.nodes():
            g.node[n]['h'] = torch.FloatTensor(features[n].toarray())
        # forward
        logits = model.forward(g)
        # masked cross entropy loss (note the minus sign: minimize negative log-likelihood)
        # TODO: (lingfan) use gather to speed up
        logp = F.log_softmax(logits, 1)
        loss = -torch.mean(logp * labels * mask.view(-1, 1))
        print("epoch {} loss: {}".format(epoch, loss.item()))
        loss.backward()
        optimizer.step()

if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='GAT')
    parser.add_argument("--dataset", type=str, required=True,
                        help="dataset name")
    parser.add_argument("--epochs", type=int, default=10,
                        help="number of training epochs")
    parser.add_argument("--num-heads", type=int, default=3,
                        help="number of attention heads to use")
    parser.add_argument("--num-layers", type=int, default=1,
                        help="number of hidden layers")
    parser.add_argument("--num-hidden", type=int, default=8,
                        help="number of hidden units")
    parser.add_argument("--residual", action="store_true",
                        help="use residual connections")
    parser.add_argument("--lr", type=float, default=0.001,
                        help="learning rate")
    args = parser.parse_args()
    print(args)

    main(args)
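
The two aggregators in the GAT model above differ only in how the per-head outputs are combined: hidden layers concatenate the heads (which is why the next layer's input width is num_hidden * num_heads), while the projection layer sums the list of head outputs. A small sketch of the two behaviors on toy tensors (shapes are illustrative assumptions):

```python
import torch

heads = [torch.full((1, 8), float(i)) for i in range(1, 4)]  # 3 mock head outputs

hidden_agg = torch.cat(heads, 1)  # shape (1, 24): feeds the num_hidden * num_heads input
output_agg = sum(heads)           # shape (1, 8): python's sum acts like tf.add_n here
print(hidden_agg.shape, output_agg.shape)
```
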
import networkx as nx
from dgl.graph import DGLGraph
import torch
import torch.nn as nn
import torch.nn.functional as F
import argparse
from dataset import load_data, preprocess_features
import numpy as np

class NodeUpdateModule(nn.Module):
    def __init__(self, input_dim, output_dim, act=None, p=None):
        super(NodeUpdateModule, self).__init__()
        self.linear = nn.Linear(input_dim, output_dim)
        self.act = act
        self.p = p

    def forward(self, node, msgs):
        h = node['h']
        if self.p is not None:
            h = F.dropout(h, p=self.p)
        # aggregate messages (avoid += so the stored node state is not mutated in place)
        for msg in msgs:
            h = h + msg
        h = self.linear(h)
        if self.act is not None:
            h = self.act(h)
        # (lingfan): Can the user directly update the node instead of using a return statement?
        return {'h': h}

class GCN(nn.Module):
    def __init__(self, input_dim, num_hidden, num_classes, num_layers, activation, dropout):
        super(GCN, self).__init__()
        self.layers = nn.ModuleList()
        # hidden layers
        last_dim = input_dim
        for _ in range(num_layers):
            self.layers.append(
                NodeUpdateModule(last_dim, num_hidden, act=activation, p=dropout))
            last_dim = num_hidden
        # output layer
        self.layers.append(NodeUpdateModule(num_hidden, num_classes, p=dropout))

    def forward(self, g):
        g.register_message_func(lambda src, dst, edge: src['h'])
        for layer in self.layers:
            g.register_update_func(layer)
            g.update_all()
        logits = [g.node[n]['h'] for n in g.nodes()]
        return torch.cat(logits, dim=0)

def main(args):
    # load and preprocess dataset
    adj, features, y_train, y_val, y_test, train_mask, val_mask, test_mask = load_data(args.dataset)
    features = preprocess_features(features)

    # initialize graph
    g = DGLGraph(adj)

    # create GCN model
    model = GCN(features.shape[1],
                args.num_hidden,
                y_train.shape[1],
                args.num_layers,
                F.relu,
                args.dropout)

    # use optimizer
    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)

    # convert labels and masks to tensor
    labels = torch.FloatTensor(y_train)
    mask = torch.FloatTensor(train_mask.astype(np.float32))

    for epoch in range(args.epochs):
        # reset grad
        optimizer.zero_grad()
        # reset graph states
        for n in g.nodes():
            g.node[n]['h'] = torch.FloatTensor(features[n].toarray())
        # forward
        logits = model.forward(g)
        # masked cross entropy loss (note the minus sign: minimize negative log-likelihood)
        # TODO: (lingfan) use gather to speed up
        logp = F.log_softmax(logits, 1)
        loss = -torch.mean(logp * labels * mask.view(-1, 1))
        print("epoch {} loss: {}".format(epoch, loss.item()))
        loss.backward()
        optimizer.step()

if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='GCN')
    parser.add_argument("--dataset", type=str, required=True,
                        help="dataset name")
    parser.add_argument("--num-layers", type=int, default=1,
                        help="number of gcn layers")
    parser.add_argument("--num-hidden", type=int, default=64,
                        help="number of hidden units")
    parser.add_argument("--epochs", type=int, default=10,
                        help="number of training epochs")
    parser.add_argument("--dropout", type=float, default=None,
                        help="dropout probability")
    parser.add_argument("--lr", type=float, default=0.001,
                        help="learning rate")
    args = parser.parse_args()
    print(args)

    main(args)
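
On the "use gather to speed up" TODO shared by both training loops: the dense one-hot product can be replaced by an index lookup of each node's true-class log-probability. A hedged sketch of the equivalence on toy tensors (names and shapes are illustrative only):

```python
import torch
import torch.nn.functional as F

logits = torch.randn(4, 3)             # 4 nodes, 3 classes
labels = torch.eye(3)[[0, 2, 1, 0]]    # one-hot labels
mask = torch.tensor([1., 1., 0., 0.])  # train on the first two nodes only

logp = F.log_softmax(logits, 1)
# dense formulation used in the scripts above
loss_dense = -torch.mean(logp * labels * mask.view(-1, 1))
# gather-based alternative: pick out each node's true-class log-probability;
# divide by the class count to match the dense mean's denominator
picked = logp.gather(1, labels.argmax(1, keepdim=True)).squeeze(1)
loss_gather = -torch.mean(picked * mask) / labels.shape[1]
print(torch.allclose(loss_dense, loss_gather))  # True
```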