train.py 5.96 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
"""
Graph Attention Networks in DGL using SPMV optimization.
Multiple heads are also batched together for faster training.
References
----------
Paper: https://arxiv.org/abs/1710.10903
Author's code: https://github.com/PetarV-/GAT
Pytorch implementation: https://github.com/Diego999/pyGAT
"""

import argparse
import numpy as np
13
import networkx as nx
14
15
16
import time
import torch
import torch.nn.functional as F
17
18
19
20
import dgl
from dgl.data import register_data_args
from dgl.data import CoraGraphDataset, CiteseerGraphDataset, PubmedGraphDataset

21
from gat import GAT
VoVAllen's avatar
VoVAllen committed
22
23
from utils import EarlyStopping

24
25
26
27
28
29

def accuracy(logits, labels):
    """Return the fraction of rows of ``logits`` whose arg-max equals the label.

    Parameters
    ----------
    logits : torch.Tensor
        Per-row class scores; the prediction is the index of the largest
        value along dimension 1.
    labels : torch.Tensor
        Ground-truth class index for every row of ``logits``.

    Returns
    -------
    float
        Number of matching predictions divided by ``len(labels)``.
    """
    predicted = logits.argmax(dim=1)
    num_hits = (predicted == labels).sum().item()
    return float(num_hits) / len(labels)

VoVAllen's avatar
VoVAllen committed
30

31
32
33
34
35
36
37
38
def evaluate(model, features, labels, mask):
    """Score ``model`` on the entries selected by ``mask``.

    The model is switched to evaluation mode (and is not switched back
    here), run once on ``features`` with gradient tracking disabled, and the
    resulting logits and ``labels`` are both indexed by ``mask`` before being
    handed to ``accuracy``.

    Parameters
    ----------
    model : callable with an ``eval()`` method
        Called as ``model(features)`` to obtain the logits.
    features : torch.Tensor
        Input passed to the model unchanged.
    labels : torch.Tensor
        Ground-truth labels, indexed with ``mask``.
    mask : torch.Tensor
        Index/boolean mask choosing which rows are scored.

    Returns
    -------
    float
        Value returned by ``accuracy`` for the selected rows.
    """
    model.eval()
    with torch.no_grad():
        selected_logits = model(features)[mask]
        selected_labels = labels[mask]
    return accuracy(selected_logits, selected_labels)

VoVAllen's avatar
VoVAllen committed
39

40
41
def main(args):
    """Train a GAT node classifier on a citation graph and print test accuracy.

    Parameters
    ----------
    args : argparse.Namespace
        Parsed command-line options. The fields read here are ``dataset``,
        ``gpu``, ``epochs``, ``num_heads``, ``num_out_heads``, ``num_layers``,
        ``num_hidden``, ``residual``, ``in_drop``, ``attn_drop``, ``lr``,
        ``weight_decay``, ``negative_slope``, ``early_stop`` and ``fastmode``.

    Raises
    ------
    ValueError
        If ``args.dataset`` is not one of 'cora', 'citeseer' or 'pubmed'.
    """
    # load and preprocess dataset
    if args.dataset == 'cora':
        data = CoraGraphDataset()
    elif args.dataset == 'citeseer':
        data = CiteseerGraphDataset()
    elif args.dataset == 'pubmed':
        data = PubmedGraphDataset()
    else:
        raise ValueError('Unknown dataset: {}'.format(args.dataset))

    g = data[0]
    cuda = args.gpu >= 0
    if cuda:
        g = g.int().to(args.gpu)

    features = g.ndata['feat']
    labels = g.ndata['label']
    train_mask = g.ndata['train_mask']
    val_mask = g.ndata['val_mask']
    test_mask = g.ndata['test_mask']
    num_feats = features.shape[1]
    # Use the non-deprecated accessors: ``num_classes`` instead of
    # ``num_labels`` and the graph object itself instead of ``data.graph``.
    n_classes = data.num_classes
    n_edges = g.number_of_edges()
    print("""----Data statistics------'
      #Edges %d
      #Classes %d
      #Train samples %d
      #Val samples %d
      #Test samples %d""" %
          (n_edges, n_classes,
           train_mask.int().sum().item(),
           val_mask.int().sum().item(),
           test_mask.int().sum().item()))

    # Normalise self loops: drop any existing ones first so that adding one
    # per node cannot create duplicates.
    g = dgl.remove_self_loop(g)
    g = dgl.add_self_loop(g)
    n_edges = g.number_of_edges()

    # create model: every layer but the last uses ``num_heads`` attention
    # heads, the last one uses ``num_out_heads``.
    heads = ([args.num_heads] * (args.num_layers - 1)) + [args.num_out_heads]
    model = GAT(g,
                args.num_layers,
                num_feats,
                args.num_hidden,
                n_classes,
                heads,
                F.elu,
                args.in_drop,
                args.attn_drop,
                args.negative_slope,
                args.residual)
    print(model)
    if args.early_stop:
        stopper = EarlyStopping(patience=100)
    if cuda:
        # Place the model on the same device the graph was moved to; a bare
        # ``model.cuda()`` would pick the current default GPU instead.
        model.cuda(args.gpu)
    loss_fcn = torch.nn.CrossEntropyLoss()

    # use optimizer
    optimizer = torch.optim.Adam(
        model.parameters(), lr=args.lr, weight_decay=args.weight_decay)

    # Per-epoch wall-clock durations; the first three epochs are treated as
    # warm-up and are not timed.
    dur = []
    # Set once the early stopper has been consulted at least once.
    # NOTE(review): presumably ``EarlyStopping.step`` is what writes
    # 'es_checkpoint.pt' - confirm against utils.EarlyStopping.
    stopper_stepped = False
    for epoch in range(args.epochs):
        model.train()
        if epoch >= 3:
            if cuda:
                # Wait for pending GPU work so the timer starts clean.
                torch.cuda.synchronize()
            t0 = time.time()
        # forward
        logits = model(features)
        loss = loss_fcn(logits[train_mask], labels[train_mask])

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if epoch >= 3:
            if cuda:
                torch.cuda.synchronize()
            dur.append(time.time() - t0)

        train_acc = accuracy(logits[train_mask], labels[train_mask])

        if args.fastmode:
            # Reuse the training-mode forward pass instead of re-evaluating.
            val_acc = accuracy(logits[val_mask], labels[val_mask])
        else:
            val_acc = evaluate(model, features, labels, val_mask)
            if args.early_stop:
                stopper_stepped = True
                if stopper.step(val_acc, model):
                    break

        # No timings exist during the warm-up epochs; report NaN directly
        # rather than calling np.mean on an empty list (which warns).
        if dur:
            mean_dur = np.mean(dur)
            throughput = n_edges / mean_dur / 1000
        else:
            mean_dur = throughput = float('nan')

        print("Epoch {:05d} | Time(s) {:.4f} | Loss {:.4f} | TrainAcc {:.4f} |"
              " ValAcc {:.4f} | ETputs(KTEPS) {:.2f}".
              format(epoch, mean_dur, loss.item(), train_acc,
                     val_acc, throughput))

    print()
    # Only restore the checkpoint when the stopper actually ran in this
    # invocation (it never does with --fastmode or zero epochs); otherwise
    # the file may be missing or left over from an earlier run.
    if args.early_stop and stopper_stepped:
        model.load_state_dict(torch.load('es_checkpoint.pt'))
    acc = evaluate(model, features, labels, test_mask)
    print("Test Accuracy {:.4f}".format(acc))

VoVAllen's avatar
VoVAllen committed
147

148
149
150
151
152
153
if __name__ == '__main__':
    # Script entry point: build the command-line interface, echo the parsed
    # options and start training.
    parser = argparse.ArgumentParser(description='GAT')
    register_data_args(parser)

    # (flag, keyword arguments) pairs, registered in this order.
    option_table = [
        ("--gpu", dict(type=int, default=-1,
                       help="which GPU to use. Set -1 to use CPU.")),
        ("--epochs", dict(type=int, default=200,
                          help="number of training epochs")),
        ("--num-heads", dict(type=int, default=8,
                             help="number of hidden attention heads")),
        ("--num-out-heads", dict(type=int, default=1,
                                 help="number of output attention heads")),
        ("--num-layers", dict(type=int, default=2,
                              help="number of hidden layers")),
        ("--num-hidden", dict(type=int, default=8,
                              help="number of hidden units")),
        ("--residual", dict(action="store_true", default=False,
                            help="use residual connection")),
        ("--in-drop", dict(type=float, default=.6,
                           help="input feature dropout")),
        ("--attn-drop", dict(type=float, default=.6,
                             help="attention dropout")),
        ("--lr", dict(type=float, default=0.005,
                      help="learning rate")),
        ("--weight-decay", dict(type=float, default=5e-4,
                                help="weight decay")),
        ("--negative-slope", dict(type=float, default=0.2,
                                  help="the negative slope of leaky relu")),
        ("--early-stop", dict(action="store_true", default=False,
                              help="indicates whether to use early stop or not")),
        ("--fastmode", dict(action="store_true", default=False,
                            help="skip re-evaluate the validation set")),
    ]
    for flag, options in option_table:
        parser.add_argument(flag, **options)

    args = parser.parse_args()
    print(args)

    main(args)