"vscode:/vscode.git/clone" did not exist on "07b297e7dec3f475a880c15addcb6c02c0690992"
train.py 5.83 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
"""
Graph Attention Networks in DGL using SPMV optimization.
Multiple heads are also batched together for faster training.
References
----------
Paper: https://arxiv.org/abs/1710.10903
Author's code: https://github.com/PetarV-/GAT
PyTorch implementation: https://github.com/Diego999/pyGAT
"""

import argparse
import numpy as np
13
import networkx as nx
14
15
16
import time
import torch
import torch.nn.functional as F
17
18
19
20
import dgl
from dgl.data import register_data_args
from dgl.data import CoraGraphDataset, CiteseerGraphDataset, PubmedGraphDataset

21
from gat import GAT
VoVAllen's avatar
VoVAllen committed
22
23
from utils import EarlyStopping

24
25
26
27
28
29

def accuracy(logits, labels):
    """Return the fraction of rows whose highest-scoring column equals the label.

    Args:
        logits: 2-D score tensor; the prediction for each row is the index
            of its maximum along dimension 1.
        labels: Tensor of target indices, one per row of ``logits``.

    Returns:
        The number of matching predictions divided by ``len(labels)``,
        as a Python float.
    """
    predicted = logits.max(dim=1)[1]
    hits = (predicted == labels).sum().item()
    return float(hits) / len(labels)

VoVAllen's avatar
VoVAllen committed
30

31
32
33
34
35
36
37
38
def evaluate(model, features, labels, mask):
    """Score ``model`` on the nodes selected by ``mask``.

    Calls ``model.eval()`` (the model is left in that mode), runs a forward
    pass on ``features`` with gradient tracking disabled, and compares the
    masked outputs against the masked labels.

    Args:
        model: Callable module mapping ``features`` to per-node logits.
        features: Input passed unchanged to ``model``.
        labels: Target tensor, indexed by ``mask``.
        mask: Index or boolean mask selecting the nodes to score.

    Returns:
        The value of ``accuracy`` on the selected logits and labels.
    """
    model.eval()
    with torch.no_grad():
        selected_logits = model(features)[mask]
        selected_labels = labels[mask]
        return accuracy(selected_logits, selected_labels)

VoVAllen's avatar
VoVAllen committed
39

40
41
def main(args):
    """Train a GAT node classifier on a citation graph and print test accuracy.

    Loads the dataset named by ``args.dataset``, builds a ``GAT`` model,
    optimizes it with Adam for up to ``args.epochs`` epochs while printing
    per-epoch statistics, and finally prints the accuracy on the test mask.

    Args:
        args: Parsed command-line namespace. The attributes read here are
            ``dataset``, ``gpu``, ``num_heads``, ``num_out_heads``,
            ``num_layers``, ``num_hidden``, ``in_drop``, ``attn_drop``,
            ``negative_slope``, ``residual``, ``early_stop``, ``lr``,
            ``weight_decay``, ``epochs`` and ``fastmode``.

    Raises:
        ValueError: If ``args.dataset`` is not 'cora', 'citeseer' or
            'pubmed'.
    """
    # load and preprocess dataset
    if args.dataset == 'cora':
        data = CoraGraphDataset()
    elif args.dataset == 'citeseer':
        data = CiteseerGraphDataset()
    elif args.dataset == 'pubmed':
        data = PubmedGraphDataset()
    else:
        raise ValueError('Unknown dataset: {}'.format(args.dataset))

    g = data[0]
    # A negative GPU index selects the CPU.
    cuda = args.gpu >= 0
    if cuda:
        g = g.int().to(args.gpu)

    features = g.ndata['feat']
    labels = g.ndata['label']
    train_mask = g.ndata['train_mask']
    val_mask = g.ndata['val_mask']
    test_mask = g.ndata['test_mask']
    num_feats = features.shape[1]
    # NOTE(review): `num_labels` and `graph` may be deprecated accessors in
    # newer DGL releases -- confirm against the installed version.
    n_classes = data.num_labels
    n_edges = data.graph.number_of_edges()
    print("""----Data statistics------'
      #Edges %d
      #Classes %d
      #Train samples %d
      #Val samples %d
      #Test samples %d""" %
          (n_edges, n_classes,
           train_mask.int().sum().item(),
           val_mask.int().sum().item(),
           test_mask.int().sum().item()))

    # add self loop (existing self loops are removed first so that none
    # ends up duplicated)
    g = dgl.remove_self_loop(g)
    g = dgl.add_self_loop(g)
    # From here on the edge count includes the self loops; it feeds the
    # throughput figure printed every epoch.
    n_edges = g.number_of_edges()

    # create model: one head count per hidden layer, then the output heads
    heads = ([args.num_heads] * args.num_layers) + [args.num_out_heads]
    model = GAT(g,
                args.num_layers,
                num_feats,
                args.num_hidden,
                n_classes,
                heads,
                F.elu,
                args.in_drop,
                args.attn_drop,
                args.negative_slope,
                args.residual)
    print(model)
    if args.early_stop:
        stopper = EarlyStopping(patience=100)
    if cuda:
        # Put the model on the same device as the graph; a bare `.cuda()`
        # would use the current default device instead of `args.gpu`.
        model.cuda(args.gpu)
    loss_fcn = torch.nn.CrossEntropyLoss()

    # use optimizer
    optimizer = torch.optim.Adam(
        model.parameters(), lr=args.lr, weight_decay=args.weight_decay)

    # training loop
    dur = []
    for epoch in range(args.epochs):
        model.train()
        # The first three epochs are excluded from the timing statistics
        # (presumably as warm-up).
        timed = epoch >= 3
        if timed:
            if cuda:
                # CUDA kernels run asynchronously; wait for pending work so
                # the wall-clock interval covers this epoch only.
                torch.cuda.synchronize(args.gpu)
            t0 = time.time()
        # forward
        logits = model(features)
        loss = loss_fcn(logits[train_mask], labels[train_mask])

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if timed:
            if cuda:
                torch.cuda.synchronize(args.gpu)
            dur.append(time.time() - t0)

        train_acc = accuracy(logits[train_mask], labels[train_mask])

        if args.fastmode:
            # Reuse the training-mode forward pass instead of re-evaluating.
            val_acc = accuracy(logits[val_mask], labels[val_mask])
        else:
            val_acc = evaluate(model, features, labels, val_mask)

        # Step the stopper in both modes: the checkpoint reloaded after the
        # loop is presumably written by `EarlyStopping.step`, so skipping it
        # in fast mode would leave nothing (or a stale file) to load.
        if args.early_stop and stopper.step(val_acc, model):
            break

        # No timings exist before the fourth epoch; report NaN explicitly
        # rather than taking the mean of an empty list.
        mean_dur = np.mean(dur) if dur else float('nan')
        print("Epoch {:05d} | Time(s) {:.4f} | Loss {:.4f} | TrainAcc {:.4f} |"
              " ValAcc {:.4f} | ETputs(KTEPS) {:.2f}".
              format(epoch, mean_dur, loss.item(), train_acc,
                     val_acc, n_edges / mean_dur / 1000))

    print()
    if args.early_stop:
        # Restore the weights saved during early stopping before testing.
        model.load_state_dict(torch.load('es_checkpoint.pt'))
    acc = evaluate(model, features, labels, test_mask)
    print("Test Accuracy {:.4f}".format(acc))

VoVAllen's avatar
VoVAllen committed
143

144
145
146
147
148
149
if __name__ == '__main__':
    # Script entry point: build the command-line interface, echo the parsed
    # options, and hand them to main().
    parser = argparse.ArgumentParser(description='GAT')
    # Dataset-selection options come from DGL's shared helper; main() reads
    # the resulting `args.dataset`.
    register_data_args(parser)
    parser.add_argument("--gpu", type=int, default=-1,
                        help="which GPU to use. Set -1 to use CPU.")
    parser.add_argument("--epochs", type=int, default=200,
                        help="number of training epochs")
    parser.add_argument("--num-heads", type=int, default=8,
                        help="number of hidden attention heads")
    parser.add_argument("--num-out-heads", type=int, default=1,
                        help="number of output attention heads")
    parser.add_argument("--num-layers", type=int, default=1,
                        help="number of hidden layers")
    parser.add_argument("--num-hidden", type=int, default=8,
                        help="number of hidden units")
    parser.add_argument("--residual", action="store_true", default=False,
                        help="use residual connection")
    parser.add_argument("--in-drop", type=float, default=.6,
                        help="input feature dropout")
    parser.add_argument("--attn-drop", type=float, default=.6,
                        help="attention dropout")
    parser.add_argument("--lr", type=float, default=0.005,
                        help="learning rate")
    parser.add_argument('--weight-decay', type=float, default=5e-4,
                        help="weight decay")
    parser.add_argument('--negative-slope', type=float, default=0.2,
                        help="the negative slope of leaky relu")
    parser.add_argument('--early-stop', action='store_true', default=False,
                        help="indicates whether to use early stop or not")
    parser.add_argument('--fastmode', action="store_true", default=False,
                        help="skip re-evaluate the validation set")
    args = parser.parse_args()
    print(args)

    main(args)