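"""Graph classification with early stopping on the validation loss.

Trains a GraphClassifier on a LegacyTUDataset using an 80/10/10
train/validation/test split, stops once the validation loss has not improved
for more than `patience` epochs, and reports the test accuracy recorded at
the best-validation epoch, averaged over several independent trials.
"""
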
import json
import os
from datetime import datetime
from time import time

import dgl
import torch
import torch.nn.functional as F
from dgl.data import LegacyTUDataset
from dgl.dataloading import GraphDataLoader
from torch import Tensor
from torch.utils.data import random_split

from data_preprocess import degree_as_feature, node_label_as_feature
from networks import GraphClassifier
from utils import get_stats, parse_args


def compute_loss(
    cls_logits: Tensor,
    labels: Tensor,
    logits_s1: Tensor,
    logits_s2: Tensor,
    epoch: int,
    total_epochs: int,
    device: torch.device,
):
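    """Combine the supervised classification loss with the auxiliary
    vertex-infomax pooling loss.

    cls_logits is expected to hold log-probabilities (e.g. log_softmax
    output), since F.nll_loss is applied to it directly. The pooling term
    is annealed via the (2 - epoch / total_epochs) factor below.
    """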
    # classification loss
    classify_loss = F.nll_loss(cls_logits, labels.to(device))

    # loss for vertex infomax pooling
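    # logits_s1 / logits_s2 are assumed to stack positive samples in the
    # first half and negative (corrupted) samples in the second half, so the
    # targets are ones followed by zeros at each scale.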
    scale1, scale2 = logits_s1.size(0) // 2, logits_s2.size(0) // 2
    s1_label_t, s1_label_f = torch.ones(scale1), torch.zeros(scale1)
    s2_label_t, s2_label_f = torch.ones(scale2), torch.zeros(scale2)
    s1_label = torch.cat((s1_label_t, s1_label_f), dim=0).to(device)
    s2_label = torch.cat((s2_label_t, s2_label_f), dim=0).to(device)

    pool_loss_s1 = F.binary_cross_entropy_with_logits(logits_s1, s1_label)
    pool_loss_s2 = F.binary_cross_entropy_with_logits(logits_s2, s2_label)
    pool_loss = (pool_loss_s1 + pool_loss_s2) / 2

    loss = classify_loss + (2 - epoch / total_epochs) * pool_loss

    return loss


def train(
    model: torch.nn.Module,
    optimizer,
    trainloader,
    device,
    curr_epoch,
    total_epochs,
):
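    """Run one training epoch and return the mean per-batch loss."""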
    model.train()

    total_loss = 0.0
    num_batches = len(trainloader)

    for batch in trainloader:
        optimizer.zero_grad()
        batch_graphs, batch_labels = batch
        batch_graphs = batch_graphs.to(device)
        batch_labels = batch_labels.long().to(device)
        out, l1, l2 = model(batch_graphs, batch_graphs.ndata["feat"])
        loss = compute_loss(
            out, batch_labels, l1, l2, curr_epoch, total_epochs, device
        )
        loss.backward()
        optimizer.step()

        total_loss += loss.item()

    return total_loss / num_batches


@torch.no_grad()
def test(model: torch.nn.Module, loader, device):
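    """Return the classification accuracy over all graphs in `loader`."""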
    model.eval()

    correct = 0.0
    num_graphs = 0

    for batch in loader:
        batch_graphs, batch_labels = batch
        num_graphs += batch_labels.size(0)
        batch_graphs = batch_graphs.to(device)
        batch_labels = batch_labels.long().to(device)
        out, _, _ = model(batch_graphs, batch_graphs.ndata["feat"])
        pred = out.argmax(dim=1)
        correct += pred.eq(batch_labels).sum().item()

    return correct / num_graphs


@torch.no_grad()
def validate(model: torch.nn.Module, loader, device, curr_epoch, total_epochs):
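    """Return (accuracy, mean loss) on `loader`.

    The validation loss returned here drives early stopping in main().
    """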
    model.eval()

    tt_loss = 0.0
    correct = 0.0
    num_graphs = 0
    num_batches = len(loader)

    for batch in loader:
        batch_graphs, batch_labels = batch
        num_graphs += batch_labels.size(0)
        batch_graphs = batch_graphs.to(device)
        batch_labels = batch_labels.long().to(device)
        out, l1, l2 = model(batch_graphs, batch_graphs.ndata["feat"])
        tt_loss += compute_loss(
            out, batch_labels, l1, l2, curr_epoch, total_epochs, device
        ).item()
        pred = out.argmax(dim=1)
        correct += pred.eq(batch_labels).sum().item()

    return correct / num_graphs, tt_loss / num_batches


def main(args):
    # Step 1: Prepare graph data and retrieve train/validation/test indices =========================== #
    dataset = LegacyTUDataset(args.dataset, raw_dir=args.dataset_path)

    # Add self-loops. We add them per graph here since dgl.add_self_loop does
    # not support batched graphs; existing self-loops are removed first so
    # that none are duplicated.
    for i in range(len(dataset)):
        dataset.graph_lists[i] = dgl.remove_self_loop(dataset.graph_lists[i])
        dataset.graph_lists[i] = dgl.add_self_loop(dataset.graph_lists[i])

    # use degree as node feature
    if args.degree_as_feature:
        dataset = degree_as_feature(dataset)
        mode = "concat"
    else:
        mode = "replace"
    dataset = node_label_as_feature(dataset, mode=mode)
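    # mode="concat" appends node labels to the degree features, while
    # "replace" uses the node labels on their own (assumed behaviour of
    # node_label_as_feature).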

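    # 80/10/10 train/validation/test split; the remainder goes to the test
    # set so the three subsets exactly partition the dataset.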
    num_training = int(len(dataset) * 0.8)
    num_val = int(len(dataset) * 0.1)
    num_test = len(dataset) - num_training - num_val
    train_set, val_set, test_set = random_split(
        dataset, [num_training, num_val, num_test]
    )

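    # GraphDataLoader collates the individual DGLGraphs of each subset into
    # batched graphs.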
    train_loader = GraphDataLoader(
        train_set, batch_size=args.batch_size, shuffle=True, num_workers=1
    )
    val_loader = GraphDataLoader(
        val_set, batch_size=args.batch_size, num_workers=1
    )
    test_loader = GraphDataLoader(
        test_set, batch_size=args.batch_size, num_workers=1
    )

    device = torch.device(args.device)

    # Step 2: Create model =================================================================== #
    num_feature, num_classes, _ = dataset.statistics()
    args.in_dim = int(num_feature)
    args.out_dim = int(num_classes)
    args.edge_feat_dim = 0  # No edge features in the datasets we use.

    model = GraphClassifier(args).to(device)

    # Step 3: Create training components ===================================================== #
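    # Adam with the AMSGrad variant; weight_decay adds L2 regularization.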
    optimizer = torch.optim.Adam(
        model.parameters(),
        lr=args.lr,
        amsgrad=True,
        weight_decay=args.weight_decay,
    )

    # Step 4: Training epochs ================================================================= #
    best_test_acc = 0.0
    best_epoch = -1
    train_times = []

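    # Early stopping: stop once the validation loss has not improved for more
    # than args.patience consecutive epochs, and report the test accuracy
    # measured at the best-validation-loss epoch.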
    bad_count = 0
    best_val_loss = float("inf")
    for e in range(args.epochs):
        s_time = time()
        train_loss = train(
            model, optimizer, train_loader, device, e, args.epochs
        )
        train_times.append(time() - s_time)
        _, val_loss = validate(model, val_loader, device, e, args.epochs)
        test_acc = test(model, test_loader, device)

        if best_val_loss > val_loss:
            best_val_loss = val_loss
            best_epoch = e
            bad_count = 0
            best_test_acc = test_acc
        else:
            bad_count += 1

        if bad_count > args.patience:
            break

        if (e + 1) % args.print_every == 0:
            log_format = (
                "Epoch {}: loss={:.4f}, test_acc={:.4f}, best_test_acc={:.4f}"
            )
            print(log_format.format(e + 1, train_loss, test_acc, best_test_acc))
    print(
        "Best Epoch {}, final test acc {:.4f}".format(best_epoch, best_test_acc)
    )
    return best_test_acc, sum(train_times) / len(train_times)


if __name__ == "__main__":
    args = parse_args()
    res = []
    train_times = []
    for i in range(args.num_trials):
        print("Trial {}/{}".format(i + 1, args.num_trials))
        acc, train_time = main(args)
        res.append(acc)
        train_times.append(train_time)

    mean, err_bd = get_stats(res, conf_interval=False)
    print("mean acc: {:.4f}, error bound: {:.4f}".format(mean, err_bd))

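    # Persist the hyper-parameters, aggregate result, timing, and per-trial
    # accuracies to a JSON log named after the dataset.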
    out_dict = {
        "hyper-parameters": vars(args),
        "result_date": str(datetime.now()),
        "result": "{:.4f}(+-{:.4f})".format(mean, err_bd),
        "train_time": "{:.4f}".format(sum(train_times) / len(train_times)),
        "details": res,
    }

    with open(
        os.path.join(args.output_path, "{}.log".format(args.dataset)), "w"
    ) as f:
        json.dump(out_dict, f, sort_keys=True, indent=4)