model.py
import dgl
import dgl.nn as dglnn
import sklearn.linear_model as lm
import sklearn.metrics as skm
import torch as th
import torch.nn as nn
import torch.nn.functional as F
import tqdm


class SAGE(nn.Module):
    def __init__(
        self, in_feats, n_hidden, n_classes, n_layers, activation, dropout
    ):
        super().__init__()
        self.init(in_feats, n_hidden, n_classes, n_layers, activation, dropout)

    def init(
        self, in_feats, n_hidden, n_classes, n_layers, activation, dropout
    ):
        self.n_layers = n_layers
        self.n_hidden = n_hidden
        self.n_classes = n_classes
        self.layers = nn.ModuleList()
        if n_layers > 1:
            self.layers.append(dglnn.SAGEConv(in_feats, n_hidden, "mean"))
            for i in range(1, n_layers - 1):
                self.layers.append(dglnn.SAGEConv(n_hidden, n_hidden, "mean"))
            self.layers.append(dglnn.SAGEConv(n_hidden, n_classes, "mean"))
        else:
            self.layers.append(dglnn.SAGEConv(in_feats, n_classes, "mean"))
        self.dropout = nn.Dropout(dropout)
        self.activation = activation

    def forward(self, blocks, x):
        h = x
        for l, (layer, block) in enumerate(zip(self.layers, blocks)):
            h = layer(block, h)
            # Apply activation and dropout everywhere except after the last layer.
            if l != len(self.layers) - 1:
                h = self.activation(h)
                h = self.dropout(h)
        return h

    def inference(self, g, x, device, batch_size, num_workers):
        """
        Inference with the GraphSAGE model on full neighbors (i.e. without neighbor sampling).
        g : the entire graph.
        x : the input of entire node set.

        The inference code is written in a fashion that it could handle any number of nodes and
        layers.
        """
        # During inference with sampling, multi-layer blocks are very inefficient because
        # lots of computations in the first few layers are repeated.
        # Therefore, we compute the representation of all nodes layer by layer.  The nodes
        # on each layer are of course splitted in batches.
        # TODO: can we standardize this?
        for l, layer in enumerate(self.layers):
            y = th.zeros(
                g.num_nodes(),
                self.n_hidden if l != len(self.layers) - 1 else self.n_classes,
            )

            sampler = dgl.dataloading.MultiLayerFullNeighborSampler(1)
            dataloader = dgl.dataloading.DataLoader(
                g,
                th.arange(g.num_nodes()).to(g.device),
                sampler,
                device=device if num_workers == 0 else None,
                batch_size=batch_size,
                shuffle=False,
                drop_last=False,
                num_workers=num_workers,
            )

            for input_nodes, output_nodes, blocks in tqdm.tqdm(dataloader):
                block = blocks[0].int().to(device)
                h = x[input_nodes].to(device)
                h = layer(block, h)
                if l != len(self.layers) - 1:
                    h = self.activation(h)
                    h = self.dropout(h)

                y[output_nodes] = h.cpu()

            # The output of this layer becomes the input of the next.
            x = y
        return y
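

# A minimal usage sketch (not part of the original file): it shows how the SAGE
# module and its full-neighbor `inference` method might be called after training.
# The helper name, hidden size, layer count, and batch size below are illustrative
# assumptions, not values taken from the DGL example.
def _example_full_graph_inference(g, feats, n_classes, device="cpu"):
    # Build a 2-layer GraphSAGE model matching the input feature dimensionality.
    model = SAGE(
        in_feats=feats.shape[1],
        n_hidden=16,
        n_classes=n_classes,
        n_layers=2,
        activation=F.relu,
        dropout=0.5,
    ).to(device)
    model.eval()
    with th.no_grad():
        # Layer-by-layer inference over all nodes, without neighbor sampling.
        logits = model.inference(g, feats, device, batch_size=1000, num_workers=0)
    return logits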


def compute_acc_unsupervised(emb, labels, train_nids, val_nids, test_nids):
    """
    Compute the accuracy of prediction given the labels.
    """
    emb = emb.cpu().numpy()
    labels = labels.cpu().numpy()
    train_nids = train_nids.cpu().numpy()
    train_labels = labels[train_nids]
    val_nids = val_nids.cpu().numpy()
    val_labels = labels[val_nids]
    test_nids = test_nids.cpu().numpy()
    test_labels = labels[test_nids]

    emb = (emb - emb.mean(0, keepdims=True)) / emb.std(0, keepdims=True)

    lr = lm.LogisticRegression(multi_class="multinomial", max_iter=10000)
    lr.fit(emb[train_nids], train_labels)

    pred = lr.predict(emb)
    f1_micro_eval = skm.f1_score(val_labels, pred[val_nids], average="micro")
    f1_micro_test = skm.f1_score(test_labels, pred[test_nids], average="micro")
    return f1_micro_eval, f1_micro_test
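

# A minimal end-to-end evaluation sketch (not part of the original file): it ties
# `SAGE.inference` and `compute_acc_unsupervised` together.  The helper name,
# batch size, and argument names are illustrative assumptions.
def _example_unsupervised_eval(
    model, g, feats, labels, train_nids, val_nids, test_nids, device="cpu"
):
    model.eval()
    with th.no_grad():
        # Full-neighbor inference produces one embedding per node.
        emb = model.inference(g, feats, device, batch_size=1000, num_workers=0)
    # Fit a logistic-regression probe on the training nodes and report micro-F1
    # on the validation and test nodes.
    return compute_acc_unsupervised(emb, labels, train_nids, val_nids, test_nids)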