# classify.py
from statistics import mean

3
4
5
import torch
import torch.nn as nn
import torch.nn.functional as F
6

7
8

class LogisticRegressionClassifier(nn.Module):
    """Logistic regression classifier used to evaluate the quality of embedding results.

    The model is a single ``nn.Linear`` layer. ``forward`` returns that
    layer's raw output; no softmax is applied inside the module.
    """

    def __init__(self, nfeat, nclass):
        """Create the linear layer.

        Args:
            nfeat: Size of each input feature vector (``in_features``).
            nclass: Number of scores produced per sample (``out_features``).
        """
        super().__init__()
        self.lrc = nn.Linear(nfeat, nclass)

    def forward(self, x):
        """Return the linear layer's output for ``x``."""
        return self.lrc(x)
18
19


20
21
22
23
24
25
26
27
def _evaluate(model, features, labels, test_mask):
    """Return the accuracy of ``model`` on the samples selected by ``test_mask``.

    The model is switched to evaluation mode and run on ``features`` with
    gradient tracking disabled. Its output and ``labels`` are both indexed
    with ``test_mask``, and the predicted class of each selected sample is
    the index of the largest score along dimension 1.

    Args:
        model: Callable module exposing ``eval()``; called as ``model(features)``.
        features: Input passed to the model unchanged.
        labels: Ground-truth class indices, indexable by ``test_mask``.
        test_mask: Index (presumably a boolean mask -- confirm against the
            caller) used to select the evaluated samples from both the
            model output and ``labels``.

    Returns:
        The fraction of selected samples whose predicted class equals the
        label, as a Python ``float``.

    Raises:
        ZeroDivisionError: If ``test_mask`` selects no samples.
    """
    model.eval()
    with torch.no_grad():
        logits = model(features)[test_mask]
        targets = labels[test_mask]
        predictions = torch.argmax(logits, dim=1)
        n_correct = torch.sum(predictions == targets).item()
    return n_correct / len(targets)


31
def _train_test_with_lrc(
    model,
    features,
    labels,
    train_mask,
    test_mask,
    epochs=100,
    lr=0.2,
    weight_decay=5e-06,
):
    """Under the pre-defined balanced train/test label setting, train a lrc to evaluate the embedding results.

    ``model`` is optimised in place with Adam on the cross-entropy between
    its output and ``labels``, both restricted to ``train_mask``. Every
    step runs the model on all of ``features`` (no mini-batching). After
    training, the model is scored by ``_evaluate`` on ``test_mask``.

    Args:
        model: Module to train; its parameters are updated in place.
        features: Input passed to the model unchanged.
        labels: Ground-truth class indices, indexable by both masks.
        train_mask: Index selecting the samples that contribute to the loss.
        test_mask: Index selecting the samples used for the final score.
        epochs: Number of optimisation steps (defaults to the original 100).
        lr: Adam learning rate (defaults to the original 0.2).
        weight_decay: Adam weight decay (defaults to the original 5e-06).

    Returns:
        Whatever ``_evaluate`` returns for the trained model on ``test_mask``.
    """
    optimizer = torch.optim.Adam(
        model.parameters(), lr=lr, weight_decay=weight_decay
    )
    # Nothing inside the loop changes the module's mode, so enabling
    # training mode once is enough.
    model.train()
    for _ in range(epochs):
        optimizer.zero_grad()
        output = model(features)
        loss_train = F.cross_entropy(output[train_mask], labels[train_mask])
        loss_train.backward()
        optimizer.step()
    return _evaluate(
        model=model, features=features, labels=labels, test_mask=test_mask
    )

45

46
47
48
49
50
51
def evaluate_embeds(
    features, labels, train_mask, test_mask, n_classes, cuda, test_times=10
):
    """Score embeddings by repeatedly training a logistic regression classifier.

    For each of ``test_times`` rounds a fresh ``LogisticRegressionClassifier``
    is created (input size ``features.shape[1]``, output size ``n_classes``),
    trained and tested via ``_train_test_with_lrc``; the per-round results
    are averaged.

    Args:
        features: Embeddings to evaluate; ``features.shape[1]`` is used as
            the classifier's input size.
        labels: Ground-truth class indices.
        train_mask: Index selecting the training samples.
        test_mask: Index selecting the test samples.
        n_classes: Number of classes the classifier predicts.
        cuda: If truthy, the classifier is moved to the GPU with
            ``model.cuda()``. ``features`` and ``labels`` are not moved
            here -- presumably the caller already placed them on the same
            device (confirm against the caller).
        test_times: Number of independent train/test rounds.

    Returns:
        The mean of the per-round results.

    Raises:
        statistics.StatisticsError: If ``test_times`` is less than 1, since
            the mean of an empty list is undefined.
    """
    print(
        "Training a logistic regression classifier with the pre-defined train/test split setting ..."
    )
    res_list = []
    for _ in range(test_times):
        # A new classifier per round, so rounds differ only by initialisation.
        model = LogisticRegressionClassifier(
            nfeat=features.shape[1], nclass=n_classes
        )
        if cuda:
            model.cuda()
        res = _train_test_with_lrc(
            model=model,
            features=features,
            labels=labels,
            train_mask=train_mask,
            test_mask=test_mask,
        )
        res_list.append(res)
    return mean(res_list)