import json
import os
from functools import namedtuple

import dgl

import numpy as np
import scipy.sparse
import torch
from sklearn.metrics import f1_score
from sklearn.preprocessing import StandardScaler


class Logger(object):
    """A custom logger to log stdout to a logging file."""

    def __init__(self, path):
        """Initialize the logger.

        Parameters
        ----------
        path : str
            The path of the file that log messages are appended to.
        """
        self.path = path

    def write(self, s):
        with open(self.path, "a") as f:
            f.write(str(s))
        print(s)
        return


def save_log_dir(args):
    log_dir = "./log/{}/{}".format(args.dataset, args.log_dir)
    os.makedirs(log_dir, exist_ok=True)
    return log_dir


def calc_f1(y_true, y_pred, multilabel):
    if multilabel:
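        # Threshold raw logits at 0, which corresponds to a predicted
        # probability of 0.5 under a sigmoid output.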
        y_pred[y_pred > 0] = 1
        y_pred[y_pred <= 0] = 0
    else:
        y_pred = np.argmax(y_pred, axis=1)
    return f1_score(y_true, y_pred, average="micro"), f1_score(
        y_true, y_pred, average="macro"
    )


def evaluate(model, g, labels, mask, multilabel=False):
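    """Compute micro/macro F1 on the nodes selected by ``mask``."""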
    model.eval()
    with torch.no_grad():
        logits = model(g)
        logits = logits[mask]
        labels = labels[mask]
        f1_mic, f1_mac = calc_f1(
            labels.cpu().numpy(), logits.cpu().numpy(), multilabel
        )
        return f1_mic, f1_mac


# Load a GraphSAINT dataset and convert it to the DGL format.
def load_data(args, multilabel):
    if not os.path.exists("graphsaintdata") and not os.path.exists("data"):
        raise ValueError("The directory graphsaintdata does not exist!")
    elif os.path.exists("graphsaintdata") and not os.path.exists("data"):
        os.rename("graphsaintdata", "data")
    prefix = "data/{}".format(args.dataset)
    DataType = namedtuple("Dataset", ["num_classes", "train_nid", "g"])

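    # Adjacency of the full graph as a boolean scipy sparse matrix,
    # converted to a DGLGraph below.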
    adj_full = scipy.sparse.load_npz(
        "./{}/adj_full.npz".format(prefix)
    ).astype(bool)
    g = dgl.from_scipy(adj_full)
    num_nodes = g.num_nodes()

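    # adj_train only keeps edges among training nodes; the IDs of its
    # non-isolated rows form the set of training node IDs.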
    adj_train = scipy.sparse.load_npz(
        "./{}/adj_train.npz".format(prefix)
    ).astype(bool)
    train_nid = np.array(list(set(adj_train.nonzero()[0])))

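    # role.json lists the node IDs of the train ("tr"), validation ("va"),
    # and test ("te") splits; build a boolean mask over all nodes for each.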
    with open("./{}/role.json".format(prefix)) as f:
        role = json.load(f)
    mask = np.zeros((num_nodes,), dtype=bool)
    train_mask = mask.copy()
86
    train_mask[role["tr"]] = True
    val_mask = mask.copy()
    val_mask[role["va"]] = True
    test_mask = mask.copy()
    test_mask[role["te"]] = True

    feats = np.load("./{}/feats.npy".format(prefix))
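    # Standardize node features with statistics computed on the training
    # nodes only, so validation/test information does not leak in.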
    scaler = StandardScaler()
    scaler.fit(feats[train_nid])
    feats = scaler.transform(feats)

    with open("./{}/class_map.json".format(prefix)) as f:
        class_map = json.load(f)
    class_map = {int(k): v for k, v in class_map.items()}
    if multilabel:
        # Multi-label binary classification
        num_classes = len(list(class_map.values())[0])
        class_arr = np.zeros((num_nodes, num_classes))
        for k, v in class_map.items():
            class_arr[k] = v
    else:
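        # Multi-class classification with a single integer label per node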
        num_classes = max(class_map.values()) - min(class_map.values()) + 1
        class_arr = np.zeros((num_nodes,))
        for k, v in class_map.items():
            class_arr[k] = v

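    # Attach features, labels, and split masks to the graph as node data.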
    g.ndata["feat"] = torch.tensor(feats, dtype=torch.float)
    g.ndata["label"] = torch.tensor(
        class_arr, dtype=torch.float if multilabel else torch.long
    )
    g.ndata["train_mask"] = torch.tensor(train_mask, dtype=torch.bool)
    g.ndata["val_mask"] = torch.tensor(val_mask, dtype=torch.bool)
    g.ndata["test_mask"] = torch.tensor(test_mask, dtype=torch.bool)

    data = DataType(g=g, num_classes=num_classes, train_nid=train_nid)
    return data
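

# Minimal usage sketch (hypothetical driver code, not part of this module):
# it assumes an ``args`` namespace with ``dataset`` and ``log_dir`` attributes
# and a model whose forward pass takes the DGLGraph and returns per-node logits.
#
#     data = load_data(args, multilabel=False)
#     g = data.g
#     f1_mic, f1_mac = evaluate(
#         model, g, g.ndata["label"], g.ndata["test_mask"]
#     )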