#!/usr/bin/env python
# -*- coding: utf-8 -*-

import argparse
import inspect

import numpy as np
from clustering_benchmark import ClusteringBenchmark
from utils import TextColors, Timer, metrics


def _read_meta(fn):
    """Read integer cluster labels (one per line) from the file `fn`.

    Returns the labels as a numpy array and the set of distinct labels.
    """
    labels = list()
    lb_set = set()
    with open(fn) as f:
        for lb in f:
            lb = int(lb.strip())
            labels.append(lb)
            lb_set.add(lb)
    return np.array(labels), lb_set
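
# Expected label-file layout for _read_meta (inferred from the parsing above):
# one integer cluster label per line, e.g.
#   0
#   0
#   3
#   1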


def evaluate(gt_labels, pred_labels, metric="pairwise"):
    if isinstance(gt_labels, str) and isinstance(pred_labels, str):
        print("[gt_labels] {}".format(gt_labels))
        print("[pred_labels] {}".format(pred_labels))
        gt_labels, gt_lb_set = _read_meta(gt_labels)
        pred_labels, pred_lb_set = _read_meta(pred_labels)

        print(
            "#inst: gt({}) vs pred({})".format(len(gt_labels), len(pred_labels))
        )
        print(
            "#cls: gt({}) vs pred({})".format(len(gt_lb_set), len(pred_lb_set))
        )

    # Look up the metric function by name in utils.metrics (e.g. the default "pairwise").
    metric_func = metrics.__dict__[metric]

    with Timer(
        "evaluate with {}{}{}".format(TextColors.FATAL, metric, TextColors.ENDC)
    ):
        result = metric_func(gt_labels, pred_labels)
    # np.float was removed in NumPy >= 1.24; check for Python/NumPy floats instead.
    if isinstance(result, (float, np.floating)):
        print(
            "{}{}: {:.4f}{}".format(
                TextColors.OKGREEN, metric, result, TextColors.ENDC
            )
        )
    else:
        ave_pre, ave_rec, fscore = result
        print(
            "{}ave_pre: {:.4f}, ave_rec: {:.4f}, fscore: {:.4f}{}".format(
                TextColors.OKGREEN, ave_pre, ave_rec, fscore, TextColors.ENDC
            )
        )
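
# Example usage of evaluate() (illustrative only; the file paths below are
# hypothetical): it accepts either two label-file paths or two label arrays.
#   evaluate("path/to/gt_labels.txt", "path/to/pred_labels.txt", metric="pairwise")
#   evaluate(np.array([0, 0, 1, 1]), np.array([1, 1, 0, 2]), metric="pairwise")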


def evaluation(pred_labels, labels, metrics):
    """Run each metric named in the comma-separated `metrics` string, then report
    V-measure and Fowlkes-Mallows scores via ClusteringBenchmark."""
    print("==> evaluation")
    # pred_labels = g.ndata['pred_labels'].cpu().numpy()
    max_cluster = np.max(pred_labels)  # highest predicted cluster id (not used further here)
    # gt_labels_all = g.ndata['labels'].cpu().numpy()
    gt_labels_all = labels
    pred_labels_all = pred_labels
    # `metrics` is the comma-separated string argument here, not the utils.metrics module.
    metric_list = metrics.split(",")
    for metric in metric_list:
        evaluate(gt_labels_all, pred_labels_all, metric)
    # Homogeneity/completeness (V-measure) and Fowlkes-Mallows scores
    gt_dict = {str(i): lb for i, lb in enumerate(gt_labels_all)}
    pred_dict = {str(i): lb for i, lb in enumerate(pred_labels_all)}
    bm = ClusteringBenchmark(gt_dict)
    scores = bm.evaluate_vmeasure(pred_dict)
    fmi_scores = bm.evaluate_fowlkes_mallows_score(pred_dict)
    print(scores)
    print(fmi_scores)
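

# Minimal CLI sketch (an assumption: the original entry point is not shown in
# this snippet, and the flag names below are hypothetical). It reuses the
# imported argparse module and the evaluate() helper above.
if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description="Evaluate predicted cluster labels against ground truth."
    )
    parser.add_argument("--gt_labels", type=str, required=True,
                        help="file with one ground-truth label per line")
    parser.add_argument("--pred_labels", type=str, required=True,
                        help="file with one predicted label per line")
    parser.add_argument("--metrics", type=str, default="pairwise",
                        help="comma-separated metric names from utils.metrics")
    args = parser.parse_args()
    for _metric in args.metrics.split(","):
        evaluate(args.gt_labels, args.pred_labels, _metric)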