hrbench.py 1.46 KB
Newer Older
luopl's avatar
luopl committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
from ...smp import *
import os


def report_acc_hrbench(df):
    """Build a per-cycle accuracy table with trailing 'Average' rows.

    ``df`` is grouped by its ``cycle_category`` column and every group is
    scored with ``hrbench_score``. Each ``(type, accuracy)`` entry of the
    score mapping becomes one output row labelled with the group key.
    Afterwards one ``'Average'`` row per type is appended, holding the summed
    accuracy of that type divided by the total number of groups (a type that
    is absent from a group therefore contributes nothing to the sum but the
    group is still counted in the divisor).

    Args:
        df: DataFrame containing a ``cycle_category`` column plus whatever
            columns ``hrbench_score`` reads.

    Returns:
        DataFrame with columns ``cycle``, ``type`` and ``accuracy``. When
        there are no groups, the frame has no columns at all.
    """
    table = defaultdict(list)
    totals = {}
    n_cycles = 0

    def add_row(cycle, task_type, accuracy):
        # Keep the three output columns aligned row by row.
        table['cycle'].append(cycle)
        table['type'].append(task_type)
        table['accuracy'].append(accuracy)

    for cycle, subset in df.groupby('cycle_category'):
        n_cycles += 1
        # Only the label -> accuracy mapping is needed; the frame is unused.
        per_type = hrbench_score(subset)[1]
        for task_type, accuracy in per_type.items():
            add_row(cycle, task_type, accuracy)
            totals[task_type] = totals.get(task_type, 0) + accuracy

    # `totals` is empty when there were no groups, so no division by zero.
    for task_type, total in totals.items():
        add_row('Average', task_type, total / n_cycles)

    return pd.DataFrame(table)


def hrbench_score(data):
    """Compute the overall and per-category mean of the ``hit`` column.

    Args:
        data: DataFrame with a ``category`` column and a ``hit`` column.

    Returns:
        A ``(table, resp_dic)`` tuple. ``table`` is a DataFrame with columns
        ``type`` and ``acc``; ``resp_dic`` is a dict mapping the same labels
        to the same values. The first entry is ``'all'`` (mean of ``hit``
        over every row), followed by one entry per distinct category in
        order of first appearance. For empty input the ``'all'`` value is
        NaN.
    """
    categories = data['category'].tolist()
    hits = data['hit'].tolist()

    # Bucket hits per category; 'all' collects every row for the overall mean.
    score_dict = defaultdict(list)
    for category, hit in zip(categories, hits):
        score_dict[category].append(hit)
        score_dict['all'].append(hit)

    # De-duplicate while keeping first-appearance order. Iterating a set of
    # strings yields a different order from run to run (hash randomisation),
    # which made the row order of the report non-reproducible.
    category_list = list(dict.fromkeys(categories))

    # np.mean([]) is NaN but also emits a RuntimeWarning; return NaN quietly.
    all_hits = score_dict['all']
    all_acc = np.mean(all_hits) if all_hits else np.nan

    ret = {'type': ['all'], 'acc': [all_acc]}
    resp_dic = {'all': all_acc}
    for cate in category_list:
        acc = np.mean(score_dict[cate])
        ret['type'].append(cate)
        ret['acc'].append(acc)
        resp_dic[cate] = acc

    return pd.DataFrame(ret), resp_dic