# Summarizer configuration for the MathBench (v1) result table.
#
# `dataset_abbrs` lists the rows of the report in display order. Each entry is
# either a plain string (a section heading, marked "# category") or a
# two-item list of [dataset abbreviation, metric name].
summarizer = dict(
    dataset_abbrs=[
        '######## MathBench Application Accuracy ########', # category
        ['mathbench-college-single_choice_cn', 'acc_1'],
        ['mathbench-college-single_choice_en', 'acc_1'],
        ['mathbench-high-single_choice_cn', 'acc_1'],
        ['mathbench-high-single_choice_en', 'acc_1'],
        ['mathbench-middle-single_choice_cn', 'acc_1'],
        ['mathbench-middle-single_choice_en', 'acc_1'],
        # The cloze datasets are listed with 'accuracy' rather than 'acc_1'.
        ['mathbench-primary-cloze_cn', 'accuracy'],
        ['mathbench-primary-cloze_en', 'accuracy'],
        ['mathbench-arithmetic-cloze_en', 'accuracy'],
        '######## MathBench Application CircularEval ########', # category
        # Same single-choice datasets as above, reported with 'perf_4'.
        ['mathbench-college-single_choice_cn', 'perf_4'],
        ['mathbench-college-single_choice_en', 'perf_4'],
        ['mathbench-high-single_choice_cn', 'perf_4'],
        ['mathbench-high-single_choice_en', 'perf_4'],
        ['mathbench-middle-single_choice_cn', 'perf_4'],
        ['mathbench-middle-single_choice_en', 'perf_4'],
        '######## MathBench Knowledge CircularEval ########', # category
        ['mathbench-college_knowledge-single_choice_cn', 'perf_4'],
        ['mathbench-college_knowledge-single_choice_en', 'perf_4'],
        ['mathbench-high_knowledge-single_choice_cn', 'perf_4'],
        ['mathbench-high_knowledge-single_choice_en', 'perf_4'],
        ['mathbench-middle_knowledge-single_choice_cn', 'perf_4'],
        ['mathbench-middle_knowledge-single_choice_en', 'perf_4'],
        ['mathbench-primary_knowledge-single_choice_cn', 'perf_4'],
        ['mathbench-primary_knowledge-single_choice_en', 'perf_4'],
        '######## MathBench Knowledge Accuracy ########', # category
        ['mathbench-college_knowledge-single_choice_cn', 'acc_1'],
        ['mathbench-college_knowledge-single_choice_en', 'acc_1'],
        ['mathbench-high_knowledge-single_choice_cn', 'acc_1'],
        ['mathbench-high_knowledge-single_choice_en', 'acc_1'],
        ['mathbench-middle_knowledge-single_choice_cn', 'acc_1'],
        ['mathbench-middle_knowledge-single_choice_en', 'acc_1'],
        ['mathbench-primary_knowledge-single_choice_cn', 'acc_1'],
        ['mathbench-primary_knowledge-single_choice_en', 'acc_1'],
    ],
    # Concatenate every list bound to a name ending in "_summary_groups" in
    # the current namespace. When no such name is defined, this is [].
    summary_groups=sum(
        [v for k, v in locals().items() if k.endswith("_summary_groups")], [])
)