# compassbench_v1_reason.py
# Summary groups for the reasonbench results. Every entry carries the group's
# `name` and the list of `subsets` that belong to it. The last group,
# `reasonbench`, lists the two logic groups declared right above it together
# with the cn/en commonsense entries.
# NOTE(review): how the subset scores are combined into a group score is
# decided by whatever consumes this list, not here -- confirm against the
# summarizer implementation.
compassbench_v1_reason_groups = [
    dict(
        name='reasonbench_cn_logic_circular',
        subsets=[
            'reasonbench_cn_abductive_alphanlg_translated_circular',
            'reasonbench_cn_deductive_bbh3obj_translated_circular',
            'reasonbench_cn_deductive_logiqa_zh_circular',
            'reasonbench_cn_inductive_deer_translated_circular',
            'reasonbench_cn_inductive_selfgenerated_circular',
        ],
    ),
    dict(
        name='reasonbench_en_logic_circular',
        subsets=[
            'reasonbench_en_abductive_alphanlg_circular',
            'reasonbench_en_deductive_bbh7obj_circular',
            'reasonbench_en_deductive_logiqa_zh_translated_circular',
            'reasonbench_en_deductive_ocnli_translated_circular',
            'reasonbench_en_inductive_deer_circular',
            'reasonbench_en_inductive_selfgenerated_circular',
        ],
    ),
    dict(
        name='reasonbench',
        subsets=[
            'reasonbench_cn_commonsense_circular',
            'reasonbench_cn_logic_circular',
            'reasonbench_en_commonsense_circular',
            'reasonbench_en_logic_circular',
        ],
    ),
]

# Summarizer configuration.
#   dataset_abbrs  -- [abbreviation, metric] pairs. Every abbreviation below is
#                     paired first with 'acc_origin' and then, in the same
#                     order, with 'perf_circular' (16 + 16 = 32 pairs).
#   summary_groups -- the group definitions from compassbench_v1_reason_groups.
# NOTE(review): the meaning of the two metric names is defined by the
# evaluator that produces them, not in this file -- confirm there.
summarizer = dict(
    dataset_abbrs=[
        [abbr, metric]
        for metric in ('acc_origin', 'perf_circular')
        for abbr in (
            # Overall score and the four top-level splits.
            'reasonbench',
            'reasonbench_cn_commonsense_circular',
            'reasonbench_en_commonsense_circular',
            'reasonbench_cn_logic_circular',
            'reasonbench_en_logic_circular',
            # Individual cn logic subsets.
            'reasonbench_cn_abductive_alphanlg_translated_circular',
            'reasonbench_cn_deductive_bbh3obj_translated_circular',
            'reasonbench_cn_deductive_logiqa_zh_circular',
            'reasonbench_cn_inductive_deer_translated_circular',
            'reasonbench_cn_inductive_selfgenerated_circular',
            # Individual en logic subsets.
            'reasonbench_en_abductive_alphanlg_circular',
            'reasonbench_en_deductive_bbh7obj_circular',
            'reasonbench_en_deductive_logiqa_zh_translated_circular',
            'reasonbench_en_deductive_ocnli_translated_circular',
            'reasonbench_en_inductive_deer_circular',
            'reasonbench_en_inductive_selfgenerated_circular',
        )
    ],
    summary_groups=compassbench_v1_reason_groups,
)