# compassbench_v1_reason.py
# Summary groups and summarizer settings for the "reasonbench" reasoning results.
# Summary groups for the "reasonbench" results.
#
# Each entry maps a group 'name' to the list of 'subsets' it is built from.
# A subset is either a name that is not defined in this list, or the name of
# a group defined earlier in this list (the roll-ups at the bottom reference
# the per-category groups above them).
# NOTE(review): how subsets are combined into a group score is decided by the
# consumer of this list, not here -- confirm against the summarizer in use.
compassbench_v1_reason_groups = [
    # Per-category groups, split by language (cn / en).
    # There is no 'reasonbench_en_deductive_circular' group; the en roll-up
    # below references its single deductive subset directly.
    {
        'name': 'reasonbench_cn_abductive_circular',
        'subsets': ['reasonbench_cn_abductive_alphanlg_translated_circular'],
    },
    {
        'name': 'reasonbench_en_abductive_circular',
        'subsets': ['reasonbench_en_abductive_alphanlg_circular'],
    },
    {
        'name': 'reasonbench_cn_deductive_circular',
        'subsets': [
            'reasonbench_cn_deductive_bbh3obj_translated_circular',
            'reasonbench_cn_deductive_logiqa_zh_circular',
        ],
    },
    {
        'name': 'reasonbench_cn_inductive_circular',
        'subsets': [
            'reasonbench_cn_inductive_deer_translated_circular',
            'reasonbench_cn_inductive_selfgenerated_circular',
        ],
    },
    {
        'name': 'reasonbench_en_inductive_circular',
        'subsets': [
            'reasonbench_en_inductive_deer_circular',
            'reasonbench_en_inductive_selfgenerated_circular',
        ],
    },

    # Per-language roll-ups. The '*_commonsense_circular' subsets are not
    # groups defined in this list.
    {
        'name': 'reasonbench_cn_circular',
        'subsets': [
            'reasonbench_cn_commonsense_circular',
            'reasonbench_cn_abductive_circular',
            'reasonbench_cn_deductive_circular',
            'reasonbench_cn_inductive_circular',
        ],
    },
    {
        'name': 'reasonbench_en_circular',
        'subsets': [
            'reasonbench_en_commonsense_circular',
            'reasonbench_en_abductive_circular',
            'reasonbench_en_deductive_logiqa_zh_translated_circular',
            'reasonbench_en_inductive_circular',
        ],
    },

    # Overall roll-up across both languages.
    {
        'name': 'reasonbench',
        'subsets': ['reasonbench_cn_circular', 'reasonbench_en_circular'],
    },
]

# Summarizer settings for the "reasonbench" results.
#
# 'dataset_abbrs' is an ordered list of [name, metric] pairs. The same
# sequence of names is listed twice: first with the 'acc_origin' metric,
# then with the 'perf_circular' metric. Within each half the order is:
# overall -> per-language -> per-category -> individual subsets.
# Some names (the commonsense entries and the en deductive subset) appear in
# both the per-category and the individual-subset sections; the repetition
# is kept exactly as in the original list.
# 'summary_groups' points at the group definitions declared above in this
# module.
# NOTE(review): looks like an OpenCompass-style summarizer config -- confirm
# how the consumer treats repeated [name, metric] rows.
summarizer = dict(
    dataset_abbrs=[
        # ---- metric: acc_origin ----
        # Overall.
        ['reasonbench', 'acc_origin'],

        # Per-language roll-ups.
        ['reasonbench_cn_circular', 'acc_origin'],
        ['reasonbench_en_circular', 'acc_origin'],

        # Per-category, cn.
        ['reasonbench_cn_commonsense_circular', 'acc_origin'],
        ['reasonbench_cn_abductive_circular', 'acc_origin'],
        ['reasonbench_cn_deductive_circular', 'acc_origin'],
        ['reasonbench_cn_inductive_circular', 'acc_origin'],
        # Per-category, en.
        ['reasonbench_en_commonsense_circular', 'acc_origin'],
        ['reasonbench_en_abductive_circular', 'acc_origin'],
        ['reasonbench_en_deductive_logiqa_zh_translated_circular', 'acc_origin'],
        ['reasonbench_en_inductive_circular', 'acc_origin'],

        # Individual subsets, cn.
        ['reasonbench_cn_commonsense_circular', 'acc_origin'],
        ['reasonbench_cn_abductive_alphanlg_translated_circular', 'acc_origin'],
        ['reasonbench_cn_deductive_bbh3obj_translated_circular', 'acc_origin'],
        ['reasonbench_cn_deductive_logiqa_zh_circular', 'acc_origin'],
        ['reasonbench_cn_inductive_deer_translated_circular', 'acc_origin'],
        ['reasonbench_cn_inductive_selfgenerated_circular', 'acc_origin'],
        # Individual subsets, en.
        ['reasonbench_en_commonsense_circular', 'acc_origin'],
        ['reasonbench_en_abductive_alphanlg_circular', 'acc_origin'],
        ['reasonbench_en_deductive_logiqa_zh_translated_circular', 'acc_origin'],
        ['reasonbench_en_inductive_deer_circular', 'acc_origin'],
        ['reasonbench_en_inductive_selfgenerated_circular', 'acc_origin'],

        # ---- metric: perf_circular ----
        # Overall.
        ['reasonbench', 'perf_circular'],

        # Per-language roll-ups.
        ['reasonbench_cn_circular', 'perf_circular'],
        ['reasonbench_en_circular', 'perf_circular'],

        # Per-category, cn.
        ['reasonbench_cn_commonsense_circular', 'perf_circular'],
        ['reasonbench_cn_abductive_circular', 'perf_circular'],
        ['reasonbench_cn_deductive_circular', 'perf_circular'],
        ['reasonbench_cn_inductive_circular', 'perf_circular'],
        # Per-category, en.
        ['reasonbench_en_commonsense_circular', 'perf_circular'],
        ['reasonbench_en_abductive_circular', 'perf_circular'],
        ['reasonbench_en_deductive_logiqa_zh_translated_circular', 'perf_circular'],
        ['reasonbench_en_inductive_circular', 'perf_circular'],

        # Individual subsets, cn.
        ['reasonbench_cn_commonsense_circular', 'perf_circular'],
        ['reasonbench_cn_abductive_alphanlg_translated_circular', 'perf_circular'],
        ['reasonbench_cn_deductive_bbh3obj_translated_circular', 'perf_circular'],
        ['reasonbench_cn_deductive_logiqa_zh_circular', 'perf_circular'],
        ['reasonbench_cn_inductive_deer_translated_circular', 'perf_circular'],
        ['reasonbench_cn_inductive_selfgenerated_circular', 'perf_circular'],
        # Individual subsets, en.
        ['reasonbench_en_commonsense_circular', 'perf_circular'],
        ['reasonbench_en_abductive_alphanlg_circular', 'perf_circular'],
        ['reasonbench_en_deductive_logiqa_zh_translated_circular', 'perf_circular'],
        ['reasonbench_en_inductive_deer_circular', 'perf_circular'],
        ['reasonbench_en_inductive_selfgenerated_circular', 'perf_circular'],
    ],
    summary_groups=compassbench_v1_reason_groups,
)