# cibench.py
from mmengine.config import read_base

with read_base():
    from .groups.cibench import cibench_summary_groups

# Summarizer configuration for the CIBench result tables.
#
# `dataset_abbrs` is an ordered list of strings. Entries written as
# '<dataset_abbr>:<metric>' name a metric of a dataset or summary group;
# entries wrapped in '########' are the section-header rows (marked
# "# category" below). NOTE(review): how the consumer renders header rows is
# not visible in this file -- confirm against the summarizer implementation.
#
# `summary_groups` is built by concatenating every variable visible at this
# point whose name ends with '_summary_groups' (e.g. `cibench_summary_groups`
# brought in through the `read_base()` import above).
summarizer = dict(
    dataset_abbrs=[
        '######## CIBench Generation########', # category
        'cibench_generation:tool_rate',
        'cibench_generation:executable',
        'cibench_generation:numeric_correct',
        'cibench_generation:text_score',
        'cibench_generation:vis_sim',
        '######## CIBench Generation Oracle########', # category
        'cibench_generation_oracle:tool_rate',
        'cibench_generation_oracle:executable',
        'cibench_generation_oracle:numeric_correct',
        'cibench_generation_oracle:text_score',
        'cibench_generation_oracle:vis_sim',
        '######## CIBench Template ########', # category
        'cibench_template:tool_rate',
        'cibench_template:executable',
        'cibench_template:numeric_correct',
        'cibench_template:text_score',
        'cibench_template:vis_sim',
        '######## CIBench Template Oracle########', # category
        'cibench_template_oracle:tool_rate',
        'cibench_template_oracle:executable',
        'cibench_template_oracle:numeric_correct',
        'cibench_template_oracle:text_score',
        'cibench_template_oracle:vis_sim',
        '######## CIBench Template Chinese ########', # category
        'cibench_template_cn:tool_rate',
        'cibench_template_cn:executable',
        'cibench_template_cn:numeric_correct',
        'cibench_template_cn:text_score',
        'cibench_template_cn:vis_sim',
        '######## CIBench Template Chinese Oracle########', # category
        'cibench_template_cn_oracle:tool_rate',
        'cibench_template_cn_oracle:executable',
        'cibench_template_cn_oracle:numeric_correct',
        'cibench_template_cn_oracle:text_score',
        'cibench_template_cn_oracle:vis_sim',
        '######## CIBench Category Metric ########',
        'cibench_data_manipulation:scores',
        'cibench_data_visualization:scores',
        'cibench_modeling:scores',
        'cibench_nlp:scores',
        'cibench_ip:scores',
        'cibench_math:scores',
        '######## CIBench Category Metric Oracle ########',
        'cibench_data_manipulation_oracle:scores',
        'cibench_data_visualization_oracle:scores',
        'cibench_modeling_oracle:scores',
        'cibench_nlp_oracle:scores',
        'cibench_ip_oracle:scores',
        'cibench_math_oracle:scores',
    ],
    # sum(list_of_lists, []) flattens the collected group lists into one list;
    # with no matching variables in scope the result is an empty list.
    summary_groups=sum(
        [v for k, v in locals().items() if k.endswith('_summary_groups')], [])
)