eval_circular.py 4.26 KB
Newer Older
jerrrrry's avatar
jerrrrry committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
from mmengine.config import read_base

from opencompass.datasets.circular import (
    CircularARCDataset, CircularCEvalDataset, CircularCMMLUDataset,
    CircularCSQADataset, CircularEvaluator, CircularHSWAGDataset,
    CircularMMLUDataset, CircularOBQADataset, CircularRaceDataset)
from opencompass.summarizers import CircularSummarizer

with read_base():
    from opencompass.configs.datasets.ARC_c.ARC_c_gen_1e0de5 import \
        ARC_c_datasets
    from opencompass.configs.datasets.ARC_e.ARC_e_gen_1e0de5 import \
        ARC_e_datasets
    from opencompass.configs.datasets.ceval.ceval_gen_5f30c7 import \
        ceval_datasets
    from opencompass.configs.datasets.cmmlu.cmmlu_gen_c13365 import \
        cmmlu_datasets
    from opencompass.configs.datasets.commonsenseqa.commonsenseqa_gen_1da2d0 import \
        commonsenseqa_datasets
    from opencompass.configs.datasets.hellaswag.hellaswag_gen_6faab5 import \
        hellaswag_datasets
    from opencompass.configs.datasets.mmlu.mmlu_gen_a484b3 import mmlu_datasets
    from opencompass.configs.datasets.obqa.obqa_gen_9069e4 import obqa_datasets
    from opencompass.configs.datasets.race.race_gen_69ee4f import race_datasets
    from opencompass.configs.models.hf_internlm.hf_internlm_chat_7b import \
        models as hf_internlm_chat_7b_model
    from opencompass.configs.models.hf_internlm.hf_internlm_chat_20b import \
        models as hf_internlm_chat_20b_model
    from opencompass.configs.models.qwen.hf_qwen_7b_chat import \
        models as hf_qwen_7b_chat_model
    from opencompass.configs.models.qwen.hf_qwen_14b_chat import \
        models as hf_qwen_14b_chat_model
    from opencompass.configs.summarizers.groups.ceval import \
        ceval_summary_groups
    from opencompass.configs.summarizers.groups.cmmlu import \
        cmmlu_summary_groups
    from opencompass.configs.summarizers.groups.mmlu import mmlu_summary_groups

# Rewrite every imported dataset config in place so it runs in circular mode:
# swap in the matching Circular* dataset class, tag the abbr with
# '-circular-4' (the summarizer entries further down use the same suffix),
# and replace the evaluator with CircularEvaluator.
for ds, t in (
    (ceval_datasets, CircularCEvalDataset),
    (mmlu_datasets, CircularMMLUDataset),
    (cmmlu_datasets, CircularCMMLUDataset),
    (hellaswag_datasets, CircularHSWAGDataset),
    # Both ARC splits share a single circular dataset class.
    (ARC_e_datasets, CircularARCDataset),
    (ARC_c_datasets, CircularARCDataset),
    (commonsenseqa_datasets, CircularCSQADataset),
    (obqa_datasets, CircularOBQADataset),
    (race_datasets, CircularRaceDataset),
):
    for d in ds:
        d['type'] = t
        d['abbr'] += '-circular-4'
        # A fresh evaluator dict is built for each dataset; any evaluator
        # settings inherited from the base config are discarded.
        d['eval_cfg']['evaluator'] = dict(
            type=CircularEvaluator,
            circular_pattern='circular',
        )
        # NOTE: the dataset-level key is plural ('circular_patterns') while
        # the evaluator key above is singular ('circular_pattern').
        d['circular_patterns'] = 'circular'

# Flatten every module-level list whose name ends with '_datasets' into a
# single `datasets` list (`sum(list_of_lists, [])` concatenates them).
# The scan is purely name-based: any top-level variable with that suffix that
# is bound before this statement gets included, so avoid introducing helper
# names ending in '_datasets' above this line.
# The `k == 'datasets'` clause would also fold in an already-existing
# `datasets` variable; none is assigned earlier in the visible part of this
# file.
datasets = sum([
    v
    for k, v in locals().items() if k.endswith('_datasets') or k == 'datasets'
], [])
# Same name-based collection for models: every top-level '*_model' list bound
# so far is concatenated into `models`.
models = sum([v for k, v in locals().items() if k.endswith('_model')], [])

# Summarizer configuration.
# Extra group defined in this file: an 'average' over the ten subsets listed
# below. Its variable name ends with '_summary_groups', so the name-based scan
# that follows picks it up together with the imported groups.
other_summary_groups = [
    {
        'name':
        'average',
        'subsets': [
            'ceval', 'mmlu', 'cmmlu', 'hellaswag', 'ARC-e', 'ARC-c',
            'commonsense_qa', 'openbookqa_fact', 'race-middle', 'race-high'
        ]
    },
]
# Concatenate every top-level '*_summary_groups' list bound so far.
# Statement order matters: `origin_summary_groups` and `new_summary_groups`
# also carry this suffix, but neither is bound yet while the scan runs, so
# they are not included.
origin_summary_groups = sum(
    [v for k, v in locals().items() if k.endswith('_summary_groups')], [])
# Build circular variants of the groups: '-circular-4' is appended to the
# group name and to each subset name, mirroring the abbr suffix given to the
# datasets earlier in this file. Only 'name' and 'subsets' are carried over;
# any other key present on an original group is dropped.
new_summary_groups = []
for item in origin_summary_groups:
    new_summary_groups.append({
        'name':
        item['name'] + '-circular-4',
        'subsets': [i + '-circular-4' for i in item['subsets']],
    })
# Final summarizer config: which metrics to report, which rows to show (and in
# what order), and the circular summary groups assembled above.
summarizer = {
    'type': CircularSummarizer,
    'metric_types': ['acc_origin', 'perf_circular'],
    'dataset_abbrs': [
        # Overall average across all benchmarks.
        'average-circular-4',
        # One row per benchmark.
        'ceval-circular-4',
        'mmlu-circular-4',
        'cmmlu-circular-4',
        'hellaswag-circular-4',
        'ARC-e-circular-4',
        'ARC-c-circular-4',
        'commonsense_qa-circular-4',
        'openbookqa_fact-circular-4',
        'race-middle-circular-4',
        'race-high-circular-4',
        # Per-category rows; presumably these names come from the imported
        # ceval / mmlu / cmmlu summary groups -- verify against those configs.
        'ceval-humanities-circular-4',
        'ceval-stem-circular-4',
        'ceval-social-science-circular-4',
        'ceval-other-circular-4',
        'mmlu-humanities-circular-4',
        'mmlu-stem-circular-4',
        'mmlu-social-science-circular-4',
        'mmlu-other-circular-4',
        'cmmlu-humanities-circular-4',
        'cmmlu-stem-circular-4',
        'cmmlu-social-science-circular-4',
        'cmmlu-other-circular-4',
        'cmmlu-china-specific-circular-4',
    ],
    'summary_groups': new_summary_groups,
}