# mmlu_zero_shot_gen_47e2c0.py -- MMLU zero-shot generation config.
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import FixKRetriever, ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import MMLUDataset
from opencompass.utils.text_postprocessors import first_option_postprocess

# None of the MMLU datasets on Hugging Face are parsed correctly, so we use our own dataset reader.
# Please download the dataset from https://people.eecs.berkeley.edu/~hendrycks/data.tar

# Reader configuration shared by every MMLU subject config built in this file:
# 'input' plus the four option columns 'A'-'D' are the input columns,
# 'target' is the output column, and the 'dev' split is used as the train
# split.
mmlu_reader_cfg = dict(
    input_columns=['input', 'A', 'B', 'C', 'D'],
    output_column='target',
    train_split='dev',
)

# Names of the MMLU subjects to evaluate. Each entry is passed as the dataset
# `name` and is also embedded (with underscores replaced by spaces) in the
# prompt hint, so the order and spelling here must be preserved.
mmlu_all_sets = [
    'college_biology',
    'college_chemistry',
    'college_computer_science',
    'college_mathematics',
    'college_physics',
    'electrical_engineering',
    'astronomy',
    'anatomy',
    'abstract_algebra',
    'machine_learning',
    'clinical_knowledge',
    'global_facts',
    'management',
    'nutrition',
    'marketing',
    'professional_accounting',
    'high_school_geography',
    'international_law',
    'moral_scenarios',
    'computer_security',
    'high_school_microeconomics',
    'professional_law',
    'medical_genetics',
    'professional_psychology',
    'jurisprudence',
    'world_religions',
    'philosophy',
    'virology',
    'high_school_chemistry',
    'public_relations',
    'high_school_macroeconomics',
    'human_sexuality',
    'elementary_mathematics',
    'high_school_physics',
    'high_school_computer_science',
    'high_school_european_history',
    'business_ethics',
    'moral_disputes',
    'high_school_statistics',
    'miscellaneous',
    'formal_logic',
    'high_school_government_and_politics',
    'prehistory',
    'security_studies',
    'high_school_biology',
    'logical_fallacies',
    'high_school_world_history',
    'professional_medicine',
    'high_school_mathematics',
    'college_medicine',
    'high_school_us_history',
    'sociology',
    'econometrics',
    'high_school_psychology',
    'human_aging',
    'us_foreign_policy',
    'conceptual_physics',
]


# Build one dataset config per MMLU subject, in the order of `mmlu_all_sets`.
mmlu_datasets = []
for _name in mmlu_all_sets:
    # Subject names use underscores; show them as plain words in the prompt.
    _hint = f'There is a single choice question about {_name.replace("_", " ")}. Answer the question by replying A, B, C or D.'
    mmlu_infer_cfg = dict(
        # Template for an in-context example: the question turn followed by
        # the answer turn.
        # NOTE(review): presumably unused while the retriever below is
        # ZeroRetriever -- confirm before removing.
        ice_template=dict(
            type=PromptTemplate,
            template=dict(round=[
                dict(
                    role='HUMAN',
                    # Doubled braces survive the f-string as literal
                    # '{input}', '{A}', ... placeholders in the template.
                    prompt=
                    f'{_hint}\nQ: {{input}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\nA: '
                ),
                dict(role='BOT', prompt='{target}\n')
            ]),
        ),
        # Template for the question actually being asked. '</E>' is the
        # `ice_token` placeholder and is placed at the very start of the
        # prompt via `begin`.
        prompt_template=dict(
            type=PromptTemplate,
            template=dict(
                begin='</E>',
                round=[
                    dict(
                        role='HUMAN',
                        # Same layout as the example template, with a
                        # "Let's think step by step." cue before the answer.
                        prompt=
                        f"{_hint}\nQ: {{input}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\nLet's think step by step. A: "
                    ),
                ],
            ),
            ice_token='</E>',
        ),
        retriever=dict(type=ZeroRetriever),
        inferencer=dict(type=GenInferencer, max_out_len=256),
    )

    # Predictions are post-processed with `first_option_postprocess`
    # (options 'ABCD') and then scored by AccEvaluator.
    mmlu_eval_cfg = dict(
        evaluator=dict(type=AccEvaluator),
        pred_postprocessor=dict(type=first_option_postprocess, options='ABCD'))

    mmlu_datasets.append(
        dict(
            abbr=f'lukaemon_mmlu_{_name}',
            type=MMLUDataset,
            path='./data/mmlu/',
            name=_name,
            # The same reader config object is shared by every subject.
            reader_cfg=mmlu_reader_cfg,
            infer_cfg=mmlu_infer_cfg,
            eval_cfg=mmlu_eval_cfg,
        ))