mmlu_ppl_ac766d.py 3.12 KB
Newer Older
gaotongxiao's avatar
gaotongxiao committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import FixKRetriever
from opencompass.openicl.icl_inferencer import PPLInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import MMLUDataset

# None of the mmlu dataset in huggingface is correctly parsed, so we use our own dataset reader
# Please download the dataset from https://people.eecs.berkeley.edu/~hendrycks/data.tar

mmlu_reader_cfg = dict(
    input_columns=["input", "A", "B", "C", "D"],
    output_column="target",
    train_split='dev')

mmlu_all_sets = [
    "college_biology",
    "college_chemistry",
    "college_computer_science",
    "college_mathematics",
    "college_physics",
    "electrical_engineering",
    "astronomy",
    "anatomy",
    "abstract_algebra",
    "machine_learning",
    "clinical_knowledge",
    "global_facts",
    "management",
    "nutrition",
    "marketing",
    "professional_accounting",
    "high_school_geography",
    "international_law",
    "moral_scenarios",
    "computer_security",
    "high_school_microeconomics",
    "professional_law",
    "medical_genetics",
    "professional_psychology",
    "jurisprudence",
    "world_religions",
    "philosophy",
    "virology",
    "high_school_chemistry",
    "public_relations",
    "high_school_macroeconomics",
    "human_sexuality",
    "elementary_mathematics",
    "high_school_physics",
    "high_school_computer_science",
    "high_school_european_history",
    "business_ethics",
    "moral_disputes",
    "high_school_statistics",
    "miscellaneous",
    "formal_logic",
    "high_school_government_and_politics",
    "prehistory",
    "security_studies",
    "high_school_biology",
    "logical_fallacies",
    "high_school_world_history",
    "professional_medicine",
    "high_school_mathematics",
    "college_medicine",
    "high_school_us_history",
    "sociology",
    "econometrics",
    "high_school_psychology",
    "human_aging",
    "us_foreign_policy",
    "conceptual_physics",
]

mmlu_datasets = []
for _name in mmlu_all_sets:
    _hint = f'The following are multiple choice questions (with answers) about  {_name.replace("_", " ")}.\n\n'
Fengzhe Zhou's avatar
Fengzhe Zhou committed
78
    question_overall = '{input}\nA. {A}\nB. {B}\nC. {C}\nD. {D}'
gaotongxiao's avatar
gaotongxiao committed
79
80
81
    mmlu_infer_cfg = dict(
        ice_template=dict(
            type=PromptTemplate,
Fengzhe Zhou's avatar
Fengzhe Zhou committed
82
            template={opt: f"{question_overall}\nAnswer: {opt}\n" for opt in ["A", "B", "C", "D"]},
gaotongxiao's avatar
gaotongxiao committed
83
84
85
        ),
        prompt_template=dict(
            type=PromptTemplate,
Fengzhe Zhou's avatar
Fengzhe Zhou committed
86
            template={opt: f"{_hint}</E>{question_overall}\nAnswer: {opt}" for opt in ["A", "B", "C", "D"]},
gaotongxiao's avatar
gaotongxiao committed
87
88
            ice_token="</E>",
        ),
89
90
        retriever=dict(type=FixKRetriever, fix_id_list=[0, 1, 2, 3, 4]),
        inferencer=dict(type=PPLInferencer),
gaotongxiao's avatar
gaotongxiao committed
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
    )

    mmlu_eval_cfg = dict(evaluator=dict(type=AccEvaluator), )

    mmlu_datasets.append(
        dict(
            abbr=f"lukaemon_mmlu_{_name}",
            type=MMLUDataset,
            path="./data/mmlu/",
            name=_name,
            reader_cfg=mmlu_reader_cfg,
            infer_cfg=mmlu_infer_cfg,
            eval_cfg=mmlu_eval_cfg,
        ))

del _name, _hint