Commit c94cc943 authored by Leymore, committed by gaotong

Add release contribution

parent e6b5bdcb
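# C-Eval (GLM choice variant): 5-shot multiple-choice evaluation over the 52 Chinese exam
# subjects listed below. FixKRetriever with fix_id_list=[0, 1, 2, 3, 4] pins the first five
# dev-split items as in-context examples, GLMChoiceInferencer scores options A-D, and
# AccEvaluator reports accuracy.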
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import FixKRetriever
from opencompass.openicl.icl_inferencer import GLMChoiceInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import CEvalDataset
ceval_reader_cfg = dict(
input_columns=['question', 'A', 'B', 'C', 'D'],
output_column='answer',
train_split='dev',
test_split="val")
ceval_prompt_template = dict(
type=PromptTemplate,
template=None,
ice_token='</E>',
)
ceval_infer_cfg = dict(
ice_template=dict(
type=PromptTemplate,
template={
answer:
f'{{question}}\n(A) {{A}}\n(B) {{B}}\n(C) {{C}}\n(D) {{D}}\n答案: ({answer}) {{{answer}}}\n'
for answer in ['A', 'B', 'C', 'D']
}),
prompt_template=ceval_prompt_template,
retriever=dict(type=FixKRetriever),
inferencer=dict(type=GLMChoiceInferencer, fix_id_list=[0, 1, 2, 3, 4]))
ceval_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
ceval_all_sets = [
"操作系统",
"初中地理",
"初中化学",
"初中历史",
"初中生物",
"初中数学",
"初中物理",
"初中政治",
"大学编程",
"大学化学",
"大学经济学",
"大学物理",
"大学中国史",
"导游资格",
"法律职业资格",
"法学",
"概率统计",
"高等数学",
"高中地理",
"高中化学",
"高中历史",
"高中生物",
"高中数学",
"高中物理",
"高中语文",
"高中政治",
"公务员",
"工商管理",
"环境影响评价工程师",
"基础医学",
"计算机网络",
"计算机组成",
"教师资格",
"教育学",
"离散数学",
"临床医学",
"逻辑学",
"马克思主义基本原理",
"毛泽东思想和中国特色社会主义理论体系概论",
"兽医学",
"税务师",
"思想道德修养与法律基础",
"体育学",
"医师资格",
"艺术学",
"植物保护",
"中国语言文学",
"注册城乡规划师",
"注册电气工程师",
"注册会计师",
"注册计量师",
"注册消防工程师",
]
ceval_datasets = []
for _name in ceval_all_sets:
ceval_datasets.append(
dict(
type=CEvalDataset,
path="./data/ceval/release_ceval",
name=_name,
abbr='ceval-' + _name,
reader_cfg=ceval_reader_cfg,
infer_cfg=ceval_infer_cfg.copy(),
eval_cfg=ceval_eval_cfg.copy()))
ceval_datasets[-1]['infer_cfg'][
'prompt_template'] = ceval_prompt_template.copy()
ceval_datasets[-1]['infer_cfg']['prompt_template']['template'] = dict(
begin=[
dict(
role='SYSTEM',
fallback_role='HUMAN',
prompt=f'以下是中国关于{_name}考试的单项选择题,请选出其中的正确答案。'),
'</E>',
],
round=[
dict(
role='HUMAN',
prompt=
'{question}\n(A) {A}\n(B) {B}\n(C) {C}\n(D) {D}\n答案: ('),
],
)
del _name
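# MMLU (GLM choice variant): loads the lukaemon/mmlu subsets from HuggingFace with the same
# 5-shot FixKRetriever + GLMChoiceInferencer setup as the C-Eval config above. Only
# college_biology is currently enabled in mmlu_all_sets; the other 56 subjects are commented out.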
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import FixKRetriever
from opencompass.openicl.icl_inferencer import GLMChoiceInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import HFDataset
mmlu_reader_cfg = dict(
input_columns=['input', 'A', 'B', 'C', 'D'],
output_column='target',
train_split='validation')
mmlu_prompt_template = dict(
type=PromptTemplate,
template=None,
column_token_map={
'input': '</input>',
'A': '</A>',
'B': '</B>',
'C': '</C>',
'D': '</D>',
'target': '</target>'
},
ice_token='</E>',
)
mmlu_infer_cfg = dict(
ice_template=dict(
type=PromptTemplate,
template={
target: '</input>\n(A) </A>\n(B) </B>\n(C) </C>\n(D) </D>\n'
f'Answer: ({target}) </{target}>\n'
for target in ['A', 'B', 'C', 'D']
},
column_token_map={
'input': '</input>',
'A': '</A>',
'B': '</B>',
'C': '</C>',
'D': '</D>',
'target': '</target>'
}),
prompt_template=mmlu_prompt_template,
retriever=dict(type=FixKRetriever),
inferencer=dict(type=GLMChoiceInferencer, fix_id_list=[0, 1, 2, 3, 4]))
mmlu_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
mmlu_all_sets = [
"college_biology",
# "college_chemistry",
# "college_computer_science",
# "college_mathematics",
# "college_physics",
# "electrical_engineering",
# "astronomy",
# "anatomy",
# "abstract_algebra",
# "machine_learning",
# "clinical_knowledge",
# "global_facts",
# "management",
# "nutrition",
# "marketing",
# "professional_accounting",
# "high_school_geography",
# "international_law",
# "moral_scenarios",
# "computer_security",
# "high_school_microeconomics",
# "professional_law",
# "medical_genetics",
# "professional_psychology",
# "jurisprudence",
# "world_religions",
# "philosophy",
# "virology",
# "high_school_chemistry",
# "public_relations",
# "high_school_macroeconomics",
# "human_sexuality",
# "elementary_mathematics",
# "high_school_physics",
# "high_school_computer_science",
# "high_school_european_history",
# "business_ethics",
# "moral_disputes",
# "high_school_statistics",
# "miscellaneous",
# "formal_logic",
# "high_school_government_and_politics",
# "prehistory",
# "security_studies",
# "high_school_biology",
# "logical_fallacies",
# "high_school_world_history",
# "professional_medicine",
# "high_school_mathematics",
# "college_medicine",
# "high_school_us_history",
# "sociology",
# "econometrics",
# "high_school_psychology",
# "human_aging",
# "us_foreign_policy",
# "conceptual_physics",
]
mmlu_key_sets = [
'college_biology',
'college_chemistry',
'college_computer_science',
'college_mathematics',
'college_physics',
'electrical_engineering',
'astronomy',
'anatomy',
'abstract_algebra',
'machine_learning',
'clinical_knowledge',
'global_facts',
'management',
'nutrition',
'marketing',
'professional_accounting',
]
mmlu_datasets = []
for name in mmlu_all_sets:
mmlu_datasets.append(
dict(
type=HFDataset,
path='lukaemon/mmlu',
name=name,
reader_cfg=mmlu_reader_cfg,
infer_cfg=mmlu_infer_cfg.copy(),
eval_cfg=mmlu_eval_cfg))
mmlu_datasets[-1]['infer_cfg'][
'prompt_template'] = mmlu_prompt_template.copy()
mmlu_datasets[-1]['infer_cfg']['prompt_template']['template'] = dict(
begin=[
dict(
role='SYSTEM',
fallback_role='HUMAN',
prompt=
f'The following are multiple choice questions (with answers) about {name.replace("_", " ")}.'
),
'</E>',
],
round=[
dict(
role='HUMAN',
prompt=
'</input>\n(A) </A>\n(B) </B>\n(C) </C>\n(D) </D>\nAnswer: ('),
],
)
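# GovRepcrs: zero-shot long-report summarization, generating up to 500 tokens within an
# 8192-token context and scoring with BLEU.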
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import BleuEvaluator
from opencompass.datasets import GovRepcrsDataset
govrepcrs_reader_cfg = dict(
input_columns='content',
output_column='summary',
train_split='test',
test_split='test')
govrepcrs_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template=
"Please summarize the following English report in English:{content}\n{summary}."),
retriever=dict(type=ZeroRetriever),
inferencer=dict(
type=GenInferencer, batch_size=4, max_out_len=500, max_seq_len=8192))
govrepcrs_eval_cfg = dict(
evaluator=dict(type=BleuEvaluator),
pred_postprocessor=dict(type='general_cn'),
dataset_postprocessor=dict(type='general_cn'))
govrepcrs_datasets = [
dict(
type=GovRepcrsDataset,
path='./data/govrep/',
abbr='GovRepcrs',
reader_cfg=govrepcrs_reader_cfg,
infer_cfg=govrepcrs_infer_cfg,
eval_cfg=govrepcrs_eval_cfg)
]
from mmengine.config import read_base
with read_base():
from .gsm8k_gen_2dd372 import gsm8k_datasets # noqa: F401, F403
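# GSM8K: 4-shot chain-of-thought prompting. Each exemplar walks through the arithmetic step by
# step and ends with "The answer is N", which the 'gsm8k' postprocessor extracts before
# AccEvaluator computes exact-match accuracy.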
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import HFDataset
gsm8k_reader_cfg = dict(input_columns=['question'], output_column='answer')
gsm8k_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template=dict(
round=[
dict(role='HUMAN', prompt="Question: Angelo and Melanie want to plan how many hours over the next week they should study together for their test next week. They have 2 chapters of their textbook to study and 4 worksheets to memorize. They figure out that they should dedicate 3 hours to each chapter of their textbook and 1.5 hours for each worksheet. If they plan to study no more than 4 hours each day, how many days should they plan to study total over the next week if they take a 10-minute break every hour, include 3 10-minute snack breaks each day, and 30 minutes for lunch each day?\nLet's think step by step\nAnswer:"),
dict(role='BOT', prompt="Angelo and Melanie think they should dedicate 3 hours to each of the 2 chapters, 3 hours x 2 chapters = 6 hours total.\nFor the worksheets they plan to dedicate 1.5 hours for each worksheet, 1.5 hours x 4 worksheets = 6 hours total.\nAngelo and Melanie need to start with planning 12 hours to study, at 4 hours a day, 12 / 4 = 3 days.\nHowever, they need to include time for breaks and lunch. Every hour they want to include a 10-minute break, so 12 total hours x 10 minutes = 120 extra minutes for breaks.\nThey also want to include 3 10-minute snack breaks, 3 x 10 minutes = 30 minutes.\nAnd they want to include 30 minutes for lunch each day, so 120 minutes for breaks + 30 minutes for snack breaks + 30 minutes for lunch = 180 minutes, or 180 / 60 minutes per hour = 3 extra hours.\nSo Angelo and Melanie want to plan 12 hours to study + 3 hours of breaks = 15 hours total.\nThey want to study no more than 4 hours each day, 15 hours / 4 hours each day = 3.75\nThey will need to plan to study 4 days to allow for all the time they need.\nThe answer is 4\n"),
dict(role='HUMAN', prompt="Question: Mark's basketball team scores 25 2 pointers, 8 3 pointers and 10 free throws. Their opponents score double the 2 pointers but half the 3 pointers and free throws. What's the total number of points scored by both teams added together?\nLet's think step by step\nAnswer:"),
dict(role='BOT', prompt="Mark's team scores 25 2 pointers, meaning they scored 25*2= 50 points in 2 pointers.\nHis team also scores 6 3 pointers, meaning they scored 8*3= 24 points in 3 pointers\nThey scored 10 free throws, and free throws count as one point so they scored 10*1=10 points in free throws.\nAll together his team scored 50+24+10= 84 points\nMark's opponents scored double his team's number of 2 pointers, meaning they scored 50*2=100 points in 2 pointers.\nHis opponents scored half his team's number of 3 pointers, meaning they scored 24/2= 12 points in 3 pointers.\nThey also scored half Mark's team's points in free throws, meaning they scored 10/2=5 points in free throws.\nAll together Mark's opponents scored 100+12+5=117 points\nThe total score for the game is both team's scores added together, so it is 84+117=201 points\nThe answer is 201\n"),
dict(role='HUMAN', prompt="Question: Bella has two times as many marbles as frisbees. She also has 20 more frisbees than deck cards. If she buys 2/5 times more of each item, what would be the total number of the items she will have if she currently has 60 marbles?\nLet's think step by step\nAnswer:"),
dict(role='BOT', prompt="When Bella buys 2/5 times more marbles, she'll have increased the number of marbles by 2/5*60 = 24\nThe total number of marbles she'll have is 60+24 = 84\nIf Bella currently has 60 marbles, and she has two times as many marbles as frisbees, she has 60/2 = 30 frisbees.\nIf Bella buys 2/5 times more frisbees, she'll have 2/5*30 = 12 more frisbees.\nThe total number of frisbees she'll have will increase to 30+12 = 42\nBella also has 20 more frisbees than deck cards, meaning she has 30-20 = 10 deck cards\nIf she buys 2/5 times more deck cards, she'll have 2/5*10 = 4 more deck cards.\nThe total number of deck cards she'll have is 10+4 = 14\nTogether, Bella will have a total of 14+42+84 = 140 items\nThe answer is 140\n"),
dict(role='HUMAN', prompt="Question: A group of 4 fruit baskets contains 9 apples, 15 oranges, and 14 bananas in the first three baskets and 2 less of each fruit in the fourth basket. How many fruits are there?\nLet's think step by step\nAnswer:"),
dict(role='BOT', prompt="For the first three baskets, the number of apples and oranges in one basket is 9+15=24\nIn total, together with bananas, the number of fruits in one basket is 24+14=38 for the first three baskets.\nSince there are three baskets each having 38 fruits, there are 3*38=114 fruits in the first three baskets.\nThe number of apples in the fourth basket is 9-2=7\nThere are also 15-2=13 oranges in the fourth basket\nThe combined number of oranges and apples in the fourth basket is 13+7=20\nThe fourth basket also contains 14-2=12 bananas.\nIn total, the fourth basket has 20+12=32 fruits.\nThe four baskets together have 32+114=146 fruits.\nThe answer is 146\n"),
dict(role='HUMAN', prompt="Question: {question}\nLet's think step by step\nAnswer:"),
],
)),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GenInferencer, max_out_len=512))
gsm8k_eval_cfg = dict(evaluator=dict(type=AccEvaluator),
pred_postprocessor=dict(type='gsm8k'),
dataset_postprocessor=dict(type='gsm8k_dataset'))
gsm8k_datasets = [
dict(
abbr='gsm8k',
type=HFDataset,
path='gsm8k',
name='main',
reader_cfg=gsm8k_reader_cfg,
infer_cfg=gsm8k_infer_cfg,
eval_cfg=gsm8k_eval_cfg)
]
from mmengine.config import read_base
with read_base():
from .humaneval_gen_d428f1 import humaneval_datasets # noqa: F401, F403
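# HumanEval: zero-shot code completion on openai_humaneval; HumanEvaluator computes pass@k for
# k = 1, 10 and 100 after the 'humaneval' postprocessor cleans up the generated completion.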
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.datasets import HFDataset, HumanEvaluator
humaneval_reader_cfg = dict(
input_columns=['prompt'], output_column='task_id', train_split='test')
# TODO: allow empty output-column
humaneval_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template='{prompt}'),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GenInferencer, max_out_len=512))
humaneval_eval_cfg = dict(
evaluator=dict(type=HumanEvaluator),
k=[1, 10, 100],  # this parameter is specific to humaneval
pred_postprocessor=dict(type='humaneval'),
)
humaneval_datasets = [
dict(
type=HFDataset,
path='openai_humaneval',
reader_cfg=humaneval_reader_cfg,
infer_cfg=humaneval_infer_cfg,
eval_cfg=humaneval_eval_cfg)
]
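# LCSTS: zero-shot Chinese short-text summarization, scored with ROUGE.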
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import RougeEvaluator
from opencompass.datasets import LCSTSDataset
lcsts_reader_cfg = dict(input_columns=['content'], output_column='abst')
lcsts_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template=dict(round=[
dict(role='HUMAN', prompt='阅读以下文章,并给出简短的摘要:{content}\n摘要如下:'),
])),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GenInferencer))
lcsts_eval_cfg = dict(
evaluator=dict(type=RougeEvaluator),
pred_role='BOT',
pred_postprocessor=dict(type='lcsts'),
)
lcsts_datasets = [
dict(
type=LCSTSDataset,
abbr='lcsts',
path='./data/LCSTS',
reader_cfg=lcsts_reader_cfg,
infer_cfg=lcsts_infer_cfg,
eval_cfg=lcsts_eval_cfg)
]
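# MATH: 4-shot prompting with fully worked solutions; MATHEvaluator compares the extracted final
# answers against the reference solutions.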
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.datasets import MATHDataset, MATHEvaluator
math_reader_cfg = dict(input_columns=['problem'], output_column='solution')
math_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template=dict(round=[
dict(
role="HUMAN",
prompt=
"Problem:\nFind the domain of the expression $\frac{{\sqrt{{x-2}}}}{{\sqrt{{5-x}}}}$.}}\nSolution:"
),
dict(
role="BOT",
prompt=
"The expressions inside each square root must be non-negative. Therefore, $x-2 \ge 0$, so $x\ge2$, and $5 - x \ge 0$, so $x \le 5$. Also, the denominator cannot be equal to zero, so $5-x>0$, which gives $x<5$. Therefore, the domain of the expression is $\boxed{{[2,5)}}$.\nFinal Answer: The final answer is $[2,5)$. I hope it is correct."
),
dict(
role="HUMAN",
prompt=
"Problem:\nIf $\det \mathbf{{A}} = 2$ and $\det \mathbf{{B}} = 12,$ then find $\det (\mathbf{{A}} \mathbf{{B}}).$\nSolution:"
),
dict(
role="BOT",
prompt=
"We have that $\det (\mathbf{{A}} \mathbf{{B}}) = (\det \mathbf{{A}})(\det \mathbf{{B}}) = (2)(12) = \boxed{{24}}.$\nFinal Answer: The final answer is $24$. I hope it is correct."
),
dict(
role="HUMAN",
prompt=
"Problem:\nTerrell usually lifts two 20-pound weights 12 times. If he uses two 15-pound weights instead, how many times must Terrell lift them in order to lift the same total weight?\nSolution:"
),
dict(
role="BOT",
prompt=
"If Terrell lifts two 20-pound weights 12 times, he lifts a total of $2\cdot 12\cdot20=480$ pounds of weight. If he lifts two 15-pound weights instead for $n$ times, he will lift a total of $2\cdot15\cdot n=30n$ pounds of weight. Equating this to 480 pounds, we can solve for $n$: \begin{{align*}} 30n&=480\\ \Rightarrow\qquad n&=480/30=\boxed{{16}} \end{{align*}}\nFinal Answer: The final answer is $16$. I hope it is correct."
),
dict(
role="HUMAN",
prompt=
"Problem:\nIf the system of equations: \begin{{align*}} 6x-4y&=a,\\ 6y-9x &=b. \end{{align*}}has a solution $(x, y)$ where $x$ and $y$ are both nonzero, find $\frac{{a}}{{b}},$ assuming $b$ is nonzero.\nSolution:"
),
dict(
role="BOT",
prompt=
"If we multiply the first equation by $-\frac{{3}}{{2}}$, we obtain $$6y-9x=-\frac{{3}}{{2}}a.$$Since we also know that $6y-9x=b$, we have $$-\frac{{3}}{{2}}a=b\Rightarrow\frac{{a}}{{b}}=\boxed{{-\frac{{2}}{{3}}}}.$$\nFinal Answer: The final answer is $-\frac{{2}}{{3}}$. I hope it is correct."
),
dict(role="HUMAN", prompt="Problem:\n{problem}\nSolution:\n"),
])),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GenInferencer, max_out_len=512))
math_eval_cfg = dict(
evaluator=dict(type=MATHEvaluator), pred_postprocessor=dict(type='math'))
math_datasets = [
dict(
type=MATHDataset,
abbr='math',
path='./data/math/math.json',
reader_cfg=math_reader_cfg,
infer_cfg=math_infer_cfg,
eval_cfg=math_eval_cfg)
]
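# MMLU (generation variant, chat-style prompts): 5-shot over all 57 subjects, with in-context
# examples rendered as HUMAN/BOT turns and predictions scored by taking the first capital letter
# of the generation ('first-capital' postprocessor).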
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import FixKRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import MMLUDataset
# None of the MMLU datasets on HuggingFace are parsed correctly, so we use our own dataset reader
# Please download the dataset from https://people.eecs.berkeley.edu/~hendrycks/data.tar
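# A minimal, untested sketch (not part of the evaluation config; run it separately) of fetching
# the archive above and unpacking it so MMLUDataset can read the per-subject CSVs from
# ./data/mmlu/. The destination layout assumed here is a guess -- adjust it to whatever your
# MMLUDataset checkout expects.
import os
import tarfile
import urllib.request

def download_mmlu(dest_dir: str = "./data/mmlu") -> None:
    """Download and unpack the Hendrycks MMLU archive into dest_dir."""
    os.makedirs(dest_dir, exist_ok=True)
    archive = os.path.join(dest_dir, "data.tar")
    # Fetch the tarball from the URL given in the comment above.
    urllib.request.urlretrieve(
        "https://people.eecs.berkeley.edu/~hendrycks/data.tar", archive)
    with tarfile.open(archive) as tar:
        # The tarball unpacks into a top-level "data/" folder holding dev/val/test CSVs.
        tar.extractall(dest_dir)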
mmlu_reader_cfg = dict(
input_columns=["input", "A", "B", "C", "D"],
output_column="target",
train_split='dev')
mmlu_all_sets = [
"college_biology",
"college_chemistry",
"college_computer_science",
"college_mathematics",
"college_physics",
"electrical_engineering",
"astronomy",
"anatomy",
"abstract_algebra",
"machine_learning",
"clinical_knowledge",
"global_facts",
"management",
"nutrition",
"marketing",
"professional_accounting",
"high_school_geography",
"international_law",
"moral_scenarios",
"computer_security",
"high_school_microeconomics",
"professional_law",
"medical_genetics",
"professional_psychology",
"jurisprudence",
"world_religions",
"philosophy",
"virology",
"high_school_chemistry",
"public_relations",
"high_school_macroeconomics",
"human_sexuality",
"elementary_mathematics",
"high_school_physics",
"high_school_computer_science",
"high_school_european_history",
"business_ethics",
"moral_disputes",
"high_school_statistics",
"miscellaneous",
"formal_logic",
"high_school_government_and_politics",
"prehistory",
"security_studies",
"high_school_biology",
"logical_fallacies",
"high_school_world_history",
"professional_medicine",
"high_school_mathematics",
"college_medicine",
"high_school_us_history",
"sociology",
"econometrics",
"high_school_psychology",
"human_aging",
"us_foreign_policy",
"conceptual_physics",
]
mmlu_datasets = []
for _name in mmlu_all_sets:
_hint = f'There is a single choice question about {_name.replace("_", " ")}. Answer the question by replying A, B, C or D.'
mmlu_infer_cfg = dict(
ice_template=dict(
type=PromptTemplate,
template=dict(round=[
dict(
role="HUMAN",
prompt=
f"{_hint}\nQ: {{input}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\nA: "
),
dict(role="BOT", prompt="{target}\n")
]),
),
prompt_template=dict(
type=PromptTemplate,
template=dict(
begin="</E>",
round=[
dict(
role="HUMAN",
prompt=
f"{_hint}\nQ: {{input}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\nA: "
),
],
),
ice_token="</E>",
),
retriever=dict(type=FixKRetriever),
inferencer=dict(type=GenInferencer, fix_id_list=[0, 1, 2, 3, 4]),
)
mmlu_eval_cfg = dict(
evaluator=dict(type=AccEvaluator),
pred_postprocessor=dict(type="first-capital"))
mmlu_datasets.append(
dict(
abbr=f"lukaemon_mmlu_{_name}",
type=MMLUDataset,
path="./data/mmlu/",
name=_name,
reader_cfg=mmlu_reader_cfg,
infer_cfg=mmlu_infer_cfg,
eval_cfg=mmlu_eval_cfg,
))
del _name, _hint
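# MMLU (generation variant, completion-style prompts): same 5-shot setup as above, but the
# in-context examples and the query are rendered as plain strings rather than chat turns.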
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import FixKRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import MMLUDataset
# None of the MMLU datasets on HuggingFace are parsed correctly, so we use our own dataset reader
# Please download the dataset from https://people.eecs.berkeley.edu/~hendrycks/data.tar
mmlu_reader_cfg = dict(
input_columns=["input", "A", "B", "C", "D"],
output_column="target",
train_split='dev')
mmlu_all_sets = [
"college_biology",
"college_chemistry",
"college_computer_science",
"college_mathematics",
"college_physics",
"electrical_engineering",
"astronomy",
"anatomy",
"abstract_algebra",
"machine_learning",
"clinical_knowledge",
"global_facts",
"management",
"nutrition",
"marketing",
"professional_accounting",
"high_school_geography",
"international_law",
"moral_scenarios",
"computer_security",
"high_school_microeconomics",
"professional_law",
"medical_genetics",
"professional_psychology",
"jurisprudence",
"world_religions",
"philosophy",
"virology",
"high_school_chemistry",
"public_relations",
"high_school_macroeconomics",
"human_sexuality",
"elementary_mathematics",
"high_school_physics",
"high_school_computer_science",
"high_school_european_history",
"business_ethics",
"moral_disputes",
"high_school_statistics",
"miscellaneous",
"formal_logic",
"high_school_government_and_politics",
"prehistory",
"security_studies",
"high_school_biology",
"logical_fallacies",
"high_school_world_history",
"professional_medicine",
"high_school_mathematics",
"college_medicine",
"high_school_us_history",
"sociology",
"econometrics",
"high_school_psychology",
"human_aging",
"us_foreign_policy",
"conceptual_physics",
]
mmlu_datasets = []
for _name in mmlu_all_sets:
_hint = f'The following are multiple choice questions (with answers) about {_name.replace("_", " ")}.\n\n'
mmlu_infer_cfg = dict(
ice_template=dict(
type=PromptTemplate,
template=
"{input}\nA. {A}\nB. {B}\nC. {C}\nD. {D}\nAnswer: {target}\n",
),
prompt_template=dict(
type=PromptTemplate,
template=
f"{_hint}</E>{{input}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\nAnswer:",
ice_token="</E>",
),
retriever=dict(type=FixKRetriever),
inferencer=dict(type=GenInferencer, fix_id_list=[0, 1, 2, 3, 4]),
)
mmlu_eval_cfg = dict(
evaluator=dict(type=AccEvaluator),
pred_postprocessor=dict(type="first-capital"),
)
mmlu_datasets.append(
dict(
abbr=f"lukaemon_mmlu_{_name}",
type=MMLUDataset,
path="./data/mmlu/",
name=_name,
reader_cfg=mmlu_reader_cfg,
infer_cfg=mmlu_infer_cfg,
eval_cfg=mmlu_eval_cfg,
))
del _name, _hint
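# MMLU (generation variant, "Question/Answer" exemplars): the in-context examples are phrased as
# "Question: ... Answer: ..." while the final query keeps the "Q: ... A:" form.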
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import FixKRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import MMLUDataset
# None of the MMLU datasets on HuggingFace are parsed correctly, so we use our own dataset reader
# Please download the dataset from https://people.eecs.berkeley.edu/~hendrycks/data.tar
mmlu_reader_cfg = dict(
input_columns=["input", "A", "B", "C", "D"],
output_column="target",
train_split='dev')
mmlu_all_sets = [
"college_biology",
"college_chemistry",
"college_computer_science",
"college_mathematics",
"college_physics",
"electrical_engineering",
"astronomy",
"anatomy",
"abstract_algebra",
"machine_learning",
"clinical_knowledge",
"global_facts",
"management",
"nutrition",
"marketing",
"professional_accounting",
"high_school_geography",
"international_law",
"moral_scenarios",
"computer_security",
"high_school_microeconomics",
"professional_law",
"medical_genetics",
"professional_psychology",
"jurisprudence",
"world_religions",
"philosophy",
"virology",
"high_school_chemistry",
"public_relations",
"high_school_macroeconomics",
"human_sexuality",
"elementary_mathematics",
"high_school_physics",
"high_school_computer_science",
"high_school_european_history",
"business_ethics",
"moral_disputes",
"high_school_statistics",
"miscellaneous",
"formal_logic",
"high_school_government_and_politics",
"prehistory",
"security_studies",
"high_school_biology",
"logical_fallacies",
"high_school_world_history",
"professional_medicine",
"high_school_mathematics",
"college_medicine",
"high_school_us_history",
"sociology",
"econometrics",
"high_school_psychology",
"human_aging",
"us_foreign_policy",
"conceptual_physics",
]
mmlu_datasets = []
for _name in mmlu_all_sets:
_hint = f'There is a single choice question about {_name.replace("_", " ")}. Answer the question by replying A, B, C or D.'
mmlu_infer_cfg = dict(
ice_template=dict(
type=PromptTemplate,
template=dict(round=[
dict(
role="HUMAN",
prompt=
f"{_hint}\nQuestion: {{input}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\nAnswer: "
),
dict(role="BOT", prompt="{target}\n")
]),
),
prompt_template=dict(
type=PromptTemplate,
template=dict(
begin="</E>",
round=[
dict(
role="HUMAN",
prompt=
f"{_hint}\nQ: {{input}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\nA: "
),
],
),
ice_token="</E>",
),
retriever=dict(type=FixKRetriever),
inferencer=dict(type=GenInferencer, fix_id_list=[0, 1, 2, 3, 4]),
)
mmlu_eval_cfg = dict(
evaluator=dict(type=AccEvaluator),
pred_postprocessor=dict(type="first-capital"))
mmlu_datasets.append(
dict(
abbr=f"lukaemon_mmlu_{_name}",
type=MMLUDataset,
path="./data/mmlu/",
name=_name,
reader_cfg=mmlu_reader_cfg,
infer_cfg=mmlu_infer_cfg,
eval_cfg=mmlu_eval_cfg,
))
del _name, _hint
from mmengine.config import read_base
with read_base():
from .nq_gen_c00b89 import nq_datasets # noqa: F401, F403
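# Natural Questions: zero-shot open-domain QA; NQEvaluator scores the BOT-role prediction against
# the reference answers.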
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.datasets import NaturalQuestionDataset, NQEvaluator
nq_reader_cfg = dict(
input_columns=['question'], output_column='answer', train_split='test')
nq_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template=dict(
round=[
dict(role='HUMAN', prompt='Answer these questions:\nQ: {question}?'),
dict(role='BOT', prompt='A:'),
], )),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GenInferencer))
nq_eval_cfg = dict(evaluator=dict(type=NQEvaluator), pred_role="BOT")
nq_datasets = [
dict(
type=NaturalQuestionDataset,
abbr='nq',
path='./data/nq/',
reader_cfg=nq_reader_cfg,
infer_cfg=nq_infer_cfg,
eval_cfg=nq_eval_cfg)
]
from mmengine.config import read_base
with read_base():
from .piqa_ppl_788dbe import piqa_datasets # noqa: F401, F403
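# QASPER (truncated-evidence variant): zero-shot QA over paper evidence passages within an
# 8192-token context, scored with TriviaQAEvaluator.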
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.datasets import QASPERCUTDataset, TriviaQAEvaluator
qaspercut_reader_cfg = dict(
input_columns=['question', 'evidence'],
output_column='answer',
train_split='dev',
test_split='dev')
qaspercut_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template="{evidence}\nAnswer these questions:\nQ: {question}?\nA:"),
retriever=dict(type=ZeroRetriever),
inferencer=dict(
type=GenInferencer, max_out_len=50, max_seq_len=8192, batch_size=4))
qaspercut_eval_cfg = dict(evaluator=dict(type=TriviaQAEvaluator))
qaspercut_datasets = [
dict(
type=QASPERCUTDataset,
abbr='qaspercut',
path='./data/QASPER/',
reader_cfg=qaspercut_reader_cfg,
infer_cfg=qaspercut_infer_cfg,
eval_cfg=qaspercut_eval_cfg)
]
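# Safety probe set: free-form generation on the prompts in ./data/safety.txt, scored by
# ToxicEvaluator; the 'idx' column is only a placeholder output (see the TODO below).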
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import ToxicEvaluator
from opencompass.datasets import SafetyDataset
safety_reader_cfg = dict(
input_columns=['prompt'],
output_column='idx',
train_split='test',
test_split='test')
# TODO: allow empty output-column
safety_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template='{prompt}'),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GenInferencer))
safety_eval_cfg = dict(evaluator=dict(type=ToxicEvaluator), )
safety_datasets = [
dict(
type=SafetyDataset,
path='./data/safety.txt',
reader_cfg=safety_reader_cfg,
infer_cfg=safety_infer_cfg,
eval_cfg=safety_eval_cfg)
]
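# SocialIQA (perplexity variant): PPLInferencer scores each of the three candidate answers and
# the lowest-perplexity option is taken as the prediction.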
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import PPLInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import HFDataset
siqa_reader_cfg = dict(
input_columns=['context', 'question', 'answerA', 'answerB', 'answerC'],
output_column='label',
test_split='validation')
siqa_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template={
1: '{context} \nQ: {question}\nA: {answerA}',
2: '{context} \nQ: {question}\nA: {answerB}',
3: '{context} \nQ: {question}\nA: {answerC}',
}),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=PPLInferencer))
siqa_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
siqa_datasets = [
dict(
abbr="siqa",
type=HFDataset,
path='social_i_qa',
name='social_i_qa',
reader_cfg=siqa_reader_cfg,
infer_cfg=siqa_infer_cfg,
eval_cfg=siqa_eval_cfg)
]
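# SocialIQA (perplexity variant with a "The following makes sense" preamble): otherwise the same
# setup as the config above, except for the prefix prepended to each candidate.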
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import PPLInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import HFDataset
siqa_reader_cfg = dict(
input_columns=['context', 'question', 'answerA', 'answerB', 'answerC'],
output_column='label',
test_split='validation')
siqa_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template={
1: 'The following makes sense:\n {context} \nQ: {question}\nA: {answerA}',
2: 'The following makes sense:\n {context} \nQ: {question}\nA: {answerB}',
3: 'The following makes sense:\n {context} \nQ: {question}\nA: {answerC}',
}),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=PPLInferencer))
siqa_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
siqa_datasets = [
dict(
abbr="siqa",
type=HFDataset,
path='social_i_qa',
reader_cfg=siqa_reader_cfg,
infer_cfg=siqa_infer_cfg,
eval_cfg=siqa_eval_cfg)
]
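# SummEdits: factual-consistency classification; the two PPL templates end with "No" and "Yes"
# respectively, and the lower-perplexity branch decides the predicted label.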
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import PPLInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import HFDataset
summedits_reader_cfg = dict(
input_columns=['doc', 'summary'],
output_column='label',
test_split='train')
summedits_prompt1 = "Given the document below, you have to determine if 'Yes' or 'No', the summary is factually consistent with the document."
summedits_prompt2 = "Document:\n{doc}\nSummary:\n{summary}\nIs the summary factually consistent with the document? "
summedits_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template={
0:
dict(
begin=[
dict(
role='SYSTEM',
fallback_role='HUMAN',
prompt=summedits_prompt1)
],
round=[
dict(role="HUMAN", prompt=summedits_prompt2),
dict(role="BOT", prompt="No")
]),
1:
dict(
begin=[
dict(
role='SYSTEM',
fallback_role='HUMAN',
prompt=summedits_prompt1)
],
round=[
dict(role="HUMAN", prompt=summedits_prompt2),
dict(role="BOT", prompt="Yes")
]),
}),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=PPLInferencer))
summedits_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
summedits_datasets = [
dict(
type=HFDataset,
abbr='summedits',
path='json',
split='train',
data_files='./data/summedits/summedits.jsonl',
reader_cfg=summedits_reader_cfg,
infer_cfg=summedits_infer_cfg,
eval_cfg=summedits_eval_cfg)
]