Commit 7d346000 authored by gaotongxiao

initial commit
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.datasets import HFDataset, HumanEvaluator
humaneval_reader_cfg = dict(
input_columns=['prompt'], output_column='task_id', train_split='test')
# TODO: allow empty output-column
humaneval_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template=dict(
begin=[
dict(
role='SYSTEM',
fallback_role='HUMAN',
prompt='Complete the following python code:'),
],
round=[
dict(role='HUMAN', prompt='{prompt}'),
])),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GenInferencer, max_out_len=512))
humaneval_eval_cfg = dict(
evaluator=dict(type=HumanEvaluator),
pred_role='BOT',
    k=[1, 10, 100],  # pass@k values; this parameter is specific to humaneval
pred_postprocessor=dict(type='humaneval'),
)
humaneval_datasets = [
dict(
type=HFDataset,
path='openai_humaneval',
reader_cfg=humaneval_reader_cfg,
infer_cfg=humaneval_infer_cfg,
eval_cfg=humaneval_eval_cfg)
]
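# Note on k=[1, 10, 100]: HumanEvaluator reports pass@k over multiple samples
# per task. A minimal sketch of the unbiased pass@k estimator from the Codex
# paper (illustrative only, not HumanEvaluator's internal code):
import numpy as np

def pass_at_k(n: int, c: int, k: int) -> float:
    """n generated samples, c of which pass the unit tests, budget k."""
    if n - c < k:
        return 1.0
    return 1.0 - np.prod(1.0 - k / np.arange(n - c + 1, n + 1))

# With 200 samples and 37 passing, pass_at_k(200, 37, 1) == 37 / 200.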
from mmengine.config import read_base
with read_base():
from .iwslt2017_gen_02ea0b import iwslt2017_datasets # noqa: F401, F403
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import BM25Retriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import BleuEvaluator
from opencompass.datasets import IWSLT2017Dataset
iwslt2017_reader_cfg = dict(
input_columns='en', output_column='de', train_split='validation')
iwslt2017_infer_cfg = dict(
    ice_template=dict(type=PromptTemplate,
template=dict(
begin=[
dict(role='SYSTEM', fallback_role="HUMAN", prompt='Please translate the following English statements to German:'),
'</E>',
],
round=[
dict(role='HUMAN', prompt='{en}'),
dict(role='BOT', prompt='{de}'),
]
),
ice_token='</E>'),
retriever=dict(type=BM25Retriever, ice_num=1),
inferencer=dict(type=GenInferencer))
iwslt2017_eval_cfg = dict(
evaluator=dict(type=BleuEvaluator),
pred_role='BOT',
pred_postprocessor=dict(type='general_cn'),
dataset_postprocessor=dict(type='general_cn'))
iwslt2017_datasets = [
dict(
type=IWSLT2017Dataset,
path='iwslt2017',
name='iwslt2017-en-de',
reader_cfg=iwslt2017_reader_cfg,
infer_cfg=iwslt2017_infer_cfg,
eval_cfg=iwslt2017_eval_cfg)
]
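# How the pieces above fit together: BM25Retriever selects ice_num=1 training
# example per test sentence, renders it with the ice_template's round, and
# splices it in at the '</E>' ice_token. A toy sketch of that substitution
# (plain Python, not OpenCompass internals; the example pair is made up):
ice = 'The weather is nice today.\nDas Wetter ist heute schön.\n'
prompt = ('Please translate the following English statements to German:\n'
          '</E>{en}\n').replace('</E>', ice)
print(prompt.format(en='Good morning.'))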
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.datasets import lambadaDataset, LambadaEvaluator
lambada_reader_cfg = dict(
input_columns=['prompt'],
output_column='label',
train_split='test',
test_split='test')
lambada_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template='Please complete the following sentence: {prompt}'),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GenInferencer, max_out_len=5))
lambada_eval_cfg = dict(evaluator=dict(type=LambadaEvaluator))
lambada_datasets = [
dict(
abbr='lambada',
type=lambadaDataset,
path='craffel/openai_lambada',
reader_cfg=lambada_reader_cfg,
infer_cfg=lambada_infer_cfg,
eval_cfg=lambada_eval_cfg)
]
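# LAMBADA asks the model to produce the final word of a passage, hence
# max_out_len=5. A rough sketch of a last-word accuracy metric in the spirit
# of LambadaEvaluator (an assumption, not the evaluator's exact code):
import re

def last_word_accuracy(predictions, references):
    correct = 0
    for pred, ref in zip(predictions, references):
        words = re.split(r"[\s.,!?\"']+", pred.strip())
        correct += bool(words) and words[0] == ref
    return 100.0 * correct / len(references)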
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.datasets import MATHDataset, MATHEvaluator
math_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
        template='''Problem:
Find the domain of the expression $\\frac{{\\sqrt{{x-2}}}}{{\\sqrt{{5-x}}}}$.}}
Solution:
The expressions inside each square root must be non-negative. Therefore, $x-2 \\ge 0$, so $x\\ge2$, and $5 - x \\ge 0$, so $x \\le 5$. Also, the denominator cannot be equal to zero, so $5-x>0$, which gives $x<5$. Therefore, the domain of the expression is $\\boxed{{[2,5)}}$.
Final Answer: The final answer is $[2,5)$. I hope it is correct.
Problem:
If $\\det \\mathbf{{A}} = 2$ and $\\det \\mathbf{{B}} = 12,$ then find $\\det (\\mathbf{{A}} \\mathbf{{B}}).$
Solution:
We have that $\\det (\\mathbf{{A}} \\mathbf{{B}}) = (\\det \\mathbf{{A}})(\\det \\mathbf{{B}}) = (2)(12) = \\boxed{{24}}.$
Final Answer: The final answer is $24$. I hope it is correct.
Problem:
Terrell usually lifts two 20-pound weights 12 times. If he uses two 15-pound weights instead, how many times must Terrell lift them in order to lift the same total weight?
Solution:
If Terrell lifts two 20-pound weights 12 times, he lifts a total of $2\\cdot 12\\cdot20=480$ pounds of weight. If he lifts two 15-pound weights instead for $n$ times, he will lift a total of $2\\cdot15\\cdot n=30n$ pounds of weight. Equating this to 480 pounds, we can solve for $n$: \\begin{{align*}} 30n&=480\\\\ \\Rightarrow\\qquad n&=480/30=\\boxed{{16}} \\end{{align*}}
Final Answer: The final answer is $16$. I hope it is correct.
Problem:
If the system of equations: \\begin{{align*}} 6x-4y&=a,\\\\ 6y-9x &=b. \\end{{align*}}has a solution $(x, y)$ where $x$ and $y$ are both nonzero, find $\\frac{{a}}{{b}},$ assuming $b$ is nonzero.
Solution:
If we multiply the first equation by $-\\frac{{3}}{{2}}$, we obtain $$6y-9x=-\\frac{{3}}{{2}}a.$$Since we also know that $6y-9x=b$, we have $$-\\frac{{3}}{{2}}a=b\\Rightarrow\\frac{{a}}{{b}}=\\boxed{{-\\frac{{2}}{{3}}}}.$$
Final Answer: The final answer is $-\\frac{{2}}{{3}}$. I hope it is correct.
Problem:
{problem}Solution:
{solution}'''),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GenInferencer, max_out_len=512))
math_eval_cfg = dict(
evaluator=dict(type=MATHEvaluator), pred_postprocessor=dict(type='math'))
math_datasets = [
dict(
type=MATHDataset,
abbr='math',
path='./data/math/math.json',
reader_cfg=dict(
input_columns=['problem'],
output_column='solution',
),
infer_cfg=math_infer_cfg,
eval_cfg=math_eval_cfg)
]
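# The 'math' pred_postprocessor reduces the generated solution to a final
# answer string. A minimal sketch that grabs the contents of the last
# \boxed{...} with brace matching (an approximation; the upstream
# postprocessor also normalizes the extracted LaTeX):
def extract_boxed(text: str) -> str:
    start = text.rfind('\\boxed{')
    if start == -1:
        return ''
    i = start + len('\\boxed{')
    begin, depth = i, 0
    while i < len(text):
        if text[i] == '{':
            depth += 1
        elif text[i] == '}':
            if depth == 0:
                return text[begin:i]
            depth -= 1
        i += 1
    return ''

# extract_boxed('the domain is $\\boxed{[2,5)}$.') -> '[2,5)'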
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.datasets import MBPPDataset, MBPPEvaluator
mbpp_reader_cfg = dict(
input_columns=['text', 'test_list'], output_column='code')
mbpp_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template=dict(
round=[
dict(
role="HUMAN",
prompt=
"You are an expert Python programmer, and here is your task: Write a function to find the similar elements from the given two tuple lists. Your code should pass these tests:\n\n assert similar_elements((3, 4, 5, 6),(5, 7, 4, 10)) == (4, 5)\n assert similar_elements((1, 2, 3, 4),(5, 4, 3, 7)) == (3, 4) \n assert similar_elements((11, 12, 14, 13),(17, 15, 14, 13)) == (13, 14) \n"
),
dict(
role="BOT",
prompt=
"[BEGIN]\n 'def similar_elements(test_tup1, test_tup2):\r\n res = tuple(set(test_tup1) & set(test_tup2))\r\n return (res)' \n[DONE] \n\n "
),
dict(
role="HUMAN",
prompt=
"You are an expert Python programmer, and here is your task: Write a python function to identify non-prime numbers. Your code should pass these tests:\n\n assert is_not_prime(2) == False \n assert is_not_prime(10) == True \n assert is_not_prime(35) == True \n"
),
dict(
role="BOT",
prompt=
"[BEGIN]\n 'import math\r\ndef is_not_prime(n):\r\n result = False\r\n for i in range(2,int(math.sqrt(n)) + 1):\r\n if n % i == 0:\r\n result = True\r\n return result' \n[DONE] \n\n "
),
dict(
role="HUMAN",
prompt=
"You are an expert Python programmer, and here is your task: Write a function to find the largest integers from a given list of numbers using heap queue algorithm. Your code should pass these tests:\n\n assert heap_queue_largest( [25, 35, 22, 85, 14, 65, 75, 22, 58],3)==[85, 75, 65] \n assert heap_queue_largest( [25, 35, 22, 85, 14, 65, 75, 22, 58],2)==[85, 75] \n assert heap_queue_largest( [25, 35, 22, 85, 14, 65, 75, 22, 58],5)==[85, 75, 65, 58, 35] \n"
),
dict(
role="BOT",
prompt=
"[BEGIN]\n 'import heapq as hq\r\ndef heap_queue_largest(nums,n):\r\n largest_nums = hq.nlargest(n, nums)\r\n return largest_nums' \n[DONE] \n\n "
),
dict(
role="HUMAN",
prompt=
"You are an expert Python programmer, and here is your task: {text} Your code should pass these tests:\n\n {test_list} \n"
),
dict(role="BOT", prompt="[BEGIN]\n"),
], )),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GenInferencer, max_out_len=512))
mbpp_eval_cfg = dict(evaluator=dict(type=MBPPEvaluator), pred_role="BOT")
mbpp_datasets = [
dict(
type=MBPPDataset,
abbr='mbpp',
path='./data/mbpp/mbpp.jsonl',
reader_cfg=mbpp_reader_cfg,
infer_cfg=mbpp_infer_cfg,
eval_cfg=mbpp_eval_cfg)
]
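# MBPPEvaluator extracts the code the few-shot format wraps between [BEGIN]
# and [DONE], executes it, then runs the dataset's assert statements. A rough
# sketch of that check (the real evaluator additionally sandboxes the
# execution and applies a timeout):
def passes_tests(prediction: str, test_list: list) -> bool:
    code = prediction.split('[BEGIN]')[-1].split('[DONE]')[0].strip().strip("'")
    scope = {}
    try:
        exec(code, scope)      # define the candidate function
        for test in test_list:
            exec(test, scope)  # each test is an `assert ...` statement
        return True
    except Exception:
        return False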
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import FixKRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import MMLUDataset
# None of the MMLU datasets on HuggingFace is parsed correctly, so we use our own dataset reader
# Please download the dataset from https://people.eecs.berkeley.edu/~hendrycks/data.tar
mmlu_reader_cfg = dict(
input_columns=["input", "A", "B", "C", "D"],
output_column="target",
train_split='dev')
mmlu_prompt_template = dict(
    type=PromptTemplate,
template=None,
ice_token='</E>')
mmlu_infer_cfg = dict(
ice_template=dict(
type=PromptTemplate,
template=dict(round=[
dict(
role='HUMAN',
prompt='{input}\nA. {A}\nB. {B}\nC. {C}\nD. {D}\nAnswer: '
),
dict(role='BOT', prompt='{target}\n')
])),
prompt_template=mmlu_prompt_template,
retriever=dict(type=FixKRetriever),
inferencer=dict(type=GenInferencer, fix_id_list=[0, 1, 2, 3, 4]))
mmlu_eval_cfg = dict(
evaluator=dict(type=AccEvaluator),
pred_postprocessor=dict(type='first-capital'))
mmlu_all_sets = [
"college_biology",
"college_chemistry",
"college_computer_science",
"college_mathematics",
"college_physics",
"electrical_engineering",
"astronomy",
"anatomy",
"abstract_algebra",
"machine_learning",
"clinical_knowledge",
"global_facts",
"management",
"nutrition",
"marketing",
"professional_accounting",
"high_school_geography",
"international_law",
"moral_scenarios",
"computer_security",
"high_school_microeconomics",
"professional_law",
"medical_genetics",
"professional_psychology",
"jurisprudence",
"world_religions",
"philosophy",
"virology",
"high_school_chemistry",
"public_relations",
"high_school_macroeconomics",
"human_sexuality",
"elementary_mathematics",
"high_school_physics",
"high_school_computer_science",
"high_school_european_history",
"business_ethics",
"moral_disputes",
"high_school_statistics",
"miscellaneous",
"formal_logic",
"high_school_government_and_politics",
"prehistory",
"security_studies",
"high_school_biology",
"logical_fallacies",
"high_school_world_history",
"professional_medicine",
"high_school_mathematics",
"college_medicine",
"high_school_us_history",
"sociology",
"econometrics",
"high_school_psychology",
"human_aging",
"us_foreign_policy",
"conceptual_physics",
]
mmlu_datasets = []
for _name in mmlu_all_sets:
mmlu_datasets.append(
dict(
abbr=f"lukaemon_mmlu_{_name}",
type=MMLUDataset,
path="./data/mmlu/",
name=_name,
reader_cfg=mmlu_reader_cfg,
infer_cfg=mmlu_infer_cfg.copy(),
eval_cfg=mmlu_eval_cfg))
mmlu_datasets[-1]['infer_cfg'][
'prompt_template'] = mmlu_prompt_template.copy()
mmlu_datasets[-1]['infer_cfg']['prompt_template']['template'] = \
dict(
begin=[
dict(role='SYSTEM', fallback_role='HUMAN', prompt=f'The following are multiple choice questions (with answers) about {_name.replace("_", " ")}.'),
'</E>',
],
round=[
dict(role='HUMAN', prompt='{input}\nA. {A}\nB. {B}\nC. {C}\nD. {D}\nAnswer: '),
]
)
del _name
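# For reference, the official data.tar unpacks into per-subject csv files such
# as dev/college_biology_dev.csv with six header-less columns: the question,
# the four options, and the answer letter. A sketch of what MMLUDataset
# roughly has to read (layout assumed from the official release):
import csv

def load_mmlu_split(path):  # e.g. './data/mmlu/dev/college_biology_dev.csv'
    rows = []
    with open(path, newline='', encoding='utf-8') as f:
        for q, a, b, c, d, target in csv.reader(f):
            rows.append(dict(input=q, A=a, B=b, C=c, D=d, target=target))
    return rows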
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import FixKRetriever
from opencompass.openicl.icl_inferencer import PPLInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import MMLUDataset
# None of the MMLU datasets on HuggingFace is parsed correctly, so we use our own dataset reader
# Please download the dataset from https://people.eecs.berkeley.edu/~hendrycks/data.tar
mmlu_reader_cfg = dict(
input_columns=["input", "A", "B", "C", "D"],
output_column="target",
train_split='dev')
mmlu_all_sets = [
"college_biology",
"college_chemistry",
"college_computer_science",
"college_mathematics",
"college_physics",
"electrical_engineering",
"astronomy",
"anatomy",
"abstract_algebra",
"machine_learning",
"clinical_knowledge",
"global_facts",
"management",
"nutrition",
"marketing",
"professional_accounting",
"high_school_geography",
"international_law",
"moral_scenarios",
"computer_security",
"high_school_microeconomics",
"professional_law",
"medical_genetics",
"professional_psychology",
"jurisprudence",
"world_religions",
"philosophy",
"virology",
"high_school_chemistry",
"public_relations",
"high_school_macroeconomics",
"human_sexuality",
"elementary_mathematics",
"high_school_physics",
"high_school_computer_science",
"high_school_european_history",
"business_ethics",
"moral_disputes",
"high_school_statistics",
"miscellaneous",
"formal_logic",
"high_school_government_and_politics",
"prehistory",
"security_studies",
"high_school_biology",
"logical_fallacies",
"high_school_world_history",
"professional_medicine",
"high_school_mathematics",
"college_medicine",
"high_school_us_history",
"sociology",
"econometrics",
"high_school_psychology",
"human_aging",
"us_foreign_policy",
"conceptual_physics",
]
mmlu_datasets = []
for _name in mmlu_all_sets:
_hint = f'The following are multiple choice questions (with answers) about {_name.replace("_", " ")}.\n\n'
mmlu_infer_cfg = dict(
ice_template=dict(
type=PromptTemplate,
template={
opt:
f"{{input}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\nAnswer: {opt}\n"
for opt in ["A", "B", "C", "D"]
},
),
prompt_template=dict(
type=PromptTemplate,
template={
opt:
f"{_hint}</E>{{input}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\nAnswer: {opt}"
for opt in ["A", "B", "C", "D"]
},
ice_token="</E>",
),
retriever=dict(type=FixKRetriever),
inferencer=dict(type=PPLInferencer, fix_id_list=[0, 1, 2, 3, 4]),
)
mmlu_eval_cfg = dict(evaluator=dict(type=AccEvaluator), )
mmlu_datasets.append(
dict(
abbr=f"lukaemon_mmlu_{_name}",
type=MMLUDataset,
path="./data/mmlu/",
name=_name,
reader_cfg=mmlu_reader_cfg,
infer_cfg=mmlu_infer_cfg,
eval_cfg=mmlu_eval_cfg,
))
del _name, _hint
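# Unlike the generation-based config above, PPLInferencer renders one prompt
# per option and picks the candidate with the lowest language-model loss. An
# illustrative sketch with HuggingFace transformers ('gpt2' is a placeholder
# model, and this is not OpenCompass's internal implementation):
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

tok = AutoTokenizer.from_pretrained('gpt2')
model = AutoModelForCausalLM.from_pretrained('gpt2').eval()

def pick_option(prompts: dict) -> str:
    """prompts maps 'A'..'D' to the rendered text ending in that answer."""
    losses = {}
    with torch.no_grad():
        for opt, text in prompts.items():
            ids = tok(text, return_tensors='pt').input_ids
            losses[opt] = model(ids, labels=ids).loss.item()
    return min(losses, key=losses.get)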
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.datasets import NaturalQuestionDataset, NQEvaluator
nq_reader_cfg = dict(
input_columns=['question'], output_column='answer', train_split='test')
nq_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template="Answer these questions:\nQ: {question}?\nA:{answer}",
),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GenInferencer))
nq_eval_cfg = dict(evaluator=dict(type=NQEvaluator), pred_role="BOT")
nq_datasets = [
dict(
type=NaturalQuestionDataset,
abbr='nq',
path='./data/nq/',
reader_cfg=nq_reader_cfg,
infer_cfg=nq_infer_cfg,
eval_cfg=nq_eval_cfg)
]
from mmengine.config import read_base
with read_base():
from .obqa_gen_b2cde9 import obqa_datasets # noqa: F401, F403
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import OBQADataset
_input_columns = [
["question_stem", "A", "B", "C", "D"],
["question_stem", "A", "B", "C", "D", "fact1"],
]
_template = [
dict(
round=[
dict(
role="HUMAN",
prompt="Question: {question_stem}\nA. {A}\nB. {B}\nC. {C}\nD. {D}\nAnswer:"
),
], ),
dict(
round=[
dict(
role="HUMAN",
prompt="Given the fact: {fact1}\nQuestion: {question_stem}\nA. {A}\nB. {B}\nC. {C}\nD. {D}\nAnswer:",
),
], ),
]
obqa_datasets = [
dict(
abbr="openbookqa",
type=OBQADataset,
path="openbookqa",
split="test",
),
dict(
abbr="openbookqa_fact",
type=OBQADataset,
path="openbookqa",
name="additional",
split="test",
),
]
for _i in range(2):
obqa_reader_cfg = dict(
input_columns=_input_columns[_i], output_column="answerKey")
obqa_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template=_template[_i]),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GenInferencer),
)
obqa_eval_cfg = dict(
evaluator=dict(type=AccEvaluator),
pred_role="BOT",
pred_postprocessor=dict(type="first-capital"),
)
obqa_datasets[_i]["reader_cfg"] = obqa_reader_cfg
obqa_datasets[_i]["infer_cfg"] = obqa_infer_cfg
obqa_datasets[_i]["eval_cfg"] = obqa_eval_cfg
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.datasets import QASPERDataset, TriviaQAEvaluator
qasper_reader_cfg = dict(
input_columns=['question', 'evidence'],
output_column='answer',
train_split='dev',
test_split='dev')
qasper_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template=dict(
round=[
dict(
role='HUMAN',
            prompt='{evidence}\nAnswer these questions:\nQ: {question}?\nA:'),
dict(role='BOT', prompt=''),
], )),
retriever=dict(type=ZeroRetriever),
inferencer=dict(
type=GenInferencer, max_out_len=50, max_seq_len=8192, batch_size=4))
qasper_eval_cfg = dict(evaluator=dict(type=TriviaQAEvaluator), pred_role='BOT')
qasper_datasets = [
dict(
type=QASPERDataset,
abbr='QASPER',
path='./data/QASPER/',
reader_cfg=qasper_reader_cfg,
infer_cfg=qasper_infer_cfg,
eval_cfg=qasper_eval_cfg)
]
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.datasets import QASPERDataset, TriviaQAEvaluator
qasper_reader_cfg = dict(
input_columns=['question', 'evidence'],
output_column='answer',
train_split='dev',
test_split='dev')
qasper_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template="{evidence}\nAnswer these questions:\nQ: {question}?\nA:"),
retriever=dict(type=ZeroRetriever),
inferencer=dict(
type=GenInferencer, max_out_len=50, max_seq_len=8192, batch_size=4))
qasper_eval_cfg = dict(evaluator=dict(type=TriviaQAEvaluator))
qasper_datasets = [
dict(
type=QASPERDataset,
abbr='QASPER',
path='./data/QASPER/',
reader_cfg=qasper_reader_cfg,
infer_cfg=qasper_infer_cfg,
eval_cfg=qasper_eval_cfg)
]
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import PPLInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import RaceDataset
race_reader_cfg = dict(
input_columns=['article', 'question', 'A', 'B', 'C', 'D'],
output_column='answer')
race_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template={
ans: dict(round=[
dict(
role="HUMAN",
prompt=
"Read the article, and answer the question by replying A, B, C or D.\n\nArticle:\n{article}\n\nQ: {question}\n\nA. {A}\nB. {B}\nC. {C}\nD. {D}"
),
dict(role="BOT", prompt=ans_token),
])
for ans, ans_token in [["A", "{A}"], ["B", "{B}"], ["C", "{C}"],
["D", "{D}"]]
}),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=PPLInferencer))
race_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
race_datasets = [
dict(
type=RaceDataset,
abbr='race-middle',
path='race',
name='middle',
reader_cfg=race_reader_cfg,
infer_cfg=race_infer_cfg,
eval_cfg=race_eval_cfg),
dict(
type=RaceDataset,
abbr='race-high',
path='race',
name='high',
reader_cfg=race_reader_cfg,
infer_cfg=race_infer_cfg,
eval_cfg=race_eval_cfg)
]
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import PPLInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import RaceDataset
race_reader_cfg = dict(
input_columns=['article', 'question', 'A', 'B', 'C', 'D'],
output_column='answer')
race_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template={
'A':
'Read the article, and answer the question by replying A, B, C or D.\n\n{article}\n\nQ: {question}\n\nA. {A}\nB. {B}\nC. {C}\nD. {D}\n\nAnswer: A',
'B':
'Read the article, and answer the question by replying A, B, C or D.\n\n{article}\n\nQ: {question}\n\nA. {A}\nB. {B}\nC. {C}\nD. {D}\n\nAnswer: B',
'C':
'Read the article, and answer the question by replying A, B, C or D.\n\n{article}\n\nQ: {question}\n\nA. {A}\nB. {B}\nC. {C}\nD. {D}\n\nAnswer: C',
'D':
'Read the article, and answer the question by replying A, B, C or D.\n\n{article}\n\nQ: {question}\n\nA. {A}\nB. {B}\nC. {C}\nD. {D}\n\nAnswer: D',
}),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=PPLInferencer))
race_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
race_datasets = [
dict(
type=RaceDataset,
abbr='race-middle',
path='race',
name='middle',
reader_cfg=race_reader_cfg,
infer_cfg=race_infer_cfg,
eval_cfg=race_eval_cfg),
dict(
type=RaceDataset,
abbr='race-high',
path='race',
name='high',
reader_cfg=race_reader_cfg,
infer_cfg=race_infer_cfg,
eval_cfg=race_eval_cfg)
]
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import ToxicEvaluator
from opencompass.datasets import RealToxicPromptsDataset
realtoxicprompts_reader_cfg = dict(
input_columns=['prompt_text'],
output_column='filename',
train_split='train',
test_split='train')
# TODO: allow empty output-column
realtoxicprompts_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template='{prompt_text}'),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GenInferencer))
# When key is set to "ENV", the key will be fetched from the environment
# variable $PerspectiveAPIkey. Otherwise, set the key here directly.
realtoxicprompts_eval_cfg = dict(
evaluator=dict(type=ToxicEvaluator, key='ENV'), )
realtoxicprompts_datasets = [
dict(
type=RealToxicPromptsDataset,
path='allenai/real-toxicity-prompts',
challenging_subset=True,
reader_cfg=realtoxicprompts_reader_cfg,
infer_cfg=realtoxicprompts_infer_cfg,
eval_cfg=realtoxicprompts_eval_cfg)
]
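# With key='ENV', the Perspective API key must be present before the run
# starts, e.g. (placeholder value):
#   export PerspectiveAPIkey=your-perspective-api-key
# or, from Python, before the evaluator is built:
import os
os.environ['PerspectiveAPIkey'] = 'your-perspective-api-key'  # placeholder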
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import PPLInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import HFDataset
siqa_reader_cfg = dict(
input_columns=['context', 'question', 'answerA', 'answerB', 'answerC'],
output_column='label',
test_split='validation')
siqa_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template={
1:
dict(round=[
dict(role='HUMAN', prompt="{context}\nQuestion: {question}\nAnswer:"),
dict(role='BOT', prompt="{answerA}")
]),
2:
dict(round=[
dict(role='HUMAN', prompt="{context}\nQuestion: {question}\nAnswer:"),
dict(role='BOT', prompt="{answerB}")
]),
3:
dict(round=[
dict(role='HUMAN', prompt="{context}\nQuestion: {question}\nAnswer:"),
dict(role='BOT', prompt="{answerC}")
]),
}),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=PPLInferencer))
siqa_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
siqa_datasets = [
dict(
abbr="siqa",
type=HFDataset,
path='social_i_qa',
reader_cfg=siqa_reader_cfg,
infer_cfg=siqa_infer_cfg,
eval_cfg=siqa_eval_cfg)
]
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import storyclozeDataset_V2
storycloze_reader_cfg = dict(
input_columns=["context", "sentence_quiz1", "sentence_quiz2"],
output_column="answer_right_ending",
)
storycloze_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template=dict(round=[
dict(
role="HUMAN",
prompt=
"{context}\nQuestion: Which ending makes the most sense?\nA. {sentence_quiz1}\nB. {sentence_quiz2}\nYou may choose between 'A' and 'B'.\nAnswer:",
),
]),
),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GenInferencer),
)
storycloze_eval_cfg = dict(
evaluator=dict(type=AccEvaluator),
pred_role="BOT",
pred_postprocessor=dict(type="first-capital"),
)
# The original story cloze dataset and its repo are no longer maintained,
# so we use the multilingual version of this dataset instead.
storycloze_datasets = [
dict(
abbr="story_cloze",
type=storyclozeDataset_V2,
path="juletxara/xstory_cloze",
name="en",
reader_cfg=storycloze_reader_cfg,
infer_cfg=storycloze_infer_cfg,
eval_cfg=storycloze_eval_cfg,
)
]
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import HFDataset
strategyqa_reader_cfg = dict(
input_columns=['question'],
output_column='answer',
train_split='test',
test_split='test')
strategyqa_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template='''Yes or no: Q: Do hamsters provide food for any animals?
A: Hamsters are prey animals. Prey are food for predators. Thus, hamsters provide food for some animals.
So the answer is yes.
Q: Yes or no: Could Brooke Shields succeed at University of Pennsylvania?
A: Brooke Shields went to Princeton University. Princeton University is about as academically rigorous as the University of Pennsylvania. Thus, Brooke Shields could also succeed at the University of Pennsylvania.
So the answer is yes.
Q: Yes or no: Hydrogen's atomic number squared exceeds number of Spice Girls?
A: Hydrogen has an atomic number of 1. 1 squared is 1. There are 5 Spice Girls. Thus, Hydrogen's atomic number squared is less than 5.
So the answer is no.
Q: Yes or no: Is it common to see frost during some college commencements?
A: College commencement ceremonies can happen in December, May, and June. December is in the winter, so there can be frost. Thus, there could be frost at some commencements.
So the answer is yes.
Q: Yes or no: Could a llama birth twice during War in Vietnam (1945-46)?
A: The War in Vietnam was 6 months. The gestation period for a llama is 11 months, which is more than 6 months. Thus, a llama could not give birth twice during the War in Vietnam.
So the answer is no.
Q: Yes or no: Would a pear sink in water?
A: The density of a pear is about 0.6g/cm3, which is less than water. Objects less dense than water float. Thus, a pear would float.
So the answer is no.
Q: {question}{answer}
'''),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GenInferencer, max_out_len=512))
strategyqa_eval_cfg = dict(
evaluator=dict(type=AccEvaluator),
pred_postprocessor=dict(type='strategyqa'),
dataset_postprocessor=dict(type='strategyqa_dataset'))
strategyqa_datasets = [
dict(
abbr='strategyqa',
type=HFDataset,
path='wics/strategy-qa',
reader_cfg=strategyqa_reader_cfg,
infer_cfg=strategyqa_infer_cfg,
eval_cfg=strategyqa_eval_cfg)
]
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import HFDataset
strategyqa_reader_cfg = dict(
input_columns=['question'],
output_column='answer',
train_split='test',
test_split='test')
strategyqa_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template=dict(
round=[
dict(
role='HUMAN',
prompt=
'Question: Do hamsters provide food for any animals?\nAnswer:'
),
dict(
role='BOT',
prompt=
'Hamsters are prey animals. Prey are food for predators. Thus, hamsters provide food for some animals.\nSo the answer is yes'
),
dict(
role='HUMAN',
prompt=
'Question: Could Brooke Shields succeed at University of Pennsylvania?\nAnswer:'
),
dict(
role='BOT',
prompt=
'Brooke Shields went to Princeton University. Princeton University is about as academically rigorous as the University of Pennsylvania. Thus, Brooke Shields could also succeed at the University of Pennsylvania.\nSo the answer is yes'
),
dict(
role='HUMAN',
prompt=
'Question: Hydrogen\'s atomic number squared exceeds number of Spice Girls?\nAnswer:'
),
dict(
role='BOT',
prompt=
'Hydrogen has an atomic number of 1. 1 squared is 1. There are 5 Spice Girls. Thus, Hydrogen\'s atomic number squared is less than 5.\nSo the answer is no'
),
dict(
role='HUMAN',
prompt=
'Question: Is it common to see frost during some college commencements?\nAnswer:'
),
dict(
role='BOT',
prompt=
'College commencement ceremonies can happen in December, May, and June. December is in the winter, so there can be frost. Thus, there could be frost at some commencements.\nSo the answer is yes'
),
dict(
role='HUMAN',
prompt=
'Question: Yes or no: Could a llama birth twice during War in Vietnam (1945-46)?\nAnswer:'
),
dict(
role='BOT',
prompt=
'The War in Vietnam was 6 months. The gestation period for a llama is 11 months, which is more than 6 months. Thus, a llama could not give birth twice during the War in Vietnam.\nSo the answer is no'
),
dict(
role='HUMAN',
prompt='Question: Would a pear sink in water?\nAnswer:'),
dict(
role='BOT',
prompt=
'The density of a pear is about 0.6g/cm3, which is less than water. Objects less dense than water float. Thus, a pear would float.\nSo the answer is no'
),
dict(role='HUMAN', prompt='Question: {question}\nAnswer:'),
], )),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GenInferencer, max_out_len=512))
strategyqa_eval_cfg = dict(
evaluator=dict(type=AccEvaluator),
pred_postprocessor=dict(type='strategyqa'),
dataset_postprocessor=dict(type='strategyqa_dataset'))
strategyqa_datasets = [
dict(
abbr='strategyqa',
type=HFDataset,
path='wics/strategy-qa',
reader_cfg=strategyqa_reader_cfg,
infer_cfg=strategyqa_infer_cfg,
eval_cfg=strategyqa_eval_cfg)
]
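# Both strategyqa configs depend on the 'strategyqa' pred_postprocessor to map
# the generated rationale onto a yes/no label, mirroring the 'So the answer is
# ...' convention in the prompts above. A rough sketch (an assumption, not the
# upstream code):
import re

def extract_yes_no(text: str) -> str:
    match = re.search(r'answer is (yes|no)', text.lower())
    return match.group(1) if match else ''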