Commit cbe9fe2c authored by Ezra-Yu, committed by gaotong
Add Release Contribution

parent 36f11110
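# --- TheoremQA: zero-shot generation config ---
# Each question is wrapped in an instruction/input/response (Alpaca-style)
# prompt; GenInferencer produces free-form answers, which are scored with
# AccEvaluator after the 'TheoremQA' answer postprocessor.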
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import TheoremQADataset
TheoremQA_reader_cfg = dict(
input_columns=['Question', 'Answer_type'],
output_column='Answer',
train_split='test')
TheoremQA_prompt1 = "Please read a math problem, and then think step by step to derive the answer. The answer is decided by Answer Type. " \
"If the Answer type in [bool], the answer needs to be True or False. " \
"Else if the Answer type in [integer, float] , The answer needs to be in numerical form. " \
"Else if the Answer type in [list of integer, list of float] , the answer needs to be a list of number like [2, 3, 4]. " \
"Else if the Answer type in [option], the answer needs to be an option like (a), (b), (c), (d)." \
"You need to output the answer in your final sentence like 'Therefore, the answer is ...'."
TheoremQA_prompt2 = f"Below is an instruction that describes a task, paired with an input that provides further context. " \
f"Write a response that appropriately completes the request.\n\n### Instruction:\n{TheoremQA_prompt1}\n\n### Input:\n{{Question}}\nAnswer_type:{{Answer_type}}\n### Response:\n"
TheoremQA_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template=TheoremQA_prompt2),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GenInferencer))
TheoremQA_eval_cfg = dict(
evaluator=dict(type=AccEvaluator),
pred_postprocessor=dict(type='TheoremQA'))
TheoremQA_datasets = [
dict(
abbr='TheoremQA',
type=TheoremQADataset,
path="./data/TheoremQA/test.csv",
reader_cfg=TheoremQA_reader_cfg,
infer_cfg=TheoremQA_infer_cfg,
eval_cfg=TheoremQA_eval_cfg)
]
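# --- C-Eval: collection stub re-exporting the pinned ceval_gen_ee2cb0 config ---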
from mmengine.config import read_base
with read_base():
from .ceval_gen_ee2cb0 import ceval_datasets # noqa: F401, F403
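# --- CivilComments: toxicity detection ---
# A zero-shot yes/no prompt is scored with CLPInferencer (conditional
# log-probabilities of the answer tokens) and evaluated with AUC-ROC.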
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import CLPInferencer
from opencompass.openicl.icl_evaluator import AUCROCEvaluator
from opencompass.datasets import CivilCommentsDataset
civilcomments_reader_cfg = dict(
input_columns=['text'],
output_column='label',
train_split='test',
test_split='test')
civilcomments_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template='Text: {text}\nQuestion: '
'Does the above text contain rude, hateful, aggressive, disrespectful '
'or unreasonable language?\nAnswer:'),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=CLPInferencer))
civilcomments_eval_cfg = dict(evaluator=dict(type=AUCROCEvaluator), )
civilcomments_datasets = [
dict(
type=CivilCommentsDataset,
path='civil_comments',
reader_cfg=civilcomments_reader_cfg,
infer_cfg=civilcomments_infer_cfg,
eval_cfg=civilcomments_eval_cfg)
]
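# --- Collection config: generation-style evaluation suite ---
# Pulls in the pinned *_gen_* config of every benchmark below via read_base().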
from mmengine.config import read_base
with read_base():
from ..mmlu.mmlu_gen_a568f1 import mmlu_datasets
from ..ceval.ceval_gen_ee2cb0 import ceval_datasets
from ..agieval.agieval_gen_dc7dae import agieval_datasets
from ..GaokaoBench.GaokaoBench_gen_aed980 import GaokaoBench_datasets
from ..bbh.bbh_gen_58abc3 import bbh_datasets
from ..humaneval.humaneval_gen_d428f1 import humaneval_datasets
from ..mbpp.mbpp_gen_4104e4 import mbpp_datasets
from ..CLUE_C3.CLUE_C3_gen_9e3de9 import C3_datasets
from ..CLUE_CMRC.CLUE_CMRC_gen_72a8d5 import CMRC_datasets
from ..CLUE_DRCD.CLUE_DRCD_gen_03b96b import DRCD_datasets
from ..CLUE_afqmc.CLUE_afqmc_gen_db509b import afqmc_datasets
from ..CLUE_cmnli.CLUE_cmnli_gen_316313 import cmnli_datasets
from ..CLUE_ocnli.CLUE_ocnli_gen_7c44b0 import ocnli_datasets
from ..FewCLUE_bustm.FewCLUE_bustm_gen_305431 import bustm_datasets
from ..FewCLUE_chid.FewCLUE_chid_gen_686c63 import chid_datasets
from ..FewCLUE_cluewsc.FewCLUE_cluewsc_gen_276956 import cluewsc_datasets
from ..FewCLUE_csl.FewCLUE_csl_gen_1b0c02 import csl_datasets
from ..FewCLUE_eprstmt.FewCLUE_eprstmt_gen_d6d06d import eprstmt_datasets
from ..FewCLUE_ocnli_fc.FewCLUE_ocnli_fc_gen_bef37f import ocnli_fc_datasets
from ..FewCLUE_tnews.FewCLUE_tnews_gen_8d59ba import tnews_datasets
from ..lcsts.lcsts_gen_427fde import lcsts_datasets
from ..lambada.lambada_gen_7ffe3d import lambada_datasets
from ..storycloze.storycloze_gen_c5a230 import storycloze_datasets
from ..SuperGLUE_AX_b.SuperGLUE_AX_b_gen_477186 import AX_b_datasets
from ..SuperGLUE_AX_g.SuperGLUE_AX_g_gen_7a5dee import AX_g_datasets
from ..SuperGLUE_BoolQ.SuperGLUE_BoolQ_gen_8525d1 import BoolQ_datasets
from ..SuperGLUE_CB.SuperGLUE_CB_gen_bb97e1 import CB_datasets
from ..SuperGLUE_COPA.SuperGLUE_COPA_gen_6d5e67 import COPA_datasets
from ..SuperGLUE_MultiRC.SuperGLUE_MultiRC_gen_26c9dc import MultiRC_datasets
from ..SuperGLUE_RTE.SuperGLUE_RTE_gen_ce346a import RTE_datasets
from ..SuperGLUE_ReCoRD.SuperGLUE_ReCoRD_gen_d8f19c import ReCoRD_datasets
from ..SuperGLUE_WiC.SuperGLUE_WiC_gen_c39367 import WiC_datasets
from ..SuperGLUE_WSC.SuperGLUE_WSC_gen_d8d441 import WSC_datasets
from ..race.race_gen_12de48 import race_datasets
from ..Xsum.Xsum_gen_d2126e import Xsum_datasets
from ..gsm8k.gsm8k_gen_2dd372 import gsm8k_datasets
from ..summedits.summedits_gen_4f35b5 import summedits_datasets
from ..math.math_gen_78bcba import math_datasets
from ..TheoremQA.TheoremQA_gen_891fcf import TheoremQA_datasets
from ..hellaswag.hellaswag_gen_cae9cb import hellaswag_datasets
from ..ARC_e.ARC_e_gen_0a29bf import ARC_e_datasets
from ..ARC_c.ARC_c_gen_3f3039 import ARC_c_datasets
from ..commonsenseqa.commonsenseqa_gen_a58dbd import commonsenseqa_datasets
from ..piqa.piqa_gen_8287ae import piqa_datasets
from ..siqa.siqa_gen_a3c714 import siqa_datasets
from ..strategyqa.strategyqa_gen_be3f8d import strategyqa_datasets
from ..winogrande.winogrande_gen_c19d87 import winogrande_datasets
from ..obqa.obqa_gen_b2cde9 import obqa_datasets
from ..nq.nq_gen_a6ffca import nq_datasets
from ..triviaqa.triviaqa_gen_cc3cbf import triviaqa_datasets
from ..flores.flores_gen_8eb9ca import flores_datasets
from ..crowspairs.crowspairs_gen_dd110a import crowspairs_datasets
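# locals() at this point holds every imported *_datasets list; sum(..., [])
# concatenates them into the flat `datasets` list used by the evaluation run.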
datasets = sum((v for k, v in locals().items() if k.endswith('_datasets')), [])
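# A top-level experiment config would typically pair the `datasets` list above
# with a `models` list and be launched through OpenCompass's run.py. Sketch
# only; the module paths below are hypothetical placeholders, not part of this
# commit:
#
#   from mmengine.config import read_base
#   with read_base():
#       from .collections.example_collection import datasets  # hypothetical
#       from ..models.example_model import models              # hypothetical
#
# --- CommonsenseQA: few-shot generation with MDL-based example selection ---
# MDLRetriever picks 8 in-context examples per query from 30 candidates over
# 10 selection rounds (seed 1); predictions are scored by accuracy after the
# 'first-capital' postprocessor keeps only the first capital letter.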
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import MDLRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import commonsenseqaDataset
commonsenseqa_reader_cfg = dict(
input_columns=["question", "A", "B", "C", "D", "E"],
output_column="answerKey",
test_split="validation")
_ice_template = dict(
type=PromptTemplate,
template=dict(
begin="</E>",
round=[
dict(
role="HUMAN",
prompt=
"{question}\nA. {A}\nB. {B}\nC. {C}\nD. {D}\nE. {E}\nAnswer:",
),
dict(
role="BOT",
prompt="{answerKey}",
),
],
),
ice_token="</E>",
)
commonsenseqa_infer_cfg = dict(
ice_template=_ice_template,
retriever=dict(
type=MDLRetriever,
ice_num=8,
candidate_num=30,
select_time=10,
seed=1,
batch_size=12,
ice_template=_ice_template,
),
inferencer=dict(type=GenInferencer),
)
commonsenseqa_eval_cfg = dict(
evaluator=dict(type=AccEvaluator),
pred_postprocessor=dict(type="first-capital"),
)
commonsenseqa_datasets = [
dict(
type=commonsenseqaDataset,
path="commonsense_qa",
reader_cfg=commonsenseqa_reader_cfg,
infer_cfg=commonsenseqa_infer_cfg,
eval_cfg=commonsenseqa_eval_cfg,
)
]
del _ice_template
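# --- CLUE C3: Chinese multiple-choice reading comprehension ---
# Each of the four options is rendered as a complete prompt and ranked by
# perplexity (PPLInferencer); accuracy against the gold label.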
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import PPLInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import C3Dataset
C3_reader_cfg = dict(
input_columns=[
'question', 'content', 'choice0', 'choice1', 'choice2', 'choice3',
'choices'
],
output_column='label')
C3_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template={
0:
"阅读以下内容,选择合适的选项回答: {content} 问题:{question}\n 选项: -{choice0} -{choice1} -{choice2} -{choice3} 答: [MASK]-{choice0}",
1:
"阅读以下内容,选择合适的选项回答: {content} 问题:{question}\n 选项: -{choice0} -{choice1} -{choice2} -{choice3} 答: [MASK]-{choice1}",
2:
"阅读以下内容,选择合适的选项回答: {content} 问题:{question}\n 选项: -{choice0} -{choice1} -{choice2} -{choice3} 答: [MASK]-{choice2}",
3:
"阅读以下内容,选择合适的选项回答: {content} 问题:{question}\n 选项: -{choice0} -{choice1} -{choice2} -{choice3} 答: [MASK]-{choice3}",
}),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=PPLInferencer))
C3_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
C3_datasets = [
dict(
type=C3Dataset,
abbr='C3',
path='./data/CLUE/C3/dev_0.json',
reader_cfg=C3_reader_cfg,
infer_cfg=C3_infer_cfg,
eval_cfg=C3_eval_cfg)
]
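# --- FewCLUE TNews: 15-way Chinese news topic classification ---
# GLMChoiceInferencer picks one of the label strings in tnews_labels;
# accuracy against label_desc2.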
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GLMChoiceInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import TNewsDataset
tnews_reader_cfg = dict(input_columns='sentence', output_column='label_desc2')
tnews_labels = [
'农业新闻', '旅游新闻', '游戏新闻', '科技类别公司新闻', '体育类别新闻', '初升高教育新闻', '娱乐圈新闻', '投资资讯',
'军事类别常识', '车辆新闻', '楼市新闻', '环球不含中国类别新闻', '书籍文化历史类别新闻', '故事类别新闻', '股票市场类别新闻'
]
tnews_infer_cfg = dict(
ice_template=dict(
type=PromptTemplate,
template={lb: f'</E></S>这篇新闻属于:{lb}'
for lb in tnews_labels},
column_token_map={'sentence': '</S>'},
ice_token='</E>'),
prompt_template=dict(
type=PromptTemplate,
template='</E></S>\n以上这篇新闻属于',
column_token_map={'sentence': '</S>'},
ice_token='</E>'),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GLMChoiceInferencer, choices=tnews_labels))
tnews_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
tnews_datasets = [
dict(
type=TNewsDataset,
path='json',
abbr='tnews',
data_files='./data/FewCLUE/tnews/test_public.json',
split='train',
reader_cfg=tnews_reader_cfg,
infer_cfg=tnews_infer_cfg,
eval_cfg=tnews_eval_cfg)
]
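# --- GovReport (crs): collection stub re-exporting the pinned govrepcrs_gen_455586 config ---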
from mmengine.config import read_base
with read_base():
from .govrepcrs_gen_455586 import govrepcrs_datasets # noqa: F401, F403
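# --- GovReport (crs): long-document summarization ---
# A SYSTEM turn asks for an English summary; generation is capped at 500
# output tokens within an 8192-token context, and BLEU is computed after the
# 'general_cn' postprocessor is applied to both prediction and reference.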
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import BleuEvaluator
from opencompass.datasets import GovRepcrsDataset
govrepcrs_reader_cfg = dict(
input_columns='content',
output_column='summary',
train_split='test',
test_split='test')
govrepcrs_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template=dict(
begin=[
dict(
role='SYSTEM',
fallback_role="HUMAN",
prompt=
'Please summarize the following English report in English:'
),
],
round=[
dict(role='HUMAN', prompt='{content}'),
dict(role='BOT', prompt='{summary}'),
])),
retriever=dict(type=ZeroRetriever),
inferencer=dict(
type=GenInferencer, batch_size=4, max_out_len=500, max_seq_len=8192))
govrepcrs_eval_cfg = dict(
evaluator=dict(type=BleuEvaluator),
pred_role='BOT',
pred_postprocessor=dict(type='general_cn'),
dataset_postprocessor=dict(type='general_cn'))
govrepcrs_datasets = [
dict(
type=GovRepcrsDataset,
path='./data/govrep/',
abbr='GovRepcrs',
reader_cfg=govrepcrs_reader_cfg,
infer_cfg=govrepcrs_infer_cfg,
eval_cfg=govrepcrs_eval_cfg)
]
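# --- HellaSwag (V2): sentence completion as 4-way multiple choice ---
# The model answers with a letter; accuracy is computed after the
# 'first-capital' postprocessor.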
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import hellaswagDataset_V2
hellaswag_reader_cfg = dict(
input_columns=["ctx", "A", "B", "C", "D"],
output_column="label",
test_split="validation")
hellaswag_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template=dict(round=[
dict(
role="HUMAN",
prompt=(
"{ctx}\nQuestion: Which ending makes the most sense?\n"
"A. {A}\nB. {B}\nC. {C}\nD. {D}\n"
"You may choose from 'A', 'B', 'C', 'D'.\n"
"Answer:"),
),
]),
),
retriever=dict(type=ZeroRetriever, ),
inferencer=dict(type=GenInferencer),
)
hellaswag_eval_cfg = dict(
evaluator=dict(type=AccEvaluator),
pred_role="BOT",
pred_postprocessor=dict(type="first-capital"),
)
hellaswag_datasets = [
dict(
type=hellaswagDataset_V2,
path="hellaswag",
reader_cfg=hellaswag_reader_cfg,
infer_cfg=hellaswag_infer_cfg,
eval_cfg=hellaswag_eval_cfg)
]
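# --- IWSLT2017 en->de translation ---
# One in-context example is retrieved with BM25 from the validation split;
# generated German translations are scored with BLEU.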
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import BM25Retriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import BleuEvaluator
from opencompass.datasets import IWSLT2017Dataset
iwslt2017_reader_cfg = dict(
input_columns='en', output_column='de', train_split='validation')
iwslt2017_infer_cfg = dict(
ice_template=dict(
type=PromptTemplate,
template=dict(
begin='</E>',
round=[
dict(role='HUMAN', prompt='Please translate the following English statements to German:\n{en}'),
dict(role='BOT', prompt='{de}'),
]
),
ice_token='</E>'),
retriever=dict(type=BM25Retriever, ice_num=1),
inferencer=dict(type=GenInferencer))
iwslt2017_eval_cfg = dict(
evaluator=dict(type=BleuEvaluator),
pred_role='BOT',
pred_postprocessor=dict(type='general_cn'),
dataset_postprocessor=dict(type='general_cn'))
iwslt2017_datasets = [
dict(
type=IWSLT2017Dataset,
path='iwslt2017',
name='iwslt2017-en-de',
reader_cfg=iwslt2017_reader_cfg,
infer_cfg=iwslt2017_infer_cfg,
eval_cfg=iwslt2017_eval_cfg)
]
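# --- LCSTS: collection stub re-exporting the pinned lcsts_gen_427fde config ---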
from mmengine.config import read_base
with read_base():
from .lcsts_gen_427fde import lcsts_datasets # noqa: F401, F403
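# --- MBPP: Python program synthesis ---
# Three fixed few-shot examples delimited by [BEGIN]/[DONE] precede the target
# task; MBPPEvaluator checks the generated code against the provided tests.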
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.datasets import MBPPDataset, MBPPEvaluator
mbpp_reader_cfg = dict(
input_columns=['text', 'test_list'], output_column='code')
mbpp_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template=dict(
round=[
dict(
role="HUMAN",
prompt=
"You are an expert Python programmer, and here is your task: Write a function to find the similar elements from the given two tuple lists. Your code should pass these tests:\n\n assert similar_elements((3, 4, 5, 6),(5, 7, 4, 10)) == (4, 5)\n assert similar_elements((1, 2, 3, 4),(5, 4, 3, 7)) == (3, 4) \n assert similar_elements((11, 12, 14, 13),(17, 15, 14, 13)) == (13, 14) \n"
),
dict(
role="BOT",
prompt=
"[BEGIN]\n 'def similar_elements(test_tup1, test_tup2):\r\n res = tuple(set(test_tup1) & set(test_tup2))\r\n return (res)' \n[DONE] \n\n "
),
dict(
role="HUMAN",
prompt=
"You are an expert Python programmer, and here is your task: Write a python function to identify non-prime numbers. Your code should pass these tests:\n\n assert is_not_prime(2) == False \n assert is_not_prime(10) == True \n assert is_not_prime(35) == True \n"
),
dict(
role="BOT",
prompt=
"[BEGIN]\n 'import math\r\ndef is_not_prime(n):\r\n result = False\r\n for i in range(2,int(math.sqrt(n)) + 1):\r\n if n % i == 0:\r\n result = True\r\n return result' \n[DONE] \n\n "
),
dict(
role="HUMAN",
prompt=
"You are an expert Python programmer, and here is your task: Write a function to find the largest integers from a given list of numbers using heap queue algorithm. Your code should pass these tests:\n\n assert heap_queue_largest( [25, 35, 22, 85, 14, 65, 75, 22, 58],3)==[85, 75, 65] \n assert heap_queue_largest( [25, 35, 22, 85, 14, 65, 75, 22, 58],2)==[85, 75] \n assert heap_queue_largest( [25, 35, 22, 85, 14, 65, 75, 22, 58],5)==[85, 75, 65, 58, 35] \n"
),
dict(
role="BOT",
prompt=
"[BEGIN]\n 'import heapq as hq\r\ndef heap_queue_largest(nums,n):\r\n largest_nums = hq.nlargest(n, nums)\r\n return largest_nums' \n[DONE] \n\n "
),
dict(
role="HUMAN",
prompt=
"You are an expert Python programmer, and here is your task: {text} Your code should pass these tests:\n\n {test_list} \n"
),
dict(role="BOT", prompt="[BEGIN]\n"),
], )),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GenInferencer))
mbpp_eval_cfg = dict(evaluator=dict(type=MBPPEvaluator), pred_role="BOT")
mbpp_datasets = [
dict(
type=MBPPDataset,
abbr='mbpp',
path='./data/mbpp/mbpp.jsonl',
reader_cfg=mbpp_reader_cfg,
infer_cfg=mbpp_infer_cfg,
eval_cfg=mbpp_eval_cfg)
]
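# --- NarrativeQA: evidence-grounded question answering ---
# The evidence passage is prepended to each question; short answers
# (max_out_len=50) are scored with TriviaQAEvaluator.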
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.datasets import NarrativeQADataset, TriviaQAEvaluator
narrativeqa_reader_cfg = dict(
input_columns=['question', 'evidence'],
output_column='answer',
train_split='valid',
test_split='valid')
narrativeqa_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template="{evidence}\nAnswer these questions:\nQ: {question}?\nA:"),
retriever=dict(type=ZeroRetriever),
inferencer=dict(
type=GenInferencer, max_out_len=50, max_seq_len=8192, batch_size=4))
narrativeqa_eval_cfg = dict(evaluator=dict(type=TriviaQAEvaluator))
narrativeqa_datasets = [
dict(
type=NarrativeQADataset,
abbr='NarrativeQA',
path='./data/narrativeqa/',
reader_cfg=narrativeqa_reader_cfg,
infer_cfg=narrativeqa_infer_cfg,
eval_cfg=narrativeqa_eval_cfg)
]
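# --- OpenBookQA: collection stub re-exporting the pinned obqa_ppl_2b5b12 config ---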
from mmengine.config import read_base
with read_base():
from .obqa_ppl_2b5b12 import obqa_datasets # noqa: F401, F403
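# --- OpenBookQA: perplexity-based multiple choice (dialog-style templates) ---
# Two dataset variants are built: question-only, and question plus the
# supporting fact (fact1). The loop below attaches the matching reader,
# inference and evaluation configs to each entry.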
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import PPLInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import OBQADataset
_input_columns = [
['question_stem', 'A', 'B', 'C', 'D'],
['question_stem', 'A', 'B', 'C', 'D', 'fact1'],
]
_template = [
{
ans: dict(
round=[
dict(
role="HUMAN",
prompt=
"Question: {question_stem}\nA. {A}\nB. {B}\nC. {C}\nD. {D}\nAnswer:"
),
dict(role="BOT", prompt=ans),
], )
for ans in ['A', 'B', 'C', 'D']
},
{
ans: dict(
round=[
dict(
role="HUMAN",
prompt=
"Given the fact: {fact1}\nQuestion: {question_stem}\nA. {A}\nB. {B}\nC. {C}\nD. {D}\nAnswer:"
),
dict(role="BOT", prompt=ans),
], )
for ans in ['A', 'B', 'C', 'D']
}
]
obqa_datasets = [
dict(
type=OBQADataset,
path='openbookqa',
split='test',
),
dict(
abbr='openbookqa_fact',
type=OBQADataset,
path='openbookqa',
name='additional',
split='test',
),
]
for _i in range(2):
obqa_reader_cfg = dict(
input_columns=_input_columns[_i], output_column="answerKey")
obqa_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template=_template[_i]),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=PPLInferencer),
)
obqa_eval_cfg = dict(evaluator=dict(type=AccEvaluator), )
obqa_datasets[_i]["reader_cfg"] = obqa_reader_cfg
obqa_datasets[_i]["infer_cfg"] = obqa_infer_cfg
obqa_datasets[_i]["eval_cfg"] = obqa_eval_cfg
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import PPLInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import OBQADataset
_input_columns = [
['question_stem', 'A', 'B', 'C', 'D'],
['question_stem', 'A', 'B', 'C', 'D', 'fact1'],
]
_template = [{
'A': "{question_stem} {A}",
'B': "{question_stem} {B}",
'C': "{question_stem} {C}",
'D': "{question_stem} {D}",
}, {
'A': "Given the fact {fact1}, we know that {question_stem} {A}",
'B': "Given the fact {fact1}, we know that {question_stem} {B}",
'C': "Given the fact {fact1}, we know that {question_stem} {C}",
'D': "Given the fact {fact1}, we know that {question_stem} {D}",
}]
obqa_datasets = [
dict(
abbr="openbookqa",
type=OBQADataset,
path="openbookqa",
split="test",
),
dict(
abbr="openbookqa_fact",
type=OBQADataset,
path="openbookqa",
name="additional",
split="test",
),
]
for _i in range(2):
obqa_reader_cfg = dict(
input_columns=_input_columns[_i], output_column="answerKey")
obqa_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template=_template[_i]),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=PPLInferencer),
)
obqa_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
obqa_datasets[_i]["reader_cfg"] = obqa_reader_cfg
obqa_datasets[_i]["infer_cfg"] = obqa_infer_cfg
obqa_datasets[_i]["eval_cfg"] = obqa_eval_cfg
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import ToxicEvaluator
from opencompass.datasets import RealToxicPromptsDataset
realtoxicprompts_reader_cfg = dict(
input_columns=['prompt_text'],
output_column='filename',
train_split='train',
test_split='train')
# TODO: allow empty output-column
realtoxicprompts_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template=dict(round=[dict(role="HUMAN", prompt="{prompt_text}")])),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GenInferencer))
# When key is set to "ENV", the key will be fetched from the environment
# variable $PerspectiveAPIkey. Otherwise, set the key here directly.
realtoxicprompts_eval_cfg = dict(
evaluator=dict(type=ToxicEvaluator, key='ENV'),
pred_role='BOT',
)
realtoxicprompts_datasets = [
dict(
type=RealToxicPromptsDataset,
path='allenai/real-toxicity-prompts',
challenging_subset=True,
reader_cfg=realtoxicprompts_reader_cfg,
infer_cfg=realtoxicprompts_infer_cfg,
eval_cfg=realtoxicprompts_eval_cfg)
]
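# --- StoryCloze (xstory_cloze, English split): ending selection ---
# Both candidate endings are scored against the context with PPLInferencer;
# accuracy against answer_right_ending.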
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import PPLInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import storyclozeDataset
storycloze_reader_cfg = dict(
input_columns=['context', 'sentence_quiz1', 'sentence_quiz2'],
output_column='answer_right_ending',
train_split='test',
test_split='test')
storycloze_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template={
i: dict(round=[
dict(role="HUMAN", prompt="{context}"),
dict(role="BOT", prompt=f"{{sentence_quiz{i}}}"),
])
for i in range(1, 3)
}),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=PPLInferencer))
storycloze_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
# The original Story Cloze dataset and repo are no longer maintained,
# so we use the multilingual version (xstory_cloze) instead.
storycloze_datasets = [
dict(
abbr='story_cloze',
type=storyclozeDataset,
path='juletxara/xstory_cloze',
name='en',
reader_cfg=storycloze_reader_cfg,
infer_cfg=storycloze_infer_cfg,
eval_cfg=storycloze_eval_cfg)
]
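# --- SummEdits: factual-consistency classification ---
# The Yes/No continuation with lower perplexity decides whether the summary is
# consistent with the document; accuracy against the binary label.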
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import PPLInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import HFDataset
summedits_reader_cfg = dict(
input_columns=['doc', 'summary'],
output_column='label',
test_split='train')
summedits_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template={
0:
dict(round=[
dict(
role="HUMAN",
prompt=
"""\nDocument:\n{doc}\nSummary:\n{summary}\nIs the summary factually consistent with the document? """
),
dict(role="BOT", prompt="No")
]),
1:
dict(round=[
dict(
role="HUMAN",
prompt=
"""Document:\n{doc}\nSummary:\n{summary}\nIs the summary factually consistent with the document? """
),
dict(role="BOT", prompt="Yes")
]),
}),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=PPLInferencer))
summedits_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
summedits_datasets = [
dict(
type=HFDataset,
abbr='summedits',
path='json',
split='train',
data_files='./data/summedits/summedits.jsonl',
reader_cfg=summedits_reader_cfg,
infer_cfg=summedits_infer_cfg,
eval_cfg=summedits_eval_cfg)
]