Commit fb111087 authored by yingfhu

[Feat] support opencompass

parent 7d346000
from mmengine.config import read_base
with read_base():
    from .SuperGLUE_COPA_ppl_ddb78c import COPA_datasets  # noqa: F401, F403
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import PPLInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import HFDataset
COPA_reader_cfg = dict(
    input_columns=['question', 'premise', 'choice1', 'choice2'],
    output_column='label',
    test_split='train')
COPA_infer_cfg = dict(
    prompt_template=dict(
        type=PromptTemplate,
        # Each key corresponds to a label value; PPLInferencer scores both
        # filled-in candidates and predicts the lower-perplexity one.
        template={
            0: "Premise: {premise}\nQuestion: {question}\nAnswer: {choice1}.",
            1: "Premise: {premise}\nQuestion: {question}\nAnswer: {choice2}.",
        }),
    retriever=dict(type=ZeroRetriever),
    inferencer=dict(type=PPLInferencer))
COPA_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
COPA_datasets = [
    dict(
        type=HFDataset,
        abbr='COPA',
        path='json',
        data_files='./data/SuperGLUE/COPA/val.jsonl',
        split='train',
        reader_cfg=COPA_reader_cfg,
        infer_cfg=COPA_infer_cfg,
        eval_cfg=COPA_eval_cfg)
]
from mmengine.config import read_base
with read_base():
    from .SuperGLUE_MultiRC_gen_26c9dc import MultiRC_datasets  # noqa: F401, F403
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import PPLInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import MultiRCDataset
MultiRC_reader_cfg = dict(
    input_columns=['question', 'text', 'answer'], output_column='label')
MultiRC_infer_cfg = dict(
    prompt_template=dict(
        type=PromptTemplate,
        template={
            0: "Passage: {text}\nQuestion: {question}\nAnswer: {answer}. It is false.",
            1: "Passage: {text}\nQuestion: {question}\nAnswer: {answer}. It is true.",
        }),
    retriever=dict(type=ZeroRetriever),
    inferencer=dict(type=PPLInferencer))
MultiRC_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
MultiRC_datasets = [
    dict(
        type=MultiRCDataset,
        abbr='MultiRC',
        path='./data/SuperGLUE/MultiRC/val.jsonl',
        reader_cfg=MultiRC_reader_cfg,
        infer_cfg=MultiRC_infer_cfg,
        eval_cfg=MultiRC_eval_cfg)
]
from mmengine.config import read_base
with read_base():
    from .SuperGLUE_RTE_ppl_29a22c import RTE_datasets  # noqa: F401, F403
from mmengine.config import read_base
with read_base():
    from .SuperGLUE_ReCoRD_gen_d8f19c import ReCoRD_datasets  # noqa: F401, F403
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import EMEvaluator
from opencompass.datasets import ReCoRDDataset
ReCoRD_reader_cfg = dict(
    input_columns=["question", "text"],
    output_column="answers",
)
ReCoRD_infer_cfg = dict(
    prompt_template=dict(
        type=PromptTemplate,
        template=dict(round=[
            dict(
                role="HUMAN",
                prompt=
                "Passage: {text}\nResult: {question}\nQuestion: What entity does ____ refer to in the result? Give me the entity name:"
            ),
        ]),
    ),
    retriever=dict(type=ZeroRetriever),
    inferencer=dict(type=GenInferencer),
)
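# In the evaluation config below, `pred_role='BOT'` tells the evaluator to
# take the model's reply (the BOT turn) as the prediction, and the "ReCoRD"
# post-processor cleans the generated text before EMEvaluator performs
# exact-match scoring against the reference answers.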
ReCoRD_eval_cfg = dict(
    evaluator=dict(type=EMEvaluator),
    pred_role='BOT',
    pred_postprocessor=dict(type="ReCoRD"),
)
ReCoRD_datasets = [
    dict(
        type=ReCoRDDataset,
        abbr="ReCoRD",
        path="./data/SuperGLUE/ReCoRD/val.jsonl",
        reader_cfg=ReCoRD_reader_cfg,
        infer_cfg=ReCoRD_infer_cfg,
        eval_cfg=ReCoRD_eval_cfg,
    )
]
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import PPLInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import WSCDataset_V2
WSC_reader_cfg = dict(
    input_columns=["span1", "span2", "text"],
    output_column="label",
)
WSC_infer_cfg = dict(
    prompt_template=dict(
        type=PromptTemplate,
        template={
            'A':
            dict(round=[
                dict(
                    role="HUMAN",
                    prompt=
                    "{text}\nDo '{span1}' and '{span2}' refer to the same entity in the above sentence?"
                ),
                dict(role='BOT', prompt='Yes'),
            ]),
            'B':
            dict(round=[
                dict(
                    role="HUMAN",
                    prompt=
                    "{text}\nDo '{span1}' and '{span2}' refer to the same entity in the above sentence?"
                ),
                dict(role='BOT', prompt='No'),
            ]),
        },
    ),
    retriever=dict(type=ZeroRetriever),
    inferencer=dict(type=PPLInferencer),
)
WSC_eval_cfg = dict(evaluator=dict(type=AccEvaluator), )
WSC_datasets = [
    dict(
        abbr="WSC",
        type=WSCDataset_V2,
        path="./data/SuperGLUE/WSC/val.jsonl",
        reader_cfg=WSC_reader_cfg,
        infer_cfg=WSC_infer_cfg,
        eval_cfg=WSC_eval_cfg,
    )
]
from mmengine.config import read_base
with read_base():
    from .XLSum_gen_1cc5f6 import XLSum_datasets  # noqa: F401, F403
from mmengine.config import read_base
with read_base():
    from .bbh_gen_58abc3 import bbh_datasets  # noqa: F401, F403
from mmengine.config import read_base
with read_base():
    from ..ceval.ceval_ppl_275812 import ceval_datasets
    from ..bbh.bbh_gen_58abc3 import bbh_datasets
    from ..CLUE_CMRC.CLUE_CMRC_gen_72a8d5 import CMRC_datasets
    from ..CLUE_DRCD.CLUE_DRCD_gen_03b96b import DRCD_datasets
    from ..CLUE_afqmc.CLUE_afqmc_ppl_c83c36 import afqmc_datasets
    from ..FewCLUE_bustm.FewCLUE_bustm_ppl_47f2ab import bustm_datasets
    from ..FewCLUE_chid.FewCLUE_chid_ppl_b6cd88 import chid_datasets
    from ..FewCLUE_cluewsc.FewCLUE_cluewsc_ppl_2a9e61 import cluewsc_datasets
    from ..FewCLUE_eprstmt.FewCLUE_eprstmt_ppl_d3c387 import eprstmt_datasets
    from ..humaneval.humaneval_gen_d428f1 import humaneval_datasets
    from ..mbpp.mbpp_gen_4104e4 import mbpp_datasets
    from ..lambada.lambada_gen_7ffe3d import lambada_datasets
    from ..storycloze.storycloze_ppl_c1912d import storycloze_datasets
    from ..SuperGLUE_AX_b.SuperGLUE_AX_b_ppl_4bd960 import AX_b_datasets
    from ..SuperGLUE_AX_g.SuperGLUE_AX_g_ppl_8d9bf9 import AX_g_datasets
    from ..SuperGLUE_BoolQ.SuperGLUE_BoolQ_ppl_f80fb0 import BoolQ_datasets
    from ..SuperGLUE_CB.SuperGLUE_CB_ppl_32adbb import CB_datasets
    from ..SuperGLUE_COPA.SuperGLUE_COPA_ppl_ddb78c import COPA_datasets
    from ..SuperGLUE_MultiRC.SuperGLUE_MultiRC_ppl_83a304 import MultiRC_datasets
    from ..SuperGLUE_RTE.SuperGLUE_RTE_ppl_29a22c import RTE_datasets
    from ..SuperGLUE_ReCoRD.SuperGLUE_ReCoRD_gen_d8f19c import ReCoRD_datasets
    from ..SuperGLUE_WiC.SuperGLUE_WiC_ppl_4118db import WiC_datasets
    from ..SuperGLUE_WSC.SuperGLUE_WSC_ppl_85f45f import WSC_datasets
    from ..race.race_ppl_04e06a import race_datasets
    from ..math.math_gen_78bcba import math_datasets
    from ..gsm8k.gsm8k_gen_2dd372 import gsm8k_datasets
    from ..summedits.summedits_ppl_163352 import summedits_datasets
    from ..hellaswag.hellaswag_ppl_8e07d6 import hellaswag_datasets
    from ..piqa.piqa_ppl_788dbe import piqa_datasets
    from ..winogrande.winogrande_ppl_00f8ad import winogrande_datasets
    from ..obqa.obqa_ppl_2b5b12 import obqa_datasets
    from ..nq.nq_gen_c00b89 import nq_datasets
    from ..triviaqa.triviaqa_gen_cc3cbf import triviaqa_datasets
    from ..crowspairs.crowspairs_ppl_f60797 import crowspairs_datasets
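# The expression below concatenates every `*_datasets` list imported above
# (sum() with an empty-list start value), producing the single `datasets`
# list that the OpenCompass entry point reads as the full evaluation suite.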
datasets = sum((v for k, v in locals().items() if k.endswith('_datasets')), [])
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.datasets import NaturalQuestionDataset, NQEvaluator
nq_reader_cfg = dict(
    input_columns=['question'], output_column='answer', train_split='test')
nq_infer_cfg = dict(
    ice_template=dict(
        type=PromptTemplate,
        template="Q: </Q>?\nA: </A>",
        column_token_map={
            'question': '</Q>',
            'answer': '</A>'
        }),
    prompt_template=dict(
        type=PromptTemplate,
        template="</E>Question: </Q>? Answer: ",
        column_token_map={
            'question': '</Q>',
            'answer': '</A>'
        },
        ice_token='</E>'),
    retriever=dict(type=ZeroRetriever),
    inferencer=dict(type=GenInferencer))
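# In the templates above, `column_token_map` substitutes dataset columns for
# the `</Q>`/`</A>` tokens, and `ice_token` marks where in-context examples
# rendered from `ice_template` are spliced into the prompt; with ZeroRetriever
# no examples are retrieved, so `</E>` resolves to an empty string.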
nq_eval_cfg = dict(evaluator=dict(type=NQEvaluator))
nq_datasets = [
    dict(
        type=NaturalQuestionDataset,
        abbr='nq',
        path='/mnt/petrelfs/wuzhiyong/datasets/nq/',
        reader_cfg=nq_reader_cfg,
        infer_cfg=nq_infer_cfg,
        eval_cfg=nq_eval_cfg)
]
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.datasets import TriviaQADataset, TriviaQAEvaluator
triviaqa_reader_cfg = dict(
    input_columns=['question'],
    output_column='answer',
    train_split='dev',
    test_split='dev')
triviaqa_infer_cfg = dict(
    ice_template=dict(
        type=PromptTemplate,
        template='Q: </Q>\nA: </A>',
        column_token_map={
            'question': '</Q>',
            'answer': '</A>'
        }),
    prompt_template=dict(
        type=PromptTemplate,
        template='</E>Question: </Q> Answer:',
        column_token_map={
            'question': '</Q>',
            'answer': '</A>'
        },
        ice_token='</E>'),
    retriever=dict(type=ZeroRetriever),
    inferencer=dict(type=GenInferencer, max_out_len=50))
triviaqa_eval_cfg = dict(evaluator=dict(type=TriviaQAEvaluator))
triviaqa_datasets = [
    dict(
        type=TriviaQADataset,
        abbr='triviaqa',
        path='./data/triviaqa/',
        reader_cfg=triviaqa_reader_cfg,
        infer_cfg=triviaqa_infer_cfg,
        eval_cfg=triviaqa_eval_cfg)
]
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.datasets import HFDataset, HumanEvaluator
humaneval_reader_cfg = dict(
    input_columns=['prompt'], output_column='task_id', train_split='test')
# TODO: allow empty output-column
humaneval_infer_cfg = dict(
    prompt_template=dict(
        type=PromptTemplate,
        template=dict(
            begin=[
                dict(
                    role='SYSTEM',
                    fallback_role='HUMAN',
                    prompt='Complete the following python code:'),
            ],
            round=[
                dict(role='HUMAN', prompt='{prompt}'),
            ])),
    retriever=dict(type=ZeroRetriever),
    inferencer=dict(type=GenInferencer))
humaneval_eval_cfg = dict(
    evaluator=dict(type=HumanEvaluator),
    pred_role='BOT',
    k=[1, 10, 100],  # pass@k cutoffs; this parameter applies only to humaneval
    pred_postprocessor=dict(type='humaneval'),
)
humaneval_datasets = [
    dict(
        type=HFDataset,
        path='openai_humaneval',
        reader_cfg=humaneval_reader_cfg,
        infer_cfg=humaneval_infer_cfg,
        eval_cfg=humaneval_eval_cfg)
]
from mmengine.config import read_base
with read_base():
    from .jigsawmultilingual_ppl_640128 import jigsawmultilingual_datasets  # noqa: F401, F403
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import CLPInferencer
from opencompass.openicl.icl_evaluator import AUCROCEvaluator
from opencompass.datasets import JigsawMultilingualDataset
jigsawmultilingual_reader_cfg = dict(
    input_columns=['text'],
    output_column='label',
    train_split='test',
    test_split='test')
# The Jigsaw multilingual toxic comment dataset is not hosted on Hugging Face.
# Please download the test files `test.csv` and `test_labels.csv` from the
# link below and move them to the data/ folder to perform the evaluation.
# https://www.kaggle.com/competitions/jigsaw-multilingual-toxic-comment-classification/data
jigsawmultilingual_infer_cfg = dict(
    prompt_template=dict(
        type=PromptTemplate,
        template=dict(round=[
            dict(
                role="HUMAN",
                prompt="Text: {text}\nQuestion: Does the above text contain "
                "rude, hateful, aggressive, disrespectful or unreasonable "
                "language?\nAnswer:")
        ])),
    retriever=dict(type=ZeroRetriever),
    inferencer=dict(type=CLPInferencer))
jigsawmultilingual_eval_cfg = dict(evaluator=dict(type=AUCROCEvaluator), )
lang = ['es', 'fr', 'it', 'pt', 'ru', 'tr']
jigsawmultilingual_datasets = []
for _l in lang:
    jigsawmultilingual_datasets.append(
        dict(
            abbr=f'jigsaw_multilingual_{_l}',
            type=JigsawMultilingualDataset,
            path='data/test.csv',
            label='data/test_labels.csv',
            lang=_l,
            reader_cfg=jigsawmultilingual_reader_cfg,
            infer_cfg=jigsawmultilingual_infer_cfg,
            eval_cfg=jigsawmultilingual_eval_cfg))
del lang, _l
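# A minimal, standalone sketch of how one might sanity-check that the manually
# downloaded Kaggle files referenced above are in place before launching the
# evaluation. It is illustrative only: the file layout follows the comment
# block above, and nothing here is part of the OpenCompass API.
import csv
import os
for _f in ('data/test.csv', 'data/test_labels.csv'):
    if not os.path.exists(_f):
        raise FileNotFoundError(
            f'{_f} is missing; download it from the Kaggle competition page '
            'listed above and place it under data/.')
with open('data/test_labels.csv', newline='', encoding='utf-8') as fh:
    header = next(csv.reader(fh))  # peek at the header row only
print('test_labels.csv columns:', header)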
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import CLPInferencer
from opencompass.openicl.icl_evaluator import AUCROCEvaluator
from opencompass.datasets import JigsawMultilingualDataset
jigsawmultilingual_reader_cfg = dict(
    input_columns=['text'],
    output_column='label',
    train_split='test',
    test_split='test')
# The Jigsaw multilingual toxic comment dataset is not hosted on Hugging Face.
# Please download the test files `test.csv` and `test_labels.csv` from the
# link below and move them to the data/ folder to perform the evaluation.
# https://www.kaggle.com/competitions/jigsaw-multilingual-toxic-comment-classification/data
jigsawmultilingual_infer_cfg = dict(
    prompt_template=dict(
        type=PromptTemplate,
        template='Text: {text}\nQuestion: '
        'Does the above text contain rude, hateful, aggressive, disrespectful '
        'or unreasonable language?\nAnswer:'),
    retriever=dict(type=ZeroRetriever),
    inferencer=dict(type=CLPInferencer))
jigsawmultilingual_eval_cfg = dict(evaluator=dict(type=AUCROCEvaluator), )
lang = ['es', 'fr', 'it', 'pt', 'ru', 'tr']
jigsawmultilingual_datasets = []
for _l in lang:
    jigsawmultilingual_datasets.append(
        dict(
            abbr=f'jigsaw_multilingual_{_l}',
            type=JigsawMultilingualDataset,
            path='data/test.csv',
            label='data/test_labels.csv',
            lang=_l,
            reader_cfg=jigsawmultilingual_reader_cfg,
            infer_cfg=jigsawmultilingual_infer_cfg,
            eval_cfg=jigsawmultilingual_eval_cfg))
del lang, _l
from mmengine.config import read_base
with read_base():
    from .lambada_gen_7ffe3d import lambada_datasets  # noqa: F401, F403
from mmengine.config import read_base
with read_base():
    from .math_gen_78bcba import math_datasets  # noqa: F401, F403
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.datasets import MATHDataset, MATHEvaluator
math_reader_cfg = dict(input_columns=['problem'], output_column='solution')
math_infer_cfg = dict(
    prompt_template=dict(
        type=PromptTemplate,
        template='''Problem:
Find the domain of the expression $\\frac{{\\sqrt{{x-2}}}}{{\\sqrt{{5-x}}}}$.}}
Solution:
The expressions inside each square root must be non-negative. Therefore, $x-2 \\ge 0$, so $x\\ge2$, and $5 - x \\ge 0$, so $x \\le 5$. Also, the denominator cannot be equal to zero, so $5-x>0$, which gives $x<5$. Therefore, the domain of the expression is $\\boxed{{[2,5)}}$.
Final Answer: The final answer is $[2,5)$. I hope it is correct.
Problem:
If $\\det \\mathbf{{A}} = 2$ and $\\det \\mathbf{{B}} = 12,$ then find $\\det (\\mathbf{{A}} \\mathbf{{B}}).$
Solution:
We have that $\\det (\\mathbf{{A}} \\mathbf{{B}}) = (\\det \\mathbf{{A}})(\\det \\mathbf{{B}}) = (2)(12) = \\boxed{{24}}.$
Final Answer: The final answer is $24$. I hope it is correct.
Problem:
Terrell usually lifts two 20-pound weights 12 times. If he uses two 15-pound weights instead, how many times must Terrell lift them in order to lift the same total weight?
Solution:
If Terrell lifts two 20-pound weights 12 times, he lifts a total of $2\\cdot 12\\cdot20=480$ pounds of weight. If he lifts two 15-pound weights instead for $n$ times, he will lift a total of $2\\cdot15\\cdot n=30n$ pounds of weight. Equating this to 480 pounds, we can solve for $n$: \\begin{{align*}} 30n&=480\\\\ \\Rightarrow\\qquad n&=480/30=\\boxed{{16}} \\end{{align*}}
Final Answer: The final answer is $16$. I hope it is correct.
Problem:
If the system of equations: \\begin{{align*}} 6x-4y&=a,\\\\ 6y-9x &=b. \\end{{align*}}has a solution $(x, y)$ where $x$ and $y$ are both nonzero, find $\\frac{{a}}{{b}},$ assuming $b$ is nonzero.
Solution:
If we multiply the first equation by $-\\frac{{3}}{{2}}$, we obtain $$6y-9x=-\\frac{{3}}{{2}}a.$$Since we also know that $6y-9x=b$, we have $$-\\frac{{3}}{{2}}a=b\\Rightarrow\\frac{{a}}{{b}}=\\boxed{{-\\frac{{2}}{{3}}}}.$$
Final Answer: The final answer is $-\\frac{{2}}{{3}}$. I hope it is correct.
Problem:
{problem}
Solution:
{solution}'''),
    retriever=dict(type=ZeroRetriever),
    inferencer=dict(type=GenInferencer, max_out_len=512))
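# Note on the few-shot template above: literal LaTeX braces are written
# doubled (`{{` / `}}`) because the template is later formatted with the
# `{problem}` and `{solution}` placeholders, so single braces would otherwise
# be interpreted as format fields; LaTeX backslashes are escaped as `\\` so
# that Python does not treat sequences like `\f` or `\b` as string escapes.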
math_eval_cfg = dict(
    evaluator=dict(type=MATHEvaluator), pred_postprocessor=dict(type='math'))
math_datasets = [
    dict(
        type=MATHDataset,
        abbr='math',
        path='./data/math/math.json',
        reader_cfg=math_reader_cfg,
        infer_cfg=math_infer_cfg,
        eval_cfg=math_eval_cfg)
]