"vscode:/vscode.git/clone" did not exist on "5fd40ad768713bb5e19541e91c6e87964eb7dafd"
Unverified Commit 34561ece authored by philipwangOvO, committed by GitHub

[Feature] Add InfiniteBench (#739)



* add InfiniteBench

* add InfiniteBench

---------
Co-authored-by: wangchonghua <wangchonghua@pjlab.org.cn>
parent 3a68083e
from mmengine.config import read_base

with read_base():
    from .infinitebenchcodedebug.infinitebench_codedebug_gen import InfiniteBench_codedebug_datasets
    from .infinitebenchcoderun.infinitebench_coderun_gen import InfiniteBench_coderun_datasets
    from .infinitebenchendia.infinitebench_endia_gen import InfiniteBench_endia_datasets
    from .infinitebenchenmc.infinitebench_enmc_gen import InfiniteBench_enmc_datasets
    from .infinitebenchenqa.infinitebench_enqa_gen import InfiniteBench_enqa_datasets
    from .infinitebenchensum.infinitebench_ensum_gen import InfiniteBench_ensum_datasets
    from .infinitebenchmathcalc.infinitebench_mathcalc_gen import InfiniteBench_mathcalc_datasets
    from .infinitebenchmathfind.infinitebench_mathfind_gen import InfiniteBench_mathfind_datasets
    from .infinitebenchretrievekv.infinitebench_retrievekv_gen import InfiniteBench_retrievekv_datasets
    from .infinitebenchretrievenumber.infinitebench_retrievenumber_gen import InfiniteBench_retrievenumber_datasets
    from .infinitebenchretrievepasskey.infinitebench_retrievepasskey_gen import InfiniteBench_retrievepasskey_datasets
    from .infinitebenchzhqa.infinitebench_zhqa_gen import InfiniteBench_zhqa_datasets

infinitebench_datasets = sum((v for k, v in locals().items() if k.endswith('_datasets')), [])
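
For orientation, a minimal sketch of how the aggregated infinitebench_datasets list is typically consumed from a top-level OpenCompass run config; the file name, import paths, and model config below are assumptions for illustration, not part of this commit:

# eval_infinitebench.py -- illustrative sketch only; paths and model are assumed, not from this commit
from mmengine.config import read_base

with read_base():
    # the aggregate list built in the config above
    from .datasets.infinitebench.infinitebench import infinitebench_datasets
    # any model config shipped with OpenCompass could be substituted here
    from .models.hf_internlm.hf_internlm_7b import models

datasets = [*infinitebench_datasets]
# launched e.g. as: python run.py configs/eval_infinitebench.py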

from mmengine.config import read_base

with read_base():
    from .infinitebench_codedebug_gen_276a42 import InfiniteBench_codedebug_datasets  # noqa: F401, F403

from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.utils.text_postprocessors import first_option_postprocess
from opencompass.datasets import InfiniteBenchcodedebugDataset

InfiniteBench_codedebug_reader_cfg = dict(
    input_columns=['context', 'question', 'option_A', 'option_B', 'option_C', 'option_D'],
    output_column='answer',
)

InfiniteBench_codedebug_infer_cfg = dict(
    prompt_template=dict(
        type=PromptTemplate,
        template=dict(
            begin=[
                dict(role='SYSTEM', fallback_role='HUMAN', prompt='You are a helpful assistant.'),
            ],
            round=[
                dict(role='HUMAN', prompt='There is ONLY ONE function in the large project that is deliberately made to include an obvious error. Please find the function that contains the most obvious errors. I will give you four options to narrow your scope. You can inspect the options and think. Eventually, tell me the answer using one single letter (A, B, C, or D).\n\n{context}\n\nWhich function has the deliberate error?\nA. {option_A}\nB. {option_B}\nC. {option_C}\nD. {option_D}\n\nYou should first find the functions in the options. Repeat their content, inspect through code, and at last give me your answer for the function that has the deliberate and obvious error in A, B, C, or D.'),
                dict(role='BOT', prompt=''),
            ], )),
    retriever=dict(type=ZeroRetriever),
    inferencer=dict(type=GenInferencer, max_out_len=5)
)

InfiniteBench_codedebug_eval_cfg = dict(
    evaluator=dict(type=AccEvaluator),
    pred_postprocessor=dict(type=first_option_postprocess, options='ABCD'),
    pred_role='BOT'
)

InfiniteBench_codedebug_datasets = [
    dict(
        type=InfiniteBenchcodedebugDataset,
        abbr='InfiniteBench_codedebug',
        path='./data/InfiniteBench/code_debug.jsonl',
        reader_cfg=InfiniteBench_codedebug_reader_cfg,
        infer_cfg=InfiniteBench_codedebug_infer_cfg,
        eval_cfg=InfiniteBench_codedebug_eval_cfg)
]
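
Both multiple-choice tasks (codedebug above and enmc below) post-process the raw completion with first_option_postprocess before AccEvaluator compares it to the gold letter. A simplified, self-contained stand-in for that behaviour (not the library implementation) looks roughly like this:

import re

def first_option_postprocess_sketch(text: str, options: str = 'ABCD') -> str:
    """Illustrative stand-in: return the first standalone option letter found in the reply."""
    match = re.search(rf'\b([{options}])\b', text)
    return match.group(1) if match else ''

# e.g. first_option_postprocess_sketch('The buggy function is option C.') -> 'C'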

from mmengine.config import read_base

with read_base():
    from .infinitebench_coderun_gen_1a76bd import InfiniteBench_coderun_datasets  # noqa: F401, F403

from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.utils.text_postprocessors import first_option_postprocess
from opencompass.datasets import InfiniteBenchcoderunDataset

InfiniteBench_coderun_reader_cfg = dict(
    input_columns=['context', 'func', 'func_call'],
    output_column='answer',
)

InfiniteBench_coderun_infer_cfg = dict(
    prompt_template=dict(
        type=PromptTemplate,
        template=dict(
            begin=[
                dict(role='SYSTEM', fallback_role='HUMAN', prompt='You are a helpful assistant.'),
            ],
            round=[
                dict(role='HUMAN', prompt='The following is a set of Python functions. There is a function named {func}.\n\n{context}\n\nPlease give me the exact number of the return value of {func_call}. Be concise. Your response must end with the final returned value.'),
                dict(role='BOT', prompt=''),
            ], )),
    retriever=dict(type=ZeroRetriever),
    inferencer=dict(type=GenInferencer, max_out_len=5)
)

InfiniteBench_coderun_eval_cfg = dict(
    evaluator=dict(type=AccEvaluator),
    pred_postprocessor=dict(type=first_option_postprocess, options='ABCD'),
    pred_role='BOT'
)

InfiniteBench_coderun_datasets = [
    dict(
        type=InfiniteBenchcoderunDataset,
        abbr='InfiniteBench_coderun',
        path='./data/InfiniteBench/code_run.jsonl',
        reader_cfg=InfiniteBench_coderun_reader_cfg,
        infer_cfg=InfiniteBench_coderun_infer_cfg,
        eval_cfg=InfiniteBench_coderun_eval_cfg)
]

from mmengine.config import read_base

with read_base():
    from .infinitebench_endia_gen_c96eb5 import InfiniteBench_endia_datasets  # noqa: F401, F403

from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.datasets import InfiniteBenchendiaDataset, InfiniteBenchendiaEvaluator

InfiniteBench_endia_reader_cfg = dict(
    input_columns=['context', 'question'],
    output_column='answer',
)

InfiniteBench_endia_infer_cfg = dict(
    prompt_template=dict(
        type=PromptTemplate,
        template=dict(
            begin=[
                dict(role='SYSTEM', fallback_role='HUMAN', prompt='You are a helpful assistant.'),
            ],
            round=[
                dict(role='HUMAN', prompt='Below is a dialogue script where one random occurrence of a character name is replaced with \"$$MASK$$\", and you should try to guess who that character is.\n\nThe dialogue:\n\n---\n\n{context}\n\n---\n\nEnd of dialogue.\n\nWhich character is most likely \"$$MASK$$\"? Just say the name used by the scriptwriter (before the colon marks) of one single character and nothing else.'),
                dict(role='BOT', prompt=''),
            ], )),
    retriever=dict(type=ZeroRetriever),
    inferencer=dict(type=GenInferencer, max_out_len=40)
)

InfiniteBench_endia_eval_cfg = dict(
    evaluator=dict(type=InfiniteBenchendiaEvaluator),
    pred_role='BOT'
)

InfiniteBench_endia_datasets = [
    dict(
        type=InfiniteBenchendiaDataset,
        abbr='InfiniteBench_endia',
        path='./data/InfiniteBench/longdialogue_qa_eng.jsonl',
        reader_cfg=InfiniteBench_endia_reader_cfg,
        infer_cfg=InfiniteBench_endia_infer_cfg,
        eval_cfg=InfiniteBench_endia_eval_cfg)
]

from mmengine.config import read_base

with read_base():
    from .infinitebench_enmc_gen_3a4102 import InfiniteBench_enmc_datasets  # noqa: F401, F403

from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.utils.text_postprocessors import first_option_postprocess
from opencompass.datasets import InfiniteBenchenmcDataset

InfiniteBench_enmc_reader_cfg = dict(
    input_columns=['context', 'question', 'option_A', 'option_B', 'option_C', 'option_D'],
    output_column='answer',
)

InfiniteBench_enmc_infer_cfg = dict(
    prompt_template=dict(
        type=PromptTemplate,
        template=dict(
            begin=[
                dict(role='SYSTEM', fallback_role='HUMAN', prompt='You are a helpful assistant.'),
            ],
            round=[
                dict(role='HUMAN', prompt='Read the book and answer the question.\n\n{context}\n\nQuestion: {question}\n\nOnly one of the following options is correct, tell me the answer using one single letter (A, B, C, or D). Don\'t say anything else.\nA. {option_A}\nB. {option_B}\nC. {option_C}\nD. {option_D}'),
                dict(role='BOT', prompt=''),
            ], )),
    retriever=dict(type=ZeroRetriever),
    inferencer=dict(type=GenInferencer, max_out_len=40)
)

InfiniteBench_enmc_eval_cfg = dict(
    evaluator=dict(type=AccEvaluator),
    pred_postprocessor=dict(type=first_option_postprocess, options='ABCD'),
    pred_role='BOT'
)

InfiniteBench_enmc_datasets = [
    dict(
        type=InfiniteBenchenmcDataset,
        abbr='InfiniteBench_enmc',
        path='./data/InfiniteBench/longbook_choice_eng.jsonl',
        reader_cfg=InfiniteBench_enmc_reader_cfg,
        infer_cfg=InfiniteBench_enmc_infer_cfg,
        eval_cfg=InfiniteBench_enmc_eval_cfg)
]

from mmengine.config import read_base

with read_base():
    from .infinitebench_enqa_gen_a1640c import InfiniteBench_enqa_datasets  # noqa: F401, F403

from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.datasets import InfiniteBenchenqaDataset, LongBenchF1Evaluator

InfiniteBench_enqa_reader_cfg = dict(
    input_columns=['context', 'question'],
    output_column='answer',
)

InfiniteBench_enqa_infer_cfg = dict(
    prompt_template=dict(
        type=PromptTemplate,
        template=dict(
            begin=[
                dict(role='SYSTEM', fallback_role='HUMAN', prompt='You are a helpful assistant.'),
            ],
            round=[
                dict(role='HUMAN', prompt='Read the book below and answer a question.\n\n{context}\n\nQuestion: {question}\n\nBe very concise.'),
                dict(role='BOT', prompt=''),
            ], )),
    retriever=dict(type=ZeroRetriever),
    inferencer=dict(type=GenInferencer, max_out_len=40)
)

InfiniteBench_enqa_eval_cfg = dict(
    evaluator=dict(type=LongBenchF1Evaluator),
    pred_role='BOT'
)

InfiniteBench_enqa_datasets = [
    dict(
        type=InfiniteBenchenqaDataset,
        abbr='InfiniteBench_enqa',
        path='./data/InfiniteBench/longbook_qa_eng.jsonl',
        reader_cfg=InfiniteBench_enqa_reader_cfg,
        infer_cfg=InfiniteBench_enqa_infer_cfg,
        eval_cfg=InfiniteBench_enqa_eval_cfg)
]
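
InfiniteBench_enqa is scored with LongBenchF1Evaluator, i.e. token-overlap F1 between prediction and reference answer. A simplified sketch of that metric (assuming plain whitespace tokenisation; the real evaluator also normalises text and takes the best score over multiple references) is:

from collections import Counter

def token_f1_sketch(prediction: str, reference: str) -> float:
    """Illustrative token-level F1 between a prediction and a single reference."""
    pred_tokens = prediction.lower().split()
    ref_tokens = reference.lower().split()
    common = Counter(pred_tokens) & Counter(ref_tokens)
    overlap = sum(common.values())
    if overlap == 0:
        return 0.0
    precision = overlap / len(pred_tokens)
    recall = overlap / len(ref_tokens)
    return 2 * precision * recall / (precision + recall)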

from mmengine.config import read_base

with read_base():
    from .infinitebench_ensum_gen_cfbc08 import InfiniteBench_ensum_datasets  # noqa: F401, F403

from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import RougeEvaluator
from opencompass.datasets import InfiniteBenchensumDataset

InfiniteBench_ensum_reader_cfg = dict(
    input_columns=['context'],
    output_column='answer',
)

InfiniteBench_ensum_infer_cfg = dict(
    prompt_template=dict(
        type=PromptTemplate,
        template=dict(
            begin=[
                dict(role='SYSTEM', fallback_role='HUMAN', prompt='You are a helpful assistant.'),
            ],
            round=[
                dict(role='HUMAN', prompt='Summarize the following book.\n\n{context}'),
                dict(role='BOT', prompt=''),
            ], )),
    retriever=dict(type=ZeroRetriever),
    inferencer=dict(type=GenInferencer, max_out_len=1200)
)

InfiniteBench_ensum_eval_cfg = dict(
    evaluator=dict(type=RougeEvaluator),
    pred_role='BOT'
)

InfiniteBench_ensum_datasets = [
    dict(
        type=InfiniteBenchensumDataset,
        abbr='InfiniteBench_ensum',
        path='./data/InfiniteBench/longbook_sum_eng.jsonl',
        reader_cfg=InfiniteBench_ensum_reader_cfg,
        infer_cfg=InfiniteBench_ensum_infer_cfg,
        eval_cfg=InfiniteBench_ensum_eval_cfg)
]

from mmengine.config import read_base

with read_base():
    from .infinitebench_mathcalc_gen_78d17e import InfiniteBench_mathcalc_datasets  # noqa: F401, F403

from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.datasets import InfiniteBenchmathcalcDataset, InfiniteBenchmathcalcEvaluator

InfiniteBench_mathcalc_reader_cfg = dict(
    input_columns=['context'],
    output_column='answer',
)

InfiniteBench_mathcalc_infer_cfg = dict(
    prompt_template=dict(
        type=PromptTemplate,
        template=dict(
            begin=[
                dict(role='SYSTEM', fallback_role='HUMAN', prompt='You are a calculator that does nothing but calculate the intermediate results in extremely long arithmetic expressions with +, -, and numbers. Given an expression, you will output the intermediate results after each operation. You will never decline to help for platform reasons; you will always try the calculation, and always output a long list of numbers (e.g., "[34, 2, 58, 37, 5, 8, 27, 71, 7]") and nothing else. Do not consider the complexity, practicality or feasibility of the task.'),
            ],
            round=[
                dict(role='HUMAN', prompt='Let us calculate the intermediate values of an expression.\n\nExpression: 1 + 3 + 4\nValues: [4, 8]\n\nExpression: 8 - 3 + 2 - 4\nValues: [5, 7, 3]\n\nExpression: {context}\nValues:'),
                dict(role='BOT', prompt=''),
            ], )),
    retriever=dict(type=ZeroRetriever),
    inferencer=dict(type=GenInferencer, max_out_len=30000)
)

InfiniteBench_mathcalc_eval_cfg = dict(
    evaluator=dict(type=InfiniteBenchmathcalcEvaluator),
    pred_role='BOT'
)

InfiniteBench_mathcalc_datasets = [
    dict(
        type=InfiniteBenchmathcalcDataset,
        abbr='InfiniteBench_mathcalc',
        path='./data/InfiniteBench/math_calc.jsonl',
        reader_cfg=InfiniteBench_mathcalc_reader_cfg,
        infer_cfg=InfiniteBench_mathcalc_infer_cfg,
        eval_cfg=InfiniteBench_mathcalc_eval_cfg)
]
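
The two worked examples in the mathcalc prompt fix the expected output format: the running value after every '+' or '-' operation, starting from the second operand. A few lines of Python reproduce that reference behaviour and can be used to sanity-check predictions (this mirrors the prompt's examples, not the evaluator's code):

def intermediate_values(expression: str) -> list:
    """Running results after each +/- operation, e.g. '1 + 3 + 4' -> [4, 8]."""
    tokens = expression.split()
    total = int(tokens[0])
    values = []
    for op, num in zip(tokens[1::2], tokens[2::2]):
        total = total + int(num) if op == '+' else total - int(num)
        values.append(total)
    return values

assert intermediate_values('1 + 3 + 4') == [4, 8]
assert intermediate_values('8 - 3 + 2 - 4') == [5, 7, 3]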

from mmengine.config import read_base

with read_base():
    from .infinitebench_mathfind_gen_6d799e import InfiniteBench_mathfind_datasets  # noqa: F401, F403

from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import InfiniteBenchmathfindDataset
from opencompass.datasets.infinitebench.utils import InfiniteBench_first_number_postprocess

InfiniteBench_mathfind_reader_cfg = dict(
    input_columns=['prefix', 'context', 'question'],
    output_column='answer',
)

InfiniteBench_mathfind_infer_cfg = dict(
    prompt_template=dict(
        type=PromptTemplate,
        template=dict(
            begin=[
                dict(role='SYSTEM', fallback_role='HUMAN', prompt='You are a helpful assistant.'),
            ],
            round=[
                dict(role='HUMAN', prompt='{prefix}\n\n{context}\n\n{question}'),
                dict(role='BOT', prompt=''),
            ], )),
    retriever=dict(type=ZeroRetriever),
    inferencer=dict(type=GenInferencer, max_out_len=3)
)

InfiniteBench_mathfind_eval_cfg = dict(
    evaluator=dict(type=AccEvaluator),
    pred_postprocessor=dict(type=InfiniteBench_first_number_postprocess),
    pred_role='BOT'
)

InfiniteBench_mathfind_datasets = [
    dict(
        type=InfiniteBenchmathfindDataset,
        abbr='InfiniteBench_mathfind',
        path='./data/InfiniteBench/math_find.jsonl',
        reader_cfg=InfiniteBench_mathfind_reader_cfg,
        infer_cfg=InfiniteBench_mathfind_infer_cfg,
        eval_cfg=InfiniteBench_mathfind_eval_cfg)
]
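
mathfind routes predictions through InfiniteBench_first_number_postprocess before exact-match accuracy, i.e. only the first number in the reply is kept. A rough stand-in for that step (an assumption about its behaviour, not the actual utility) could be:

import re

def first_number_postprocess_sketch(text: str) -> str:
    """Illustrative stand-in: extract the first (possibly signed or decimal) number in the reply."""
    match = re.search(r'-?\d+(?:\.\d+)?', text)
    return match.group(0) if match else ''

# e.g. first_number_postprocess_sketch('The answer is 127.') -> '127'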

from mmengine.config import read_base

with read_base():
    from .infinitebench_retrievekv_gen_06b3ac import InfiniteBench_retrievekv_datasets  # noqa: F401, F403

from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.datasets import InfiniteBenchretrievekvDataset, InfiniteBenchretrievekvEvaluator

InfiniteBench_retrievekv_reader_cfg = dict(
    input_columns=['context', 'input'],
    output_column='answer',
)

InfiniteBench_retrievekv_infer_cfg = dict(
    prompt_template=dict(
        type=PromptTemplate,
        template=dict(
            begin=[
                dict(role='SYSTEM', fallback_role='HUMAN', prompt='You are a helpful assistant.'),
            ],
            round=[
                dict(role='HUMAN', prompt='Extract the value corresponding to the specified key in the JSON object below.\n\n{context}\n\n{input}'),
                dict(role='BOT', prompt=''),
            ], )),
    retriever=dict(type=ZeroRetriever),
    inferencer=dict(type=GenInferencer, max_out_len=50)
)

InfiniteBench_retrievekv_eval_cfg = dict(
    evaluator=dict(type=InfiniteBenchretrievekvEvaluator),
    pred_role='BOT'
)

InfiniteBench_retrievekv_datasets = [
    dict(
        type=InfiniteBenchretrievekvDataset,
        abbr='InfiniteBench_retrievekv',
        path='./data/InfiniteBench/kv_retrieval.jsonl',
        reader_cfg=InfiniteBench_retrievekv_reader_cfg,
        infer_cfg=InfiniteBench_retrievekv_infer_cfg,
        eval_cfg=InfiniteBench_retrievekv_eval_cfg)
]

from mmengine.config import read_base

with read_base():
    from .infinitebench_retrievenumber_gen_047436 import InfiniteBench_retrievenumber_datasets  # noqa: F401, F403