Commit 7d346000 authored by gaotongxiao

initial commit

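# SuperGLUE WSC, PPL-based evaluation: this entry config re-exports the dataset
# definitions from the versioned config referenced below.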
from mmengine.config import read_base
with read_base():
from .SuperGLUE_WSC_ppl_85f45f import WSC_datasets # noqa: F401, F403
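# SuperGLUE WSC, PPL variant with plain string templates: candidate 0 scores the
# original sentence ({text}) and candidate 1 the rewritten one ({new_text});
# AccEvaluator then measures how often the lower-perplexity candidate is correct.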
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import PPLInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import WSCDataset
WSC_reader_cfg = dict(
input_columns=['span1', 'span2', 'text', 'new_text'],
output_column='answer')
WSC_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template={
0: "{text}",
1: "{new_text}"
}),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=PPLInferencer))
WSC_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
WSC_datasets = [
dict(
type=WSCDataset,
path='json',
abbr='WSC',
data_files='./data/SuperGLUE/WSC/val.jsonl',
split='train',
reader_cfg=WSC_reader_cfg,
infer_cfg=WSC_infer_cfg,
eval_cfg=WSC_eval_cfg,
)
]
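# SuperGLUE WSC, PPL variant with chat-style (HUMAN round) templates; otherwise the
# same dataset, retriever and evaluator as the plain-string config above.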
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import PPLInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import WSCDataset
WSC_reader_cfg = dict(
input_columns=["span1", "span2", "text", "new_text"],
output_column="answer",
)
WSC_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template={
0: dict(round=[
dict(role="HUMAN", prompt="{text}"),
]),
1: dict(round=[
dict(role="HUMAN", prompt="{new_text}"),
]),
},
),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=PPLInferencer),
)
WSC_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
WSC_datasets = [
dict(
type=WSCDataset,
path="json",
abbr="WSC",
data_files="./data/SuperGLUE/WSC/val.jsonl",
split="train",
reader_cfg=WSC_reader_cfg,
infer_cfg=WSC_infer_cfg,
eval_cfg=WSC_eval_cfg,
)
]
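# TheoremQA, generation-based evaluation: entry config re-exporting the versioned
# dataset definitions referenced below.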
from mmengine.config import read_base
with read_base():
from .TheoremQA_gen_891fcf import TheoremQA_datasets # noqa: F401, F403
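# TheoremQA generation config: the answering instruction and the question are packed
# into a single HUMAN turn; predictions are cleaned by the 'TheoremQA' post-processor
# and scored with accuracy.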
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import TheoremQADataset
TheoremQA_reader_cfg = dict(
input_columns=['Question', 'Answer_type'],
output_column='Answer',
train_split='test')
TheoremQA_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template=dict(round=[
dict(
role='HUMAN',
prompt=
"""You are a mathematician, you are supposed to answer the given question. You need to output the answer in your final sentence like "Therefore, the answer is ...". The answer can only be one of the following forms:\n1. a numerical value like 0.1, no symbol and no unit at all.\n2. a list of number like [2, 3, 4].\n3. True/False.\n4. an option like (a), (b), (c), (d)\nQuestion: {Question}\nLet\'s think step by step."""
),
])),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GenInferencer))
TheoremQA_eval_cfg = dict(
evaluator=dict(type=AccEvaluator),
pred_postprocessor=dict(type='TheoremQA'))
TheoremQA_datasets = [
dict(
abbr='TheoremQA',
type=TheoremQADataset,
path="./data/TheoremQA/test.csv",
reader_cfg=TheoremQA_reader_cfg,
infer_cfg=TheoremQA_infer_cfg,
eval_cfg=TheoremQA_eval_cfg)
]
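# TheoremQA generation config, chat-oriented variant: the instruction goes out as a
# SYSTEM message (falling back to HUMAN for models without a system role) and the
# question as a separate HUMAN turn.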
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import TheoremQADataset
TheoremQA_reader_cfg = dict(
input_columns=['Question', 'Answer_type'],
output_column='Answer',
train_split='test')
TheoremQA_prompt1 = """You are a mathematician, you are supposed to answer the given question. You need to output the answer in your final sentence like "Therefore, the answer is ...". The answer can only be one of the following forms:
1. a numerical value like 0.1, no symbol and no unit at all.
2. a list of number like [2, 3, 4].
3. True/False.
4. an option like (a), (b), (c), (d)
"""
TheoremQA_prompt2 = 'Question: {Question}\nLet\'s think step by step.'
TheoremQA_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template=dict(
begin=[
dict(
role='SYSTEM',
fallback_role='HUMAN',
prompt=TheoremQA_prompt1),
],
round=[
dict(role='HUMAN', prompt=TheoremQA_prompt2),
])),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GenInferencer))
TheoremQA_eval_cfg = dict(
evaluator=dict(type=AccEvaluator),
pred_postprocessor=dict(type='TheoremQA'))
TheoremQA_datasets = [
dict(
abbr='TheoremQA',
type=TheoremQADataset,
path="./data/TheoremQA/test.csv",
reader_cfg=TheoremQA_reader_cfg,
infer_cfg=TheoremQA_infer_cfg,
eval_cfg=TheoremQA_eval_cfg)
]
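# Xsum summarization, generation-based evaluation: entry config re-exporting the
# versioned dataset definitions referenced below.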
from mmengine.config import read_base
with read_base():
from .Xsum_gen_d2126e import Xsum_datasets # noqa: F401, F403
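# Xsum generation config with a plain string prompt; summaries are scored with ROUGE
# after the 'Xsum' post-processor.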
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import RougeEvaluator
from opencompass.datasets import XsumDataset
Xsum_reader_cfg = dict(input_columns=['dialogue'], output_column='summary')
Xsum_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template='Document:{dialogue}\n'
'Based on the previous text, provide a brief single summary:'),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GenInferencer))
Xsum_eval_cfg = dict(
evaluator=dict(type=RougeEvaluator),
pred_postprocessor=dict(type='Xsum'),
)
Xsum_datasets = [
dict(
type=XsumDataset,
abbr='Xsum',
path='./data/Xsum/dev.jsonl',
reader_cfg=Xsum_reader_cfg,
infer_cfg=Xsum_infer_cfg,
eval_cfg=Xsum_eval_cfg)
]
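# Xsum generation config, chat-style variant of the prompt above; pred_role='BOT'
# restricts evaluation to the model's reply turn.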
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import RougeEvaluator
from opencompass.datasets import XsumDataset
Xsum_reader_cfg = dict(input_columns=["dialogue"], output_column="summary")
Xsum_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template=dict(round=[
dict(
role="HUMAN",
prompt=
"Document:{dialogue}\nBased on the previous text, provide a brief single summary:"
),
]),
),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GenInferencer),
)
Xsum_eval_cfg = dict(
evaluator=dict(type=RougeEvaluator),
pred_role='BOT',
pred_postprocessor=dict(type="Xsum"),
)
Xsum_datasets = [
dict(
type=XsumDataset,
abbr="Xsum",
path="./data/Xsum/dev.jsonl",
reader_cfg=Xsum_reader_cfg,
infer_cfg=Xsum_infer_cfg,
eval_cfg=Xsum_eval_cfg,
)
]
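# AGIEval (v1 loader), zero-shot generation: single-choice subsets are scored by
# extracting the first capital letter, cloze subsets by the AGIEvalEvaluator; the
# jec-qa multiple-choice subsets are commented out pending extra data preprocessing.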
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import AGIEvalDataset, AGIEvalEvaluator
agieval_reader_cfg = dict(
input_columns=['problem_input'], output_column='label')
agieval_single_choice_sets = [
'gaokao-chinese',
'gaokao-english',
'gaokao-geography',
'gaokao-history',
'gaokao-biology',
'gaokao-chemistry',
'gaokao-physics',
'gaokao-mathqa',
'logiqa-zh',
'lsat-ar',
'lsat-lr',
'lsat-rc',
'logiqa-en',
'sat-math',
'sat-en',
'sat-en-without-passage',
'aqua-rat',
]
agieval_multiple_choices_sets = [
    # 'jec-qa-kd',  # requires extra data preprocessing
    # 'jec-qa-ca',  # requires extra data preprocessing
]
agieval_cloze_sets = ['gaokao-mathcloze', 'math']
agieval_datasets = []
for name in agieval_single_choice_sets:
agieval_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template=dict(
round=[dict(role='HUMAN', prompt='{problem_input}')])),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GenInferencer, labels=['A', 'B', 'C', 'D']))
agieval_eval_cfg = dict(
evaluator=dict(type=AccEvaluator),
pred_postprocessor=dict(type='first-capital'))
agieval_datasets.append(
dict(
type=AGIEvalDataset,
path='./data/AGIEval/data/v1/',
name=name,
abbr='agieval-' + name,
setting_name='zero-shot',
reader_cfg=agieval_reader_cfg,
infer_cfg=agieval_infer_cfg.copy(),
eval_cfg=agieval_eval_cfg.copy()))
for name in agieval_cloze_sets:
agieval_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template=dict(
round=[dict(role="HUMAN", prompt='{problem_input}')])),
        # the retriever has no effect here; zero-shot / few-shot follows the input argument
        retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GenInferencer))
agieval_eval_cfg = dict(
evaluator=dict(type=AGIEvalEvaluator), pred_role="BOT")
agieval_datasets.append(
dict(
type=AGIEvalDataset,
path='./data/AGIEval/data/v1/',
name=name,
abbr='agieval-' + name,
setting_name='zero-shot',
reader_cfg=agieval_reader_cfg,
infer_cfg=agieval_infer_cfg.copy(),
eval_cfg=agieval_eval_cfg.copy()))
del name, agieval_infer_cfg, agieval_eval_cfg
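# AGIEval (v2 loader), zero-shot generation: prompts combine {question} and {options}
# with a language-matched answer hint, and the loop at the end prepends a per-subset
# introduction to every HUMAN turn.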
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import AGIEvalDataset_v2, AGIEvalEvaluator
agieval_reader_cfg = dict(
input_columns=['question', 'options'], output_column='label')
agieval_single_choice_sets = [
'gaokao-chinese',
'gaokao-english',
'gaokao-geography',
'gaokao-history',
'gaokao-biology',
'gaokao-chemistry',
'gaokao-physics',
'gaokao-mathqa',
'logiqa-zh',
'lsat-ar',
'lsat-lr',
'lsat-rc',
'logiqa-en',
'sat-math',
'sat-en',
'sat-en-without-passage',
'aqua-rat',
]
agieval_multiple_choices_sets = [
'jec-qa-kd',
'jec-qa-ca',
]
agieval_cloze_sets = ['gaokao-mathcloze', 'math']
agieval_chinese_sets = [
'gaokao-chinese',
'gaokao-english',
'gaokao-geography',
'gaokao-history',
'gaokao-biology',
'gaokao-chemistry',
'gaokao-physics',
'gaokao-mathqa',
'logiqa-zh',
'gaokao-mathcloze',
]
agieval_english_sets = [
'lsat-ar',
'lsat-lr',
'lsat-rc',
'logiqa-en',
'sat-math',
'sat-en',
'sat-en-without-passage',
'aqua-rat',
'math',
]
agieval_gaokao_sets = [
'gaokao-chinese',
'gaokao-english',
'gaokao-geography',
'gaokao-history',
'gaokao-biology',
'gaokao-chemistry',
'gaokao-physics',
'gaokao-mathqa',
]
agieval_datasets = []
for _name in agieval_single_choice_sets:
if _name in agieval_chinese_sets:
_hint = '答案是: '
else:
_hint = 'The answer is '
agieval_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template=dict(round=[
dict(role='HUMAN', prompt=f'{{question}}\n{{options}}\n{_hint}')
])),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GenInferencer, max_out_len=1024))
agieval_eval_cfg = dict(
evaluator=dict(type=AccEvaluator),
pred_postprocessor=dict(type='first-capital'))
agieval_datasets.append(
dict(
type=AGIEvalDataset_v2,
path='./data/AGIEval/data/v1/',
name=_name,
abbr='agieval-' + _name,
setting_name='zero-shot',
reader_cfg=agieval_reader_cfg,
infer_cfg=agieval_infer_cfg.copy(),
eval_cfg=agieval_eval_cfg.copy()))
for _name in agieval_multiple_choices_sets:
if _name in agieval_chinese_sets:
_hint = '答案是: '
else:
_hint = 'The answer is '
agieval_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template=dict(round=[
dict(role='HUMAN', prompt=f'{{question}}\n{{options}}\n{_hint}')
])),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GenInferencer, max_out_len=1024))
agieval_eval_cfg = dict(
evaluator=dict(type=AccEvaluator),
pred_postprocessor=dict(type='first-capital-multi'))
agieval_datasets.append(
dict(
type=AGIEvalDataset_v2,
path='./data/AGIEval/data/v1/',
name=_name,
abbr='agieval-' + _name,
setting_name='zero-shot',
reader_cfg=agieval_reader_cfg,
infer_cfg=agieval_infer_cfg.copy(),
eval_cfg=agieval_eval_cfg.copy()))
for _name in agieval_cloze_sets:
if _name in agieval_chinese_sets:
_hint = '答案是: '
else:
_hint = 'The answer is '
agieval_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template=dict(
round=[dict(role='HUMAN', prompt=f'{{question}}\n{_hint}')])),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GenInferencer, max_out_len=1024))
agieval_eval_cfg = dict(evaluator=dict(type=AGIEvalEvaluator))
agieval_datasets.append(
dict(
type=AGIEvalDataset_v2,
path='./data/AGIEval/data/v1/',
name=_name,
abbr='agieval-' + _name,
setting_name='zero-shot',
reader_cfg=agieval_reader_cfg,
infer_cfg=agieval_infer_cfg.copy(),
eval_cfg=agieval_eval_cfg.copy()))
for _item in agieval_datasets:
_name = _item['name']
_intro = {
'gaokao-chinese':
'以下是一道中国高考语文选择题,请选择正确的答案。',
'gaokao-english':
'以下是一道中国高考英语选择题,请选择正确的答案。',
'gaokao-geography':
'以下是一道中国高考地理选择题,请选择正确的答案。',
'gaokao-history':
'以下是一道中国高考历史选择题,请选择正确的答案。',
'gaokao-biology':
'以下是一道中国高考生物选择题,请选择正确的答案。',
'gaokao-chemistry':
'以下是一道中国高考化学选择题,请选择正确的答案。',
'gaokao-physics':
'以下是一道中国高考物理选择题,请选择正确的答案。',
'gaokao-mathqa':
'以下是一道中国高考数学选择题,请选择正确的答案。',
'logiqa-zh':
'以下是一道中国公务员考试题,请选择正确的答案。',
'lsat-ar':
'The following is a LSAT Analytical Reasoning question. Please select the correct answer.',
'lsat-lr':
'The following is a LSAT Logical Reasoning question. Please select the correct answer.',
'lsat-rc':
'The following is a LSAT Reading Comprehension question. Please select the correct answer.',
'logiqa-en':
'The following is a Logic Reasoning question. Please select the correct answer.',
'sat-math':
'The following is a SAT Math question. Please select the correct answer.',
'sat-en':
'The following is a SAT English question. Please select the correct answer.',
'sat-en-without-passage':
'The following is a SAT English question. Please select the correct answer.',
'aqua-rat':
'The following is a AQUA-RAT question. Please select the correct answer.',
'jec-qa-kd':
'以下是一道中国司法考试基础知识题,请选择正确的答案。',
'jec-qa-ca':
'以下是一道中国司法考试案例分析题,请选择正确的答案。',
'gaokao-mathcloze':
'以下是一道中国高考数学填空题,请填入正确的答案。',
'math':
'The following is a Math question. Please select the correct answer.',
}[_name]
_templates = _item['infer_cfg']['prompt_template']['template']
_templates['round'][0][
'prompt'] = _intro + '\n' + _templates['round'][0]['prompt']
del _item, _intro, _templates, _name, _hint, agieval_infer_cfg, agieval_eval_cfg
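# AGIEval (v2 loader), mixed setup: single-choice subsets use per-option PPL
# templates, while multiple-choice and cloze subsets fall back to generation; the
# final loop again prepends the per-subset introductions, handling both template
# shapes.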
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import PPLInferencer, GenInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import AGIEvalDataset_v2, AGIEvalEvaluator
agieval_single_choice_sets = [
'gaokao-chinese',
'gaokao-english',
'gaokao-geography',
'gaokao-history',
'gaokao-biology',
'gaokao-chemistry',
'gaokao-physics',
'gaokao-mathqa',
'logiqa-zh',
'lsat-ar',
'lsat-lr',
'lsat-rc',
'logiqa-en',
'sat-math',
'sat-en',
'sat-en-without-passage',
'aqua-rat',
]
agieval_multiple_choices_sets = [
'jec-qa-kd',
'jec-qa-ca',
]
agieval_cloze_sets = ['gaokao-mathcloze', 'math']
agieval_chinese_sets = [
'gaokao-chinese',
'gaokao-english',
'gaokao-geography',
'gaokao-history',
'gaokao-biology',
'gaokao-chemistry',
'gaokao-physics',
'gaokao-mathqa',
'logiqa-zh',
'gaokao-mathcloze',
]
agieval_english_sets = [
'lsat-ar',
'lsat-lr',
'lsat-rc',
'logiqa-en',
'sat-math',
'sat-en',
'sat-en-without-passage',
'aqua-rat',
'math',
]
agieval_gaokao_sets = [
'gaokao-chinese',
'gaokao-english',
'gaokao-geography',
'gaokao-history',
'gaokao-biology',
'gaokao-chemistry',
'gaokao-physics',
'gaokao-mathqa',
]
agieval_datasets = []
for _name in agieval_single_choice_sets:
if _name in ['lsat-ar', 'lsat-lr', 'lsat-rc', 'aqua-rat']:
_options = ['A', 'B', 'C', 'D', 'E']
else:
_options = ['A', 'B', 'C', 'D']
if _name in agieval_chinese_sets:
_hint = '答案是:'
else:
_hint = 'The answer is '
agieval_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template={
label: dict(round=[
dict(role='HUMAN', prompt='{question}\n{options}'),
dict(role='BOT', prompt=f'{_hint}{label}')
])
for label in _options
}),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=PPLInferencer, labels=_options))
agieval_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
agieval_datasets.append(
dict(
type=AGIEvalDataset_v2,
path='./data/AGIEval/data/v1/',
name=_name,
abbr='agieval-' + _name,
setting_name='zero-shot',
reader_cfg=dict(
input_columns=['question', 'options'] + _options,
output_column='label'),
infer_cfg=agieval_infer_cfg.copy(),
eval_cfg=agieval_eval_cfg.copy()))
for _name in agieval_multiple_choices_sets:
if _name in agieval_chinese_sets:
_hint = '答案是: '
else:
_hint = 'The answer is '
agieval_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template=dict(round=[
dict(role='HUMAN', prompt=f'{{question}}\n{{options}}\n{_hint}')
])),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GenInferencer, max_out_len=1024))
agieval_eval_cfg = dict(
evaluator=dict(type=AccEvaluator),
pred_postprocessor=dict(type='first-capital-multi'))
agieval_datasets.append(
dict(
type=AGIEvalDataset_v2,
path='./data/AGIEval/data/v1/',
name=_name,
abbr='agieval-' + _name,
setting_name='zero-shot',
reader_cfg=dict(
input_columns=['question', 'options'], output_column='label'),
infer_cfg=agieval_infer_cfg.copy(),
eval_cfg=agieval_eval_cfg.copy()))
for _name in agieval_cloze_sets:
if _name in agieval_chinese_sets:
_hint = '答案是:'
else:
_hint = 'The answer is '
agieval_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template=dict(
round=[dict(role='HUMAN', prompt=f'{{question}}{_hint}')])),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GenInferencer, max_out_len=1024))
agieval_eval_cfg = dict(evaluator=dict(type=AGIEvalEvaluator))
agieval_datasets.append(
dict(
type=AGIEvalDataset_v2,
path='./data/AGIEval/data/v1/',
name=_name,
abbr='agieval-' + _name,
setting_name='zero-shot',
reader_cfg=dict(
input_columns=['question', 'options'], output_column='label'),
infer_cfg=agieval_infer_cfg.copy(),
eval_cfg=agieval_eval_cfg.copy()))
for _item in agieval_datasets:
_name = _item['name']
_intro = {
'gaokao-chinese':
'以下是一道中国高考语文选择题,请选择正确的答案。',
'gaokao-english':
'以下是一道中国高考英语选择题,请选择正确的答案。',
'gaokao-geography':
'以下是一道中国高考地理选择题,请选择正确的答案。',
'gaokao-history':
'以下是一道中国高考历史选择题,请选择正确的答案。',
'gaokao-biology':
'以下是一道中国高考生物选择题,请选择正确的答案。',
'gaokao-chemistry':
'以下是一道中国高考化学选择题,请选择正确的答案。',
'gaokao-physics':
'以下是一道中国高考物理选择题,请选择正确的答案。',
'gaokao-mathqa':
'以下是一道中国高考数学选择题,请选择正确的答案。',
'logiqa-zh':
'以下是一道中国公务员考试题,请选择正确的答案。',
'lsat-ar':
'The following is a LSAT Analytical Reasoning question. Please select the correct answer.',
'lsat-lr':
'The following is a LSAT Logical Reasoning question. Please select the correct answer.',
'lsat-rc':
'The following is a LSAT Reading Comprehension question. Please select the correct answer.',
'logiqa-en':
'The following is a Logic Reasoning question. Please select the correct answer.',
'sat-math':
'The following is a SAT Math question. Please select the correct answer.',
'sat-en':
'The following is a SAT English question. Please select the correct answer.',
'sat-en-without-passage':
'The following is a SAT English question. Please select the correct answer.',
'aqua-rat':
'The following is a AQUA-RAT question. Please select the correct answer.',
'jec-qa-kd':
'以下是一道中国司法考试基础知识题,请选择正确的答案。',
'jec-qa-ca':
'以下是一道中国司法考试案例分析题,请选择正确的答案。',
'gaokao-mathcloze':
'以下是一道中国高考数学填空题,请填入正确的答案。',
'math':
'The following is a Math question. Please select the correct answer.',
}[_name]
_templates = _item['infer_cfg']['prompt_template']['template']
if _item['infer_cfg']['inferencer']['type'] == PPLInferencer:
for _label in _templates:
_templates[_label]['round'][0][
'prompt'] = _intro + '\n' + _templates[_label]['round'][0][
'prompt']
else:
_templates['round'][0][
'prompt'] = _intro + '\n' + _templates['round'][0]['prompt']
del _item, _intro, _templates, _label, _name, _options, _hint, agieval_infer_cfg, agieval_eval_cfg
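# APPS code generation, variant 1: the "Write a python program:" instruction is sent
# as a SYSTEM message (with HUMAN fallback) and completions are scored by the
# HumanEval-style evaluator.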
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.datasets import HFDataset, HumanEvaluator
apps_reader_cfg = dict(
input_columns=['question'], output_column='problem_id', train_split='test')
# TODO: allow empty output-column
apps_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template=dict(
begin=[
dict(
role='SYSTEM',
fallback_role='HUMAN',
prompt='Write a python program:'),
],
round=[
dict(role='HUMAN', prompt='{question}'),
])),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GenInferencer))
apps_eval_cfg = dict(
evaluator=dict(type=HumanEvaluator),
pred_role='BOT',
    k=[1, 10, 100],  # this parameter is only used for humaneval
pred_postprocessor=dict(type='humaneval'),
)
apps_datasets = [
dict(
type=HFDataset,
path='codeparrot/apps',
reader_cfg=apps_reader_cfg,
infer_cfg=apps_infer_cfg,
eval_cfg=apps_eval_cfg)
]
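# APPS code generation, variant 2: instruction and question merged into a single
# HUMAN turn; evaluation is unchanged.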
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.datasets import HFDataset, HumanEvaluator
apps_reader_cfg = dict(
input_columns=['question'], output_column='problem_id', train_split='test')
# TODO: allow empty output-column
apps_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template=dict(round=[
dict(role='HUMAN', prompt='Write a python program:\n{question}'),
])),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GenInferencer))
apps_eval_cfg = dict(
evaluator=dict(type=HumanEvaluator),
pred_role='BOT',
    k=[1, 10, 100],  # this parameter is only used for humaneval
pred_postprocessor=dict(type='humaneval'),
)
apps_datasets = [
dict(
type=HFDataset,
path='codeparrot/apps',
reader_cfg=apps_reader_cfg,
infer_cfg=apps_infer_cfg,
eval_cfg=apps_eval_cfg)
]
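# APPS code generation, variant 3: bare string template containing only the question,
# with a 512-token generation limit.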
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.datasets import HFDataset, HumanEvaluator
apps_reader_cfg = dict(
input_columns=['question'], output_column='problem_id', train_split='test')
# TODO: allow empty output-column
apps_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template='{question}'),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GenInferencer, max_out_len=512))
apps_eval_cfg = dict(
evaluator=dict(type=HumanEvaluator),
k=[1, 10, 100],
pred_postprocessor=dict(type='humaneval'),
)
apps_datasets = [
dict(
type=HFDataset,
path='codeparrot/apps',
reader_cfg=apps_reader_cfg,
infer_cfg=apps_infer_cfg,
eval_cfg=apps_eval_cfg)
]
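# BBH (BIG-Bench Hard): each subset reuses its few-shot prompt from lib_prompt and
# appends "Let's think step by step."; multiple-choice subsets are scored with
# accuracy via the 'bbh-mcq' post-processors, free-form subsets with BBHEvaluator.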
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import BBHDataset, BBHEvaluator
bbh_reader_cfg = dict(input_columns=["input"], output_column="target")
_path_prefix = "./data/BBH"
bbh_multiple_choice_sets = [
'temporal_sequences',
'disambiguation_qa',
'date_understanding',
'tracking_shuffled_objects_three_objects',
'penguins_in_a_table',
'geometric_shapes',
'snarks',
'ruin_names',
'tracking_shuffled_objects_seven_objects',
'tracking_shuffled_objects_five_objects',
'logical_deduction_three_objects',
'hyperbaton',
'logical_deduction_five_objects',
'logical_deduction_seven_objects',
'movie_recommendation',
'salient_translation_error_detection',
'reasoning_about_colored_objects',
]
bbh_free_form_sets = [
'multistep_arithmetic_two',
'navigate',
'dyck_languages',
'word_sorting',
'sports_understanding',
'boolean_expressions',
'object_counting',
'formal_fallacies',
'causal_judgement',
'web_of_lies',
]
bbh_datasets = []
for _name in bbh_multiple_choice_sets:
    with open(f"{_path_prefix}/lib_prompt/{_name}.txt", 'r') as f:
        _hint = f.read()
bbh_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template=dict(round=[
dict(
role="HUMAN",
prompt=
f"Follow the given examples and answer the question.\n{_hint}\n\nQ: {{input}}\nA: Let's think step by step."
)
])),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GenInferencer, max_out_len=512))
bbh_eval_cfg = dict(
evaluator=dict(type=AccEvaluator),
pred_role="BOT",
pred_postprocessor=dict(type='bbh-mcq'),
dataset_postprocessor=dict(type='bbh-mcq'))
bbh_datasets.append(
dict(
type=BBHDataset,
path=f"{_path_prefix}/data",
name=_name,
abbr='bbh-' + _name,
reader_cfg=bbh_reader_cfg,
infer_cfg=bbh_infer_cfg.copy(),
eval_cfg=bbh_eval_cfg.copy()))
for _name in bbh_free_form_sets:
    with open(f"{_path_prefix}/lib_prompt/{_name}.txt", 'r') as f:
        _hint = f.read()
bbh_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template=dict(round=[
dict(
role="HUMAN",
prompt=
f"Follow the given examples and answer the question.\n{_hint}\n\nQ: {{input}}\nA: Let's think step by step."
)
])),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GenInferencer, max_out_len=512))
bbh_eval_cfg = dict(evaluator=dict(type=BBHEvaluator), pred_role="BOT")
bbh_datasets.append(
dict(
type=BBHDataset,
path=f"{_path_prefix}/data",
name=_name,
abbr='bbh-' + _name,
reader_cfg=bbh_reader_cfg,
infer_cfg=bbh_infer_cfg.copy(),
eval_cfg=bbh_eval_cfg.copy()))
del _name, _hint, _path_prefix
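# C-Eval, 5-shot generation over both the val and test splits: in-context examples
# are fixed to dev ids [0-4] via FixKRetriever, and answers are scored by extracting
# the first capital letter.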
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import FixKRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import CEvalDataset
ceval_subject_mapping = {
"computer_network":
["Computer Network", "\u8ba1\u7b97\u673a\u7f51\u7edc", "STEM"],
"operating_system":
["Operating System", "\u64cd\u4f5c\u7cfb\u7edf", "STEM"],
"computer_architecture":
["Computer Architecture", "\u8ba1\u7b97\u673a\u7ec4\u6210", "STEM"],
"college_programming":
["College Programming", "\u5927\u5b66\u7f16\u7a0b", "STEM"],
"college_physics": ["College Physics", "\u5927\u5b66\u7269\u7406", "STEM"],
"college_chemistry":
["College Chemistry", "\u5927\u5b66\u5316\u5b66", "STEM"],
"advanced_mathematics":
["Advanced Mathematics", "\u9ad8\u7b49\u6570\u5b66", "STEM"],
"probability_and_statistics":
["Probability and Statistics", "\u6982\u7387\u7edf\u8ba1", "STEM"],
"discrete_mathematics":
["Discrete Mathematics", "\u79bb\u6563\u6570\u5b66", "STEM"],
"electrical_engineer": [
"Electrical Engineer", "\u6ce8\u518c\u7535\u6c14\u5de5\u7a0b\u5e08",
"STEM"
],
"metrology_engineer":
["Metrology Engineer", "\u6ce8\u518c\u8ba1\u91cf\u5e08", "STEM"],
"high_school_mathematics":
["High School Mathematics", "\u9ad8\u4e2d\u6570\u5b66", "STEM"],
"high_school_physics":
["High School Physics", "\u9ad8\u4e2d\u7269\u7406", "STEM"],
"high_school_chemistry":
["High School Chemistry", "\u9ad8\u4e2d\u5316\u5b66", "STEM"],
"high_school_biology": [
"High School Biology", "\u9ad8\u4e2d\u751f\u7269", "STEM"
],
"middle_school_mathematics": [
"Middle School Mathematics", "\u521d\u4e2d\u6570\u5b66", "STEM"
],
"middle_school_biology": [
"Middle School Biology", "\u521d\u4e2d\u751f\u7269", "STEM"
],
"middle_school_physics": [
"Middle School Physics", "\u521d\u4e2d\u7269\u7406", "STEM"
],
"middle_school_chemistry": [
"Middle School Chemistry", "\u521d\u4e2d\u5316\u5b66", "STEM"
],
"veterinary_medicine": [
"Veterinary Medicine", "\u517d\u533b\u5b66", "STEM"
],
"college_economics": [
"College Economics", "\u5927\u5b66\u7ecf\u6d4e\u5b66", "Social Science"
],
"business_administration": [
"Business Administration", "\u5de5\u5546\u7ba1\u7406", "Social Science"
],
"marxism": [
"Marxism", "\u9a6c\u514b\u601d\u4e3b\u4e49\u57fa\u672c\u539f\u7406",
"Social Science"
],
"mao_zedong_thought": [
"Mao Zedong Thought",
"\u6bdb\u6cfd\u4e1c\u601d\u60f3\u548c\u4e2d\u56fd\u7279\u8272\u793e\u4f1a\u4e3b\u4e49\u7406\u8bba\u4f53\u7cfb\u6982\u8bba",
"Social Science"
],
"education_science": [
"Education Science", "\u6559\u80b2\u5b66", "Social Science"
],
"teacher_qualification": [
"Teacher Qualification", "\u6559\u5e08\u8d44\u683c", "Social Science"
],
"high_school_politics": [
"High School Politics", "\u9ad8\u4e2d\u653f\u6cbb", "Social Science"
],
"high_school_geography": [
"High School Geography", "\u9ad8\u4e2d\u5730\u7406", "Social Science"
],
"middle_school_politics": [
"Middle School Politics", "\u521d\u4e2d\u653f\u6cbb", "Social Science"
],
"middle_school_geography": [
"Middle School Geography", "\u521d\u4e2d\u5730\u7406", "Social Science"
],
"modern_chinese_history":
["Modern Chinese History", "\u8fd1\u4ee3\u53f2\u7eb2\u8981", "Humanities"],
"ideological_and_moral_cultivation": [
"Ideological and Moral Cultivation",
"\u601d\u60f3\u9053\u5fb7\u4fee\u517b\u4e0e\u6cd5\u5f8b\u57fa\u7840",
"Humanities"
],
"logic": ["Logic", "\u903b\u8f91\u5b66", "Humanities"],
"law": ["Law", "\u6cd5\u5b66", "Humanities"],
"chinese_language_and_literature": [
"Chinese Language and Literature",
"\u4e2d\u56fd\u8bed\u8a00\u6587\u5b66", "Humanities"
],
"art_studies": ["Art Studies", "\u827a\u672f\u5b66", "Humanities"],
"professional_tour_guide": [
"Professional Tour Guide", "\u5bfc\u6e38\u8d44\u683c", "Humanities"
],
"legal_professional": [
"Legal Professional", "\u6cd5\u5f8b\u804c\u4e1a\u8d44\u683c",
"Humanities"
],
"high_school_chinese": [
"High School Chinese", "\u9ad8\u4e2d\u8bed\u6587", "Humanities"
],
"high_school_history": [
"High School History", "\u9ad8\u4e2d\u5386\u53f2", "Humanities"
],
"middle_school_history": [
"Middle School History", "\u521d\u4e2d\u5386\u53f2", "Humanities"
],
"civil_servant": ["Civil Servant", "\u516c\u52a1\u5458", "Other"],
"sports_science": ["Sports Science", "\u4f53\u80b2\u5b66", "Other"],
"plant_protection": [
"Plant Protection", "\u690d\u7269\u4fdd\u62a4", "Other"
],
"basic_medicine": ["Basic Medicine", "\u57fa\u7840\u533b\u5b66", "Other"],
"clinical_medicine": [
"Clinical Medicine", "\u4e34\u5e8a\u533b\u5b66", "Other"
],
"urban_and_rural_planner": [
"Urban and Rural Planner",
"\u6ce8\u518c\u57ce\u4e61\u89c4\u5212\u5e08", "Other"
],
"accountant": ["Accountant", "\u6ce8\u518c\u4f1a\u8ba1\u5e08", "Other"],
"fire_engineer": [
"Fire Engineer", "\u6ce8\u518c\u6d88\u9632\u5de5\u7a0b\u5e08", "Other"
],
"environmental_impact_assessment_engineer": [
"Environmental Impact Assessment Engineer",
"\u73af\u5883\u5f71\u54cd\u8bc4\u4ef7\u5de5\u7a0b\u5e08", "Other"
],
"tax_accountant": ["Tax Accountant", "\u7a0e\u52a1\u5e08", "Other"],
"physician": ["Physician", "\u533b\u5e08\u8d44\u683c", "Other"]
}
ceval_all_sets = list(ceval_subject_mapping.keys())
ceval_datasets = []
for _split in ["val", "test"]:
for _name in ceval_all_sets:
_ch_name = ceval_subject_mapping[_name][1]
ceval_infer_cfg = dict(
ice_template=dict(
type=PromptTemplate,
template=dict(
begin="</E>",
round=[
dict(
role="HUMAN",
prompt=
f"以下是中国关于{_ch_name}考试的单项选择题,请选出其中的正确答案。\n{{question}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n答案: "
),
dict(role="BOT", prompt="{answer}"),
]),
ice_token="</E>",
),
retriever=dict(type=FixKRetriever),
inferencer=dict(type=GenInferencer, fix_id_list=[0, 1, 2, 3, 4]),
)
ceval_eval_cfg = dict(
evaluator=dict(type=AccEvaluator),
pred_postprocessor=dict(type='first-capital'))
ceval_datasets.append(
dict(
type=CEvalDataset,
path="./data/ceval/formal_ceval",
name=_name,
abbr="ceval-" + _name if _split == "val" else "ceval-test-" +
_name,
reader_cfg=dict(
input_columns=["question", "A", "B", "C", "D"],
output_column="answer",
train_split="dev",
test_split=_split),
infer_cfg=ceval_infer_cfg,
eval_cfg=ceval_eval_cfg,
))
del _split, _name, _ch_name
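# C-Eval, 5-shot generation over the val split only; unlike the config above, the
# raw generation is scored without an answer post-processor.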
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import FixKRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import CEvalDataset
ceval_subject_mapping = {
"computer_network":
["Computer Network", "\u8ba1\u7b97\u673a\u7f51\u7edc", "STEM"],
"operating_system":
["Operating System", "\u64cd\u4f5c\u7cfb\u7edf", "STEM"],
"computer_architecture":
["Computer Architecture", "\u8ba1\u7b97\u673a\u7ec4\u6210", "STEM"],
"college_programming":
["College Programming", "\u5927\u5b66\u7f16\u7a0b", "STEM"],
"college_physics": ["College Physics", "\u5927\u5b66\u7269\u7406", "STEM"],
"college_chemistry":
["College Chemistry", "\u5927\u5b66\u5316\u5b66", "STEM"],
"advanced_mathematics":
["Advanced Mathematics", "\u9ad8\u7b49\u6570\u5b66", "STEM"],
"probability_and_statistics":
["Probability and Statistics", "\u6982\u7387\u7edf\u8ba1", "STEM"],
"discrete_mathematics":
["Discrete Mathematics", "\u79bb\u6563\u6570\u5b66", "STEM"],
"electrical_engineer": [
"Electrical Engineer", "\u6ce8\u518c\u7535\u6c14\u5de5\u7a0b\u5e08",
"STEM"
],
"metrology_engineer":
["Metrology Engineer", "\u6ce8\u518c\u8ba1\u91cf\u5e08", "STEM"],
"high_school_mathematics":
["High School Mathematics", "\u9ad8\u4e2d\u6570\u5b66", "STEM"],
"high_school_physics":
["High School Physics", "\u9ad8\u4e2d\u7269\u7406", "STEM"],
"high_school_chemistry":
["High School Chemistry", "\u9ad8\u4e2d\u5316\u5b66", "STEM"],
"high_school_biology": [
"High School Biology", "\u9ad8\u4e2d\u751f\u7269", "STEM"
],
"middle_school_mathematics": [
"Middle School Mathematics", "\u521d\u4e2d\u6570\u5b66", "STEM"
],
"middle_school_biology": [
"Middle School Biology", "\u521d\u4e2d\u751f\u7269", "STEM"
],
"middle_school_physics": [
"Middle School Physics", "\u521d\u4e2d\u7269\u7406", "STEM"
],
"middle_school_chemistry": [
"Middle School Chemistry", "\u521d\u4e2d\u5316\u5b66", "STEM"
],
"veterinary_medicine": [
"Veterinary Medicine", "\u517d\u533b\u5b66", "STEM"
],
"college_economics": [
"College Economics", "\u5927\u5b66\u7ecf\u6d4e\u5b66", "Social Science"
],
"business_administration": [
"Business Administration", "\u5de5\u5546\u7ba1\u7406", "Social Science"
],
"marxism": [
"Marxism", "\u9a6c\u514b\u601d\u4e3b\u4e49\u57fa\u672c\u539f\u7406",
"Social Science"
],
"mao_zedong_thought": [
"Mao Zedong Thought",
"\u6bdb\u6cfd\u4e1c\u601d\u60f3\u548c\u4e2d\u56fd\u7279\u8272\u793e\u4f1a\u4e3b\u4e49\u7406\u8bba\u4f53\u7cfb\u6982\u8bba",
"Social Science"
],
"education_science": [
"Education Science", "\u6559\u80b2\u5b66", "Social Science"
],
"teacher_qualification": [
"Teacher Qualification", "\u6559\u5e08\u8d44\u683c", "Social Science"
],
"high_school_politics": [
"High School Politics", "\u9ad8\u4e2d\u653f\u6cbb", "Social Science"
],
"high_school_geography": [
"High School Geography", "\u9ad8\u4e2d\u5730\u7406", "Social Science"
],
"middle_school_politics": [
"Middle School Politics", "\u521d\u4e2d\u653f\u6cbb", "Social Science"
],
"middle_school_geography": [
"Middle School Geography", "\u521d\u4e2d\u5730\u7406", "Social Science"
],
"modern_chinese_history":
["Modern Chinese History", "\u8fd1\u4ee3\u53f2\u7eb2\u8981", "Humanities"],
"ideological_and_moral_cultivation": [
"Ideological and Moral Cultivation",
"\u601d\u60f3\u9053\u5fb7\u4fee\u517b\u4e0e\u6cd5\u5f8b\u57fa\u7840",
"Humanities"
],
"logic": ["Logic", "\u903b\u8f91\u5b66", "Humanities"],
"law": ["Law", "\u6cd5\u5b66", "Humanities"],
"chinese_language_and_literature": [
"Chinese Language and Literature",
"\u4e2d\u56fd\u8bed\u8a00\u6587\u5b66", "Humanities"
],
"art_studies": ["Art Studies", "\u827a\u672f\u5b66", "Humanities"],
"professional_tour_guide": [
"Professional Tour Guide", "\u5bfc\u6e38\u8d44\u683c", "Humanities"
],
"legal_professional": [
"Legal Professional", "\u6cd5\u5f8b\u804c\u4e1a\u8d44\u683c",
"Humanities"
],
"high_school_chinese": [
"High School Chinese", "\u9ad8\u4e2d\u8bed\u6587", "Humanities"
],
"high_school_history": [
"High School History", "\u9ad8\u4e2d\u5386\u53f2", "Humanities"
],
"middle_school_history": [
"Middle School History", "\u521d\u4e2d\u5386\u53f2", "Humanities"
],
"civil_servant": ["Civil Servant", "\u516c\u52a1\u5458", "Other"],
"sports_science": ["Sports Science", "\u4f53\u80b2\u5b66", "Other"],
"plant_protection": [
"Plant Protection", "\u690d\u7269\u4fdd\u62a4", "Other"
],
"basic_medicine": ["Basic Medicine", "\u57fa\u7840\u533b\u5b66", "Other"],
"clinical_medicine": [
"Clinical Medicine", "\u4e34\u5e8a\u533b\u5b66", "Other"
],
"urban_and_rural_planner": [
"Urban and Rural Planner",
"\u6ce8\u518c\u57ce\u4e61\u89c4\u5212\u5e08", "Other"
],
"accountant": ["Accountant", "\u6ce8\u518c\u4f1a\u8ba1\u5e08", "Other"],
"fire_engineer": [
"Fire Engineer", "\u6ce8\u518c\u6d88\u9632\u5de5\u7a0b\u5e08", "Other"
],
"environmental_impact_assessment_engineer": [
"Environmental Impact Assessment Engineer",
"\u73af\u5883\u5f71\u54cd\u8bc4\u4ef7\u5de5\u7a0b\u5e08", "Other"
],
"tax_accountant": ["Tax Accountant", "\u7a0e\u52a1\u5e08", "Other"],
"physician": ["Physician", "\u533b\u5e08\u8d44\u683c", "Other"]
}
ceval_all_sets = list(ceval_subject_mapping.keys())
ceval_datasets = []
for _split in ["val"]:
for _name in ceval_all_sets:
_ch_name = ceval_subject_mapping[_name][1]
ceval_infer_cfg = dict(
ice_template=dict(
type=PromptTemplate,
template=dict(
begin="</E>",
round=[
dict(
role="HUMAN",
prompt=
f"以下是中国关于{_ch_name}考试的单项选择题,请选出其中的正确答案。\n{{question}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n答案: "
),
dict(role="BOT", prompt="{answer}"),
]),
ice_token="</E>",
),
retriever=dict(type=FixKRetriever),
inferencer=dict(type=GenInferencer, fix_id_list=[0, 1, 2, 3, 4]),
)
ceval_eval_cfg = dict(evaluator=dict(type=AccEvaluator), )
ceval_datasets.append(
dict(
type=CEvalDataset,
path="./data/ceval/formal_ceval",
name=_name,
abbr="ceval-" + _name if _split == "val" else "ceval-test-" +
_name,
reader_cfg=dict(
input_columns=["question", "A", "B", "C", "D"],
output_column="answer",
train_split="dev",
test_split=_split),
infer_cfg=ceval_infer_cfg,
eval_cfg=ceval_eval_cfg,
))
del _split, _name, _ch_name
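# C-Eval, PPL-based evaluation: entry config re-exporting the versioned dataset
# definitions referenced below.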
from mmengine.config import read_base
with read_base():
from .ceval_ppl_275812 import ceval_datasets # noqa: F401, F403
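# C-Eval, 5-shot PPL evaluation: one template per candidate answer (A-D), with the
# option letter appended as the BOT turn whose likelihood is scored.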
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import FixKRetriever
from opencompass.openicl.icl_inferencer import PPLInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import CEvalDataset
ceval_subject_mapping = {
"computer_network":
["Computer Network", "\u8ba1\u7b97\u673a\u7f51\u7edc", "STEM"],
"operating_system":
["Operating System", "\u64cd\u4f5c\u7cfb\u7edf", "STEM"],
"computer_architecture":
["Computer Architecture", "\u8ba1\u7b97\u673a\u7ec4\u6210", "STEM"],
"college_programming":
["College Programming", "\u5927\u5b66\u7f16\u7a0b", "STEM"],
"college_physics": ["College Physics", "\u5927\u5b66\u7269\u7406", "STEM"],
"college_chemistry":
["College Chemistry", "\u5927\u5b66\u5316\u5b66", "STEM"],
"advanced_mathematics":
["Advanced Mathematics", "\u9ad8\u7b49\u6570\u5b66", "STEM"],
"probability_and_statistics":
["Probability and Statistics", "\u6982\u7387\u7edf\u8ba1", "STEM"],
"discrete_mathematics":
["Discrete Mathematics", "\u79bb\u6563\u6570\u5b66", "STEM"],
"electrical_engineer": [
"Electrical Engineer", "\u6ce8\u518c\u7535\u6c14\u5de5\u7a0b\u5e08",
"STEM"
],
"metrology_engineer":
["Metrology Engineer", "\u6ce8\u518c\u8ba1\u91cf\u5e08", "STEM"],
"high_school_mathematics":
["High School Mathematics", "\u9ad8\u4e2d\u6570\u5b66", "STEM"],
"high_school_physics":
["High School Physics", "\u9ad8\u4e2d\u7269\u7406", "STEM"],
"high_school_chemistry":
["High School Chemistry", "\u9ad8\u4e2d\u5316\u5b66", "STEM"],
"high_school_biology": [
"High School Biology", "\u9ad8\u4e2d\u751f\u7269", "STEM"
],
"middle_school_mathematics": [
"Middle School Mathematics", "\u521d\u4e2d\u6570\u5b66", "STEM"
],
"middle_school_biology": [
"Middle School Biology", "\u521d\u4e2d\u751f\u7269", "STEM"
],
"middle_school_physics": [
"Middle School Physics", "\u521d\u4e2d\u7269\u7406", "STEM"
],
"middle_school_chemistry": [
"Middle School Chemistry", "\u521d\u4e2d\u5316\u5b66", "STEM"
],
"veterinary_medicine": [
"Veterinary Medicine", "\u517d\u533b\u5b66", "STEM"
],
"college_economics": [
"College Economics", "\u5927\u5b66\u7ecf\u6d4e\u5b66", "Social Science"
],
"business_administration": [
"Business Administration", "\u5de5\u5546\u7ba1\u7406", "Social Science"
],
"marxism": [
"Marxism", "\u9a6c\u514b\u601d\u4e3b\u4e49\u57fa\u672c\u539f\u7406",
"Social Science"
],
"mao_zedong_thought": [
"Mao Zedong Thought",
"\u6bdb\u6cfd\u4e1c\u601d\u60f3\u548c\u4e2d\u56fd\u7279\u8272\u793e\u4f1a\u4e3b\u4e49\u7406\u8bba\u4f53\u7cfb\u6982\u8bba",
"Social Science"
],
"education_science": [
"Education Science", "\u6559\u80b2\u5b66", "Social Science"
],
"teacher_qualification": [
"Teacher Qualification", "\u6559\u5e08\u8d44\u683c", "Social Science"
],
"high_school_politics": [
"High School Politics", "\u9ad8\u4e2d\u653f\u6cbb", "Social Science"
],
"high_school_geography": [
"High School Geography", "\u9ad8\u4e2d\u5730\u7406", "Social Science"
],
"middle_school_politics": [
"Middle School Politics", "\u521d\u4e2d\u653f\u6cbb", "Social Science"
],
"middle_school_geography": [
"Middle School Geography", "\u521d\u4e2d\u5730\u7406", "Social Science"
],
"modern_chinese_history":
["Modern Chinese History", "\u8fd1\u4ee3\u53f2\u7eb2\u8981", "Humanities"],
"ideological_and_moral_cultivation": [
"Ideological and Moral Cultivation",
"\u601d\u60f3\u9053\u5fb7\u4fee\u517b\u4e0e\u6cd5\u5f8b\u57fa\u7840",
"Humanities"
],
"logic": ["Logic", "\u903b\u8f91\u5b66", "Humanities"],
"law": ["Law", "\u6cd5\u5b66", "Humanities"],
"chinese_language_and_literature": [
"Chinese Language and Literature",
"\u4e2d\u56fd\u8bed\u8a00\u6587\u5b66", "Humanities"
],
"art_studies": ["Art Studies", "\u827a\u672f\u5b66", "Humanities"],
"professional_tour_guide": [
"Professional Tour Guide", "\u5bfc\u6e38\u8d44\u683c", "Humanities"
],
"legal_professional": [
"Legal Professional", "\u6cd5\u5f8b\u804c\u4e1a\u8d44\u683c",
"Humanities"
],
"high_school_chinese": [
"High School Chinese", "\u9ad8\u4e2d\u8bed\u6587", "Humanities"
],
"high_school_history": [
"High School History", "\u9ad8\u4e2d\u5386\u53f2", "Humanities"
],
"middle_school_history": [
"Middle School History", "\u521d\u4e2d\u5386\u53f2", "Humanities"
],
"civil_servant": ["Civil Servant", "\u516c\u52a1\u5458", "Other"],
"sports_science": ["Sports Science", "\u4f53\u80b2\u5b66", "Other"],
"plant_protection": [
"Plant Protection", "\u690d\u7269\u4fdd\u62a4", "Other"
],
"basic_medicine": ["Basic Medicine", "\u57fa\u7840\u533b\u5b66", "Other"],
"clinical_medicine": [
"Clinical Medicine", "\u4e34\u5e8a\u533b\u5b66", "Other"
],
"urban_and_rural_planner": [
"Urban and Rural Planner",
"\u6ce8\u518c\u57ce\u4e61\u89c4\u5212\u5e08", "Other"
],
"accountant": ["Accountant", "\u6ce8\u518c\u4f1a\u8ba1\u5e08", "Other"],
"fire_engineer": [
"Fire Engineer", "\u6ce8\u518c\u6d88\u9632\u5de5\u7a0b\u5e08", "Other"
],
"environmental_impact_assessment_engineer": [
"Environmental Impact Assessment Engineer",
"\u73af\u5883\u5f71\u54cd\u8bc4\u4ef7\u5de5\u7a0b\u5e08", "Other"
],
"tax_accountant": ["Tax Accountant", "\u7a0e\u52a1\u5e08", "Other"],
"physician": ["Physician", "\u533b\u5e08\u8d44\u683c", "Other"]
}
ceval_all_sets = list(ceval_subject_mapping.keys())
ceval_datasets = []
for _split in ["val", "test"]:
for _name in ceval_all_sets:
_ch_name = ceval_subject_mapping[_name][1]
ceval_infer_cfg = dict(
ice_template=dict(
type=PromptTemplate,
template={
answer: dict(
begin="</E>",
round=[
dict(
role="HUMAN",
prompt=
f"以下是中国关于{_ch_name}考试的单项选择题,请选出其中的正确答案。\n{{question}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n答案: "
),
dict(role="BOT", prompt=answer),
])
for answer in ["A", "B", "C", "D"]
},
ice_token="</E>",
),
retriever=dict(type=FixKRetriever),
inferencer=dict(type=PPLInferencer, fix_id_list=[0, 1, 2, 3, 4]),
)
ceval_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
ceval_datasets.append(
dict(
type=CEvalDataset,
path="./data/ceval/formal_ceval",
name=_name,
abbr="ceval-" + _name if _split == "val" else "ceval-test-" +
_name,
reader_cfg=dict(
input_columns=["question", "A", "B", "C", "D"],
output_column="answer",
train_split="dev",
test_split=_split),
infer_cfg=ceval_infer_cfg,
eval_cfg=ceval_eval_cfg,
))
del _split, _name, _ch_name