Commit 7d346000 authored by gaotongxiao

initial commit

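# SuperGLUE WSC, PPL-based evaluation: this entry config re-exports the dataset
# definitions from the versioned config referenced below.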
from mmengine.config import read_base
with read_base():
from .SuperGLUE_WSC_ppl_85f45f import WSC_datasets # noqa: F401, F403
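# SuperGLUE WSC, PPL variant with plain string templates: candidate 0 scores the
# original sentence ({text}) and candidate 1 the rewritten one ({new_text});
# AccEvaluator then measures how often the lower-perplexity candidate is correct.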
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import PPLInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import WSCDataset
WSC_reader_cfg = dict(
input_columns=['span1', 'span2', 'text', 'new_text'],
output_column='answer')
WSC_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template={
0: "{text}",
1: "{new_text}"
}),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=PPLInferencer))
WSC_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
WSC_datasets = [
dict(
type=WSCDataset,
path='json',
abbr='WSC',
data_files='./data/SuperGLUE/WSC/val.jsonl',
split='train',
reader_cfg=WSC_reader_cfg,
infer_cfg=WSC_infer_cfg,
eval_cfg=WSC_eval_cfg,
)
]
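# SuperGLUE WSC, PPL variant with chat-style (HUMAN round) templates; otherwise the
# same dataset, retriever and evaluator as the plain-string config above.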
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import PPLInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import WSCDataset
WSC_reader_cfg = dict(
input_columns=["span1", "span2", "text", "new_text"],
output_column="answer",
)
WSC_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template={
0: dict(round=[
dict(role="HUMAN", prompt="{text}"),
]),
1: dict(round=[
dict(role="HUMAN", prompt="{new_text}"),
]),
},
),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=PPLInferencer),
)
WSC_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
WSC_datasets = [
dict(
type=WSCDataset,
path="json",
abbr="WSC",
data_files="./data/SuperGLUE/WSC/val.jsonl",
split="train",
reader_cfg=WSC_reader_cfg,
infer_cfg=WSC_infer_cfg,
eval_cfg=WSC_eval_cfg,
)
]
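# TheoremQA, generation-based evaluation: entry config re-exporting the versioned
# dataset definitions referenced below.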
from mmengine.config import read_base
with read_base():
from .TheoremQA_gen_891fcf import TheoremQA_datasets # noqa: F401, F403
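# TheoremQA generation config: the answering instruction and the question are packed
# into a single HUMAN turn; predictions are cleaned by the 'TheoremQA' post-processor
# and scored with accuracy.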
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import TheoremQADataset
TheoremQA_reader_cfg = dict(
input_columns=['Question', 'Answer_type'],
output_column='Answer',
train_split='test')
TheoremQA_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template=dict(round=[
dict(
role='HUMAN',
prompt=
"""You are a mathematician, you are supposed to answer the given question. You need to output the answer in your final sentence like "Therefore, the answer is ...". The answer can only be one of the following forms:\n1. a numerical value like 0.1, no symbol and no unit at all.\n2. a list of number like [2, 3, 4].\n3. True/False.\n4. an option like (a), (b), (c), (d)\nQuestion: {Question}\nLet\'s think step by step."""
),
])),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GenInferencer))
TheoremQA_eval_cfg = dict(
evaluator=dict(type=AccEvaluator),
pred_postprocessor=dict(type='TheoremQA'))
TheoremQA_datasets = [
dict(
abbr='TheoremQA',
type=TheoremQADataset,
path="./data/TheoremQA/test.csv",
reader_cfg=TheoremQA_reader_cfg,
infer_cfg=TheoremQA_infer_cfg,
eval_cfg=TheoremQA_eval_cfg)
]
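# TheoremQA generation config, chat-oriented variant: the instruction goes out as a
# SYSTEM message (falling back to HUMAN for models without a system role) and the
# question as a separate HUMAN turn.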
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import TheoremQADataset
TheoremQA_reader_cfg = dict(
input_columns=['Question', 'Answer_type'],
output_column='Answer',
train_split='test')
TheoremQA_prompt1 = """You are a mathematician, you are supposed to answer the given question. You need to output the answer in your final sentence like "Therefore, the answer is ...". The answer can only be one of the following forms:
1. a numerical value like 0.1, no symbol and no unit at all.
2. a list of number like [2, 3, 4].
3. True/False.
4. an option like (a), (b), (c), (d)
"""
TheoremQA_prompt2 = 'Question: {Question}\nLet\'s think step by step.'
TheoremQA_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template=dict(
begin=[
dict(
role='SYSTEM',
fallback_role='HUMAN',
prompt=TheoremQA_prompt1),
],
round=[
dict(role='HUMAN', prompt=TheoremQA_prompt2),
])),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GenInferencer))
TheoremQA_eval_cfg = dict(
evaluator=dict(type=AccEvaluator),
pred_postprocessor=dict(type='TheoremQA'))
TheoremQA_datasets = [
dict(
abbr='TheoremQA',
type=TheoremQADataset,
path="./data/TheoremQA/test.csv",
reader_cfg=TheoremQA_reader_cfg,
infer_cfg=TheoremQA_infer_cfg,
eval_cfg=TheoremQA_eval_cfg)
]
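# Xsum summarization, generation-based evaluation: entry config re-exporting the
# versioned dataset definitions referenced below.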
from mmengine.config import read_base
with read_base():
from .Xsum_gen_d2126e import Xsum_datasets # noqa: F401, F403
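# Xsum generation config with a plain string prompt; summaries are scored with ROUGE
# after the 'Xsum' post-processor.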
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import RougeEvaluator
from opencompass.datasets import XsumDataset
Xsum_reader_cfg = dict(input_columns=['dialogue'], output_column='summary')
Xsum_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template='Document:{dialogue}\n'
'Based on the previous text, provide a brief single summary:'),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GenInferencer))
Xsum_eval_cfg = dict(
evaluator=dict(type=RougeEvaluator),
pred_postprocessor=dict(type='Xsum'),
)
Xsum_datasets = [
dict(
type=XsumDataset,
abbr='Xsum',
path='./data/Xsum/dev.jsonl',
reader_cfg=Xsum_reader_cfg,
infer_cfg=Xsum_infer_cfg,
eval_cfg=Xsum_eval_cfg)
]
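# Xsum generation config, chat-style variant of the prompt above; pred_role='BOT'
# restricts evaluation to the model's reply turn.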
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import RougeEvaluator
from opencompass.datasets import XsumDataset
Xsum_reader_cfg = dict(input_columns=["dialogue"], output_column="summary")
Xsum_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template=dict(round=[
dict(
role="HUMAN",
prompt=
"Document:{dialogue}\nBased on the previous text, provide a brief single summary:"
),
]),
),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GenInferencer),
)
Xsum_eval_cfg = dict(
evaluator=dict(type=RougeEvaluator),
pred_role='BOT',
pred_postprocessor=dict(type="Xsum"),
)
Xsum_datasets = [
dict(
type=XsumDataset,
abbr="Xsum",
path="./data/Xsum/dev.jsonl",
reader_cfg=Xsum_reader_cfg,
infer_cfg=Xsum_infer_cfg,
eval_cfg=Xsum_eval_cfg,
)
]
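# AGIEval (v1 loader), zero-shot generation: single-choice subsets are scored by
# extracting the first capital letter, cloze subsets by the AGIEvalEvaluator; the
# jec-qa multiple-choice subsets are commented out pending extra data preprocessing.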
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import AGIEvalDataset, AGIEvalEvaluator
agieval_reader_cfg = dict(
input_columns=['problem_input'], output_column='label')
agieval_single_choice_sets = [
'gaokao-chinese',
'gaokao-english',
'gaokao-geography',
'gaokao-history',
'gaokao-biology',
'gaokao-chemistry',
'gaokao-physics',
'gaokao-mathqa',
'logiqa-zh',
'lsat-ar',
'lsat-lr',
'lsat-rc',
'logiqa-en',
'sat-math',
'sat-en',
'sat-en-without-passage',
'aqua-rat',
]
agieval_multiple_choices_sets = [
    # 'jec-qa-kd',  # requires extra data preprocessing
    # 'jec-qa-ca',  # requires extra data preprocessing
]
agieval_cloze_sets = ['gaokao-mathcloze', 'math']
agieval_datasets = []
for name in agieval_single_choice_sets:
agieval_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template=dict(
round=[dict(role='HUMAN', prompt='{problem_input}')])),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GenInferencer, labels=['A', 'B', 'C', 'D']))
agieval_eval_cfg = dict(
evaluator=dict(type=AccEvaluator),
pred_postprocessor=dict(type='first-capital'))
agieval_datasets.append(
dict(
type=AGIEvalDataset,
path='./data/AGIEval/data/v1/',
name=name,
abbr='agieval-' + name,
setting_name='zero-shot',
reader_cfg=agieval_reader_cfg,
infer_cfg=agieval_infer_cfg.copy(),
eval_cfg=agieval_eval_cfg.copy()))
for name in agieval_cloze_sets:
agieval_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template=dict(
round=[dict(role="HUMAN", prompt='{problem_input}')])),
        # the retriever has no effect here; zero-shot / few-shot follows the input argument
        retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GenInferencer))
agieval_eval_cfg = dict(
evaluator=dict(type=AGIEvalEvaluator), pred_role="BOT")
agieval_datasets.append(
dict(
type=AGIEvalDataset,
path='./data/AGIEval/data/v1/',
name=name,
abbr='agieval-' + name,
setting_name='zero-shot',
reader_cfg=agieval_reader_cfg,
infer_cfg=agieval_infer_cfg.copy(),
eval_cfg=agieval_eval_cfg.copy()))
del name, agieval_infer_cfg, agieval_eval_cfg
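# AGIEval (v2 loader), zero-shot generation: prompts combine {question} and {options}
# with a language-matched answer hint, and the loop at the end prepends a per-subset
# introduction to every HUMAN turn.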
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import AGIEvalDataset_v2, AGIEvalEvaluator
agieval_reader_cfg = dict(
input_columns=['question', 'options'], output_column='label')
agieval_single_choice_sets = [
'gaokao-chinese',
'gaokao-english',
'gaokao-geography',
'gaokao-history',
'gaokao-biology',
'gaokao-chemistry',
'gaokao-physics',
'gaokao-mathqa',
'logiqa-zh',
'lsat-ar',
'lsat-lr',
'lsat-rc',
'logiqa-en',
'sat-math',
'sat-en',
'sat-en-without-passage',
'aqua-rat',
]
agieval_multiple_choices_sets = [
'jec-qa-kd',
'jec-qa-ca',
]
agieval_cloze_sets = ['gaokao-mathcloze', 'math']
agieval_chinese_sets = [
'gaokao-chinese',
'gaokao-english',
'gaokao-geography',
'gaokao-history',
'gaokao-biology',
'gaokao-chemistry',
'gaokao-physics',
'gaokao-mathqa',
'logiqa-zh',
'gaokao-mathcloze',
]
agieval_english_sets = [
'lsat-ar',
'lsat-lr',
'lsat-rc',
'logiqa-en',
'sat-math',
'sat-en',
'sat-en-without-passage',
'aqua-rat',
'math',
]
agieval_gaokao_sets = [
'gaokao-chinese',
'gaokao-english',
'gaokao-geography',
'gaokao-history',
'gaokao-biology',
'gaokao-chemistry',
'gaokao-physics',
'gaokao-mathqa',
]
agieval_datasets = []
for _name in agieval_single_choice_sets:
if _name in agieval_chinese_sets:
_hint = '答案是: '
else:
_hint = 'The answer is '
agieval_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template=dict(round=[
dict(role='HUMAN', prompt=f'{{question}}\n{{options}}\n{_hint}')
])),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GenInferencer, max_out_len=1024))
agieval_eval_cfg = dict(
evaluator=dict(type=AccEvaluator),
pred_postprocessor=dict(type='first-capital'))
agieval_datasets.append(
dict(
type=AGIEvalDataset_v2,
path='./data/AGIEval/data/v1/',
name=_name,
abbr='agieval-' + _name,
setting_name='zero-shot',
reader_cfg=agieval_reader_cfg,
infer_cfg=agieval_infer_cfg.copy(),
eval_cfg=agieval_eval_cfg.copy()))
for _name in agieval_multiple_choices_sets:
if _name in agieval_chinese_sets:
_hint = '答案是: '
else:
_hint = 'The answer is '
agieval_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template=dict(round=[
dict(role='HUMAN', prompt=f'{{question}}\n{{options}}\n{_hint}')
])),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GenInferencer, max_out_len=1024))
agieval_eval_cfg = dict(
evaluator=dict(type=AccEvaluator),
pred_postprocessor=dict(type='first-capital-multi'))
agieval_datasets.append(
dict(
type=AGIEvalDataset_v2,
path='./data/AGIEval/data/v1/',
name=_name,
abbr='agieval-' + _name,
setting_name='zero-shot',
reader_cfg=agieval_reader_cfg,
infer_cfg=agieval_infer_cfg.copy(),
eval_cfg=agieval_eval_cfg.copy()))
for _name in agieval_cloze_sets:
if _name in agieval_chinese_sets:
_hint = '答案是: '
else:
_hint = 'The answer is '
agieval_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template=dict(
round=[dict(role='HUMAN', prompt=f'{{question}}\n{_hint}')])),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GenInferencer, max_out_len=1024))
agieval_eval_cfg = dict(evaluator=dict(type=AGIEvalEvaluator))
agieval_datasets.append(
dict(
type=AGIEvalDataset_v2,
path='./data/AGIEval/data/v1/',
name=_name,
abbr='agieval-' + _name,
setting_name='zero-shot',
reader_cfg=agieval_reader_cfg,
infer_cfg=agieval_infer_cfg.copy(),
eval_cfg=agieval_eval_cfg.copy()))
for _item in agieval_datasets:
_name = _item['name']
_intro = {
'gaokao-chinese':
'以下是一道中国高考语文选择题,请选择正确的答案。',
'gaokao-english':
'以下是一道中国高考英语选择题,请选择正确的答案。',
'gaokao-geography':
'以下是一道中国高考地理选择题,请选择正确的答案。',
'gaokao-history':
'以下是一道中国高考历史选择题,请选择正确的答案。',
'gaokao-biology':
'以下是一道中国高考生物选择题,请选择正确的答案。',
'gaokao-chemistry':
'以下是一道中国高考化学选择题,请选择正确的答案。',
'gaokao-physics':
'以下是一道中国高考物理选择题,请选择正确的答案。',
'gaokao-mathqa':
'以下是一道中国高考数学选择题,请选择正确的答案。',
'logiqa-zh':
'以下是一道中国公务员考试题,请选择正确的答案。',
'lsat-ar':
'The following is a LSAT Analytical Reasoning question. Please select the correct answer.',
'lsat-lr':
'The following is a LSAT Logical Reasoning question. Please select the correct answer.',
'lsat-rc':
'The following is a LSAT Reading Comprehension question. Please select the correct answer.',
'logiqa-en':
'The following is a Logic Reasoning question. Please select the correct answer.',
'sat-math':
'The following is a SAT Math question. Please select the correct answer.',
'sat-en':
'The following is a SAT English question. Please select the correct answer.',
'sat-en-without-passage':
'The following is a SAT English question. Please select the correct answer.',
'aqua-rat':
'The following is a AQUA-RAT question. Please select the correct answer.',
'jec-qa-kd':
'以下是一道中国司法考试基础知识题,请选择正确的答案。',
'jec-qa-ca':
'以下是一道中国司法考试案例分析题,请选择正确的答案。',
'gaokao-mathcloze':
'以下是一道中国高考数学填空题,请填入正确的答案。',
'math':
'The following is a Math question. Please select the correct answer.',
}[_name]
_templates = _item['infer_cfg']['prompt_template']['template']
_templates['round'][0][
'prompt'] = _intro + '\n' + _templates['round'][0]['prompt']
del _item, _intro, _templates, _name, _hint, agieval_infer_cfg, agieval_eval_cfg
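# AGIEval (v2 loader), mixed setup: single-choice subsets use per-option PPL
# templates, while multiple-choice and cloze subsets fall back to generation; the
# final loop again prepends the per-subset introductions, handling both template
# shapes.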
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import PPLInferencer, GenInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import AGIEvalDataset_v2, AGIEvalEvaluator
agieval_single_choice_sets = [
'gaokao-chinese',
'gaokao-english',
'gaokao-geography',
'gaokao-history',
'gaokao-biology',
'gaokao-chemistry',
'gaokao-physics',
'gaokao-mathqa',
'logiqa-zh',
'lsat-ar',
'lsat-lr',
'lsat-rc',
'logiqa-en',
'sat-math',
'sat-en',
'sat-en-without-passage',
'aqua-rat',
]
agieval_multiple_choices_sets = [
'jec-qa-kd',
'jec-qa-ca',
]
agieval_cloze_sets = ['gaokao-mathcloze', 'math']
agieval_chinese_sets = [
'gaokao-chinese',
'gaokao-english',
'gaokao-geography',
'gaokao-history',
'gaokao-biology',
'gaokao-chemistry',
'gaokao-physics',
'gaokao-mathqa',
'logiqa-zh',
'gaokao-mathcloze',
]
agieval_english_sets = [
'lsat-ar',
'lsat-lr',
'lsat-rc',
'logiqa-en',
'sat-math',
'sat-en',
'sat-en-without-passage',
'aqua-rat',
'math',
]
agieval_gaokao_sets = [
'gaokao-chinese',
'gaokao-english',
'gaokao-geography',
'gaokao-history',
'gaokao-biology',
'gaokao-chemistry',
'gaokao-physics',
'gaokao-mathqa',
]
agieval_datasets = []
for _name in agieval_single_choice_sets:
if _name in ['lsat-ar', 'lsat-lr', 'lsat-rc', 'aqua-rat']:
_options = ['A', 'B', 'C', 'D', 'E']
else:
_options = ['A', 'B', 'C', 'D']
if _name in agieval_chinese_sets:
_hint = '答案是:'
else:
_hint = 'The answer is '
agieval_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template={
label: dict(round=[
dict(role='HUMAN', prompt='{question}\n{options}'),
dict(role='BOT', prompt=f'{_hint}{label}')
])
for label in _options
}),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=PPLInferencer, labels=_options))
agieval_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
agieval_datasets.append(
dict(
type=AGIEvalDataset_v2,
path='./data/AGIEval/data/v1/',
name=_name,
abbr='agieval-' + _name,
setting_name='zero-shot',
reader_cfg=dict(
input_columns=['question', 'options'] + _options,
output_column='label'),
infer_cfg=agieval_infer_cfg.copy(),
eval_cfg=agieval_eval_cfg.copy()))
for _name in agieval_multiple_choices_sets:
if _name in agieval_chinese_sets:
_hint = '答案是: '
else:
_hint = 'The answer is '
agieval_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template=dict(round=[
dict(role='HUMAN', prompt=f'{{question}}\n{{options}}\n{_hint}')
])),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GenInferencer, max_out_len=1024))
agieval_eval_cfg = dict(
evaluator=dict(type=AccEvaluator),
pred_postprocessor=dict(type='first-capital-multi'))
agieval_datasets.append(
dict(
type=AGIEvalDataset_v2,
path='./data/AGIEval/data/v1/',
name=_name,
abbr='agieval-' + _name,
setting_name='zero-shot',
reader_cfg=dict(
input_columns=['question', 'options'], output_column='label'),
infer_cfg=agieval_infer_cfg.copy(),
eval_cfg=agieval_eval_cfg.copy()))
for _name in agieval_cloze_sets:
if _name in agieval_chinese_sets:
_hint = '答案是:'
else:
_hint = 'The answer is '
agieval_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template=dict(
round=[dict(role='HUMAN', prompt=f'{{question}}{_hint}')])),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GenInferencer, max_out_len=1024))
agieval_eval_cfg = dict(evaluator=dict(type=AGIEvalEvaluator))
agieval_datasets.append(
dict(
type=AGIEvalDataset_v2,
path='./data/AGIEval/data/v1/',
name=_name,
abbr='agieval-' + _name,
setting_name='zero-shot',
reader_cfg=dict(
input_columns=['question', 'options'], output_column='label'),
infer_cfg=agieval_infer_cfg.copy(),
eval_cfg=agieval_eval_cfg.copy()))
for _item in agieval_datasets:
_name = _item['name']
_intro = {
'gaokao-chinese':
'以下是一道中国高考语文选择题,请选择正确的答案。',
'gaokao-english':
'以下是一道中国高考英语选择题,请选择正确的答案。',
'gaokao-geography':
'以下是一道中国高考地理选择题,请选择正确的答案。',
'gaokao-history':
'以下是一道中国高考历史选择题,请选择正确的答案。',
'gaokao-biology':
'以下是一道中国高考生物选择题,请选择正确的答案。',
'gaokao-chemistry':
'以下是一道中国高考化学选择题,请选择正确的答案。',
'gaokao-physics':
'以下是一道中国高考物理选择题,请选择正确的答案。',
'gaokao-mathqa':
'以下是一道中国高考数学选择题,请选择正确的答案。',
'logiqa-zh':
'以下是一道中国公务员考试题,请选择正确的答案。',
'lsat-ar':
'The following is a LSAT Analytical Reasoning question. Please select the correct answer.',
'lsat-lr':
'The following is a LSAT Logical Reasoning question. Please select the correct answer.',
'lsat-rc':
'The following is a LSAT Reading Comprehension question. Please select the correct answer.',
'logiqa-en':
'The following is a Logic Reasoning question. Please select the correct answer.',
'sat-math':
'The following is a SAT Math question. Please select the correct answer.',
'sat-en':
'The following is a SAT English question. Please select the correct answer.',
'sat-en-without-passage':
'The following is a SAT English question. Please select the correct answer.',
'aqua-rat':
'The following is a AQUA-RAT question. Please select the correct answer.',
'jec-qa-kd':
'以下是一道中国司法考试基础知识题,请选择正确的答案。',
'jec-qa-ca':
'以下是一道中国司法考试案例分析题,请选择正确的答案。',
'gaokao-mathcloze':
'以下是一道中国高考数学填空题,请填入正确的答案。',
'math':
'The following is a Math question. Please select the correct answer.',
}[_name]
_templates = _item['infer_cfg']['prompt_template']['template']
if _item['infer_cfg']['inferencer']['type'] == PPLInferencer:
for _label in _templates:
_templates[_label]['round'][0][
'prompt'] = _intro + '\n' + _templates[_label]['round'][0][
'prompt']
else:
_templates['round'][0][
'prompt'] = _intro + '\n' + _templates['round'][0]['prompt']
del _item, _intro, _templates, _label, _name, _options, _hint, agieval_infer_cfg, agieval_eval_cfg
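# APPS code generation, variant 1: the "Write a python program:" instruction is sent
# as a SYSTEM message (with HUMAN fallback) and completions are scored by the
# HumanEval-style evaluator.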
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.datasets import HFDataset, HumanEvaluator
apps_reader_cfg = dict(
input_columns=['question'], output_column='problem_id', train_split='test')
# TODO: allow empty output-column
apps_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template=dict(
begin=[
dict(
role='SYSTEM',
fallback_role='HUMAN',
prompt='Write a python program:'),
],
round=[
dict(role='HUMAN', prompt='{question}'),
])),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GenInferencer))
apps_eval_cfg = dict(
evaluator=dict(type=HumanEvaluator),
pred_role='BOT',
    k=[1, 10, 100],  # this parameter is only used for humaneval
pred_postprocessor=dict(type='humaneval'),
)
apps_datasets = [
dict(
type=HFDataset,
path='codeparrot/apps',
reader_cfg=apps_reader_cfg,
infer_cfg=apps_infer_cfg,
eval_cfg=apps_eval_cfg)
]
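# APPS code generation, variant 2: instruction and question merged into a single
# HUMAN turn; evaluation is unchanged.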
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.datasets import HFDataset, HumanEvaluator
apps_reader_cfg = dict(
input_columns=['question'], output_column='problem_id', train_split='test')
# TODO: allow empty output-column
apps_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template=dict(round=[
dict(role='HUMAN', prompt='Write a python program:\n{question}'),
])),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GenInferencer))
apps_eval_cfg = dict(
evaluator=dict(type=HumanEvaluator),
pred_role='BOT',
    k=[1, 10, 100],  # this parameter is only used for humaneval
pred_postprocessor=dict(type='humaneval'),
)
apps_datasets = [
dict(
type=HFDataset,
path='codeparrot/apps',
reader_cfg=apps_reader_cfg,
infer_cfg=apps_infer_cfg,
eval_cfg=apps_eval_cfg)
]
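# APPS code generation, variant 3: bare string template containing only the question,
# with a 512-token generation limit.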
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.datasets import HFDataset, HumanEvaluator
apps_reader_cfg = dict(
input_columns=['question'], output_column='problem_id', train_split='test')
# TODO: allow empty output-column
apps_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template='{question}'),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GenInferencer, max_out_len=512))
apps_eval_cfg = dict(
evaluator=dict(type=HumanEvaluator),
k=[1, 10, 100],
pred_postprocessor=dict(type='humaneval'),
)
apps_datasets = [
dict(
type=HFDataset,
path='codeparrot/apps',
reader_cfg=apps_reader_cfg,
infer_cfg=apps_infer_cfg,
eval_cfg=apps_eval_cfg)
]
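# BBH (BIG-Bench Hard): each subset reuses its few-shot prompt from lib_prompt and
# appends "Let's think step by step."; multiple-choice subsets are scored with
# accuracy via the 'bbh-mcq' post-processors, free-form subsets with BBHEvaluator.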
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import BBHDataset, BBHEvaluator
bbh_reader_cfg = dict(input_columns=["input"], output_column="target")
_path_prefix = "./data/BBH"
bbh_multiple_choice_sets = [
'temporal_sequences',
'disambiguation_qa',
'date_understanding',
'tracking_shuffled_objects_three_objects',
'penguins_in_a_table',
'geometric_shapes',
'snarks',
'ruin_names',
'tracking_shuffled_objects_seven_objects',
'tracking_shuffled_objects_five_objects',
'logical_deduction_three_objects',
'hyperbaton',
'logical_deduction_five_objects',
'logical_deduction_seven_objects',
'movie_recommendation',
'salient_translation_error_detection',
'reasoning_about_colored_objects',
]
bbh_free_form_sets = [
'multistep_arithmetic_two',
'navigate',
'dyck_languages',
'word_sorting',
'sports_understanding',
'boolean_expressions',
'object_counting',
'formal_fallacies',
'causal_judgement',
'web_of_lies',
]
bbh_datasets = []
for _name in bbh_multiple_choice_sets:
    with open(f"{_path_prefix}/lib_prompt/{_name}.txt", 'r') as f:
        _hint = f.read()
bbh_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template=dict(round=[
dict(
role="HUMAN",
prompt=
f"Follow the given examples and answer the question.\n{_hint}\n\nQ: {{input}}\nA: Let's think step by step."
)
])),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GenInferencer, max_out_len=512))
bbh_eval_cfg = dict(
evaluator=dict(type=AccEvaluator),
pred_role="BOT",
pred_postprocessor=dict(type='bbh-mcq'),
dataset_postprocessor=dict(type='bbh-mcq'))
bbh_datasets.append(
dict(
type=BBHDataset,
path=f"{_path_prefix}/data",
name=_name,
abbr='bbh-' + _name,
reader_cfg=bbh_reader_cfg,
infer_cfg=bbh_infer_cfg.copy(),
eval_cfg=bbh_eval_cfg.copy()))
for _name in bbh_free_form_sets:
    with open(f"{_path_prefix}/lib_prompt/{_name}.txt", 'r') as f:
        _hint = f.read()
bbh_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template=dict(round=[
dict(
role="HUMAN",
prompt=
f"Follow the given examples and answer the question.\n{_hint}\n\nQ: {{input}}\nA: Let's think step by step."
)
])),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GenInferencer, max_out_len=512))
bbh_eval_cfg = dict(evaluator=dict(type=BBHEvaluator), pred_role="BOT")
bbh_datasets.append(
dict(
type=BBHDataset,
path=f"{_path_prefix}/data",
name=_name,
abbr='bbh-' + _name,
reader_cfg=bbh_reader_cfg,
infer_cfg=bbh_infer_cfg.copy(),
eval_cfg=bbh_eval_cfg.copy()))
del _name, _hint, _path_prefix
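# C-Eval, 5-shot generation over both the val and test splits: in-context examples
# are fixed to dev ids [0-4] via FixKRetriever, and answers are scored by extracting
# the first capital letter.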
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import FixKRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import CEvalDataset
ceval_subject_mapping = {
"computer_network":
["Computer Network", "\u8ba1\u7b97\u673a\u7f51\u7edc", "STEM"],
"operating_system":
["Operating System", "\u64cd\u4f5c\u7cfb\u7edf", "STEM"],
"computer_architecture":
["Computer Architecture", "\u8ba1\u7b97\u673a\u7ec4\u6210", "STEM"],
"college_programming":
["College Programming", "\u5927\u5b66\u7f16\u7a0b", "STEM"],
"college_physics": ["College Physics", "\u5927\u5b66\u7269\u7406", "STEM"],
"college_chemistry":
["College Chemistry", "\u5927\u5b66\u5316\u5b66", "STEM"],
"advanced_mathematics":
["Advanced Mathematics", "\u9ad8\u7b49\u6570\u5b66", "STEM"],
"probability_and_statistics":
["Probability and Statistics", "\u6982\u7387\u7edf\u8ba1", "STEM"],
"discrete_mathematics":
["Discrete Mathematics", "\u79bb\u6563\u6570\u5b66", "STEM"],
"electrical_engineer": [
"Electrical Engineer", "\u6ce8\u518c\u7535\u6c14\u5de5\u7a0b\u5e08",
"STEM"
],
"metrology_engineer":
["Metrology Engineer", "\u6ce8\u518c\u8ba1\u91cf\u5e08", "STEM"],
"high_school_mathematics":
["High School Mathematics", "\u9ad8\u4e2d\u6570\u5b66", "STEM"],
"high_school_physics":
["High School Physics", "\u9ad8\u4e2d\u7269\u7406", "STEM"],
"high_school_chemistry":
["High School Chemistry", "\u9ad8\u4e2d\u5316\u5b66", "STEM"],
"high_school_biology": [
"High School Biology", "\u9ad8\u4e2d\u751f\u7269", "STEM"
],
"middle_school_mathematics": [
"Middle School Mathematics", "\u521d\u4e2d\u6570\u5b66", "STEM"
],
"middle_school_biology": [
"Middle School Biology", "\u521d\u4e2d\u751f\u7269", "STEM"
],
"middle_school_physics": [
"Middle School Physics", "\u521d\u4e2d\u7269\u7406", "STEM"
],
"middle_school_chemistry": [
"Middle School Chemistry", "\u521d\u4e2d\u5316\u5b66", "STEM"
],
"veterinary_medicine": [
"Veterinary Medicine", "\u517d\u533b\u5b66", "STEM"
],
"college_economics": [
"College Economics", "\u5927\u5b66\u7ecf\u6d4e\u5b66", "Social Science"
],
"business_administration": [
"Business Administration", "\u5de5\u5546\u7ba1\u7406", "Social Science"
],
"marxism": [
"Marxism", "\u9a6c\u514b\u601d\u4e3b\u4e49\u57fa\u672c\u539f\u7406",
"Social Science"
],
"mao_zedong_thought": [
"Mao Zedong Thought",
"\u6bdb\u6cfd\u4e1c\u601d\u60f3\u548c\u4e2d\u56fd\u7279\u8272\u793e\u4f1a\u4e3b\u4e49\u7406\u8bba\u4f53\u7cfb\u6982\u8bba",
"Social Science"
],
"education_science": [
"Education Science", "\u6559\u80b2\u5b66", "Social Science"
],
"teacher_qualification": [
"Teacher Qualification", "\u6559\u5e08\u8d44\u683c", "Social Science"
],
"high_school_politics": [
"High School Politics", "\u9ad8\u4e2d\u653f\u6cbb", "Social Science"
],
"high_school_geography": [
"High School Geography", "\u9ad8\u4e2d\u5730\u7406", "Social Science"
],
"middle_school_politics": [
"Middle School Politics", "\u521d\u4e2d\u653f\u6cbb", "Social Science"
],
"middle_school_geography": [
"Middle School Geography", "\u521d\u4e2d\u5730\u7406", "Social Science"
],
"modern_chinese_history":
["Modern Chinese History", "\u8fd1\u4ee3\u53f2\u7eb2\u8981", "Humanities"],
"ideological_and_moral_cultivation": [
"Ideological and Moral Cultivation",
"\u601d\u60f3\u9053\u5fb7\u4fee\u517b\u4e0e\u6cd5\u5f8b\u57fa\u7840",
"Humanities"
],
"logic": ["Logic", "\u903b\u8f91\u5b66", "Humanities"],
"law": ["Law", "\u6cd5\u5b66", "Humanities"],
"chinese_language_and_literature": [
"Chinese Language and Literature",
"\u4e2d\u56fd\u8bed\u8a00\u6587\u5b66", "Humanities"
],
"art_studies": ["Art Studies", "\u827a\u672f\u5b66", "Humanities"],
"professional_tour_guide": [
"Professional Tour Guide", "\u5bfc\u6e38\u8d44\u683c", "Humanities"
],
"legal_professional": [
"Legal Professional", "\u6cd5\u5f8b\u804c\u4e1a\u8d44\u683c",
"Humanities"
],
"high_school_chinese": [
"High School Chinese", "\u9ad8\u4e2d\u8bed\u6587", "Humanities"
],
"high_school_history": [
"High School History", "\u9ad8\u4e2d\u5386\u53f2", "Humanities"
],
"middle_school_history": [
"Middle School History", "\u521d\u4e2d\u5386\u53f2", "Humanities"
],
"civil_servant": ["Civil Servant", "\u516c\u52a1\u5458", "Other"],
"sports_science": ["Sports Science", "\u4f53\u80b2\u5b66", "Other"],
"plant_protection": [
"Plant Protection", "\u690d\u7269\u4fdd\u62a4", "Other"
],
"basic_medicine": ["Basic Medicine", "\u57fa\u7840\u533b\u5b66", "Other"],
"clinical_medicine": [
"Clinical Medicine", "\u4e34\u5e8a\u533b\u5b66", "Other"
],
"urban_and_rural_planner": [
"Urban and Rural Planner",
"\u6ce8\u518c\u57ce\u4e61\u89c4\u5212\u5e08", "Other"
],
"accountant": ["Accountant", "\u6ce8\u518c\u4f1a\u8ba1\u5e08", "Other"],
"fire_engineer": [
"Fire Engineer", "\u6ce8\u518c\u6d88\u9632\u5de5\u7a0b\u5e08", "Other"
],
"environmental_impact_assessment_engineer": [
"Environmental Impact Assessment Engineer",
"\u73af\u5883\u5f71\u54cd\u8bc4\u4ef7\u5de5\u7a0b\u5e08", "Other"
],
"tax_accountant": ["Tax Accountant", "\u7a0e\u52a1\u5e08", "Other"],
"physician": ["Physician", "\u533b\u5e08\u8d44\u683c", "Other"]
}
ceval_all_sets = list(ceval_subject_mapping.keys())
ceval_datasets = []
for _split in ["val", "test"]:
for _name in ceval_all_sets:
_ch_name = ceval_subject_mapping[_name][1]
ceval_infer_cfg = dict(
ice_template=dict(
type=PromptTemplate,
template=dict(
begin="</E>",
round=[
dict(
role="HUMAN",
prompt=
f"以下是中国关于{_ch_name}考试的单项选择题,请选出其中的正确答案。\n{{question}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n答案: "
),
dict(role="BOT", prompt="{answer}"),
]),
ice_token="</E>",
),
retriever=dict(type=FixKRetriever),
inferencer=dict(type=GenInferencer, fix_id_list=[0, 1, 2, 3, 4]),
)
ceval_eval_cfg = dict(
evaluator=dict(type=AccEvaluator),
pred_postprocessor=dict(type='first-capital'))
ceval_datasets.append(
dict(
type=CEvalDataset,
path="./data/ceval/formal_ceval",
name=_name,
abbr="ceval-" + _name if _split == "val" else "ceval-test-" +
_name,
reader_cfg=dict(
input_columns=["question", "A", "B", "C", "D"],
output_column="answer",
train_split="dev",
test_split=_split),
infer_cfg=ceval_infer_cfg,
eval_cfg=ceval_eval_cfg,
))
del _split, _name, _ch_name
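# C-Eval, 5-shot generation over the val split only; unlike the config above, the
# raw generation is scored without an answer post-processor.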
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import FixKRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import CEvalDataset
ceval_subject_mapping = {
"computer_network":
["Computer Network", "\u8ba1\u7b97\u673a\u7f51\u7edc", "STEM"],
"operating_system":
["Operating System", "\u64cd\u4f5c\u7cfb\u7edf", "STEM"],
"computer_architecture":
["Computer Architecture", "\u8ba1\u7b97\u673a\u7ec4\u6210", "STEM"],
"college_programming":
["College Programming", "\u5927\u5b66\u7f16\u7a0b", "STEM"],
"college_physics": ["College Physics", "\u5927\u5b66\u7269\u7406", "STEM"],
"college_chemistry":
["College Chemistry", "\u5927\u5b66\u5316\u5b66", "STEM"],
"advanced_mathematics":
["Advanced Mathematics", "\u9ad8\u7b49\u6570\u5b66", "STEM"],
"probability_and_statistics":
["Probability and Statistics", "\u6982\u7387\u7edf\u8ba1", "STEM"],
"discrete_mathematics":
["Discrete Mathematics", "\u79bb\u6563\u6570\u5b66", "STEM"],
"electrical_engineer": [
"Electrical Engineer", "\u6ce8\u518c\u7535\u6c14\u5de5\u7a0b\u5e08",
"STEM"
],
"metrology_engineer":
["Metrology Engineer", "\u6ce8\u518c\u8ba1\u91cf\u5e08", "STEM"],
"high_school_mathematics":
["High School Mathematics", "\u9ad8\u4e2d\u6570\u5b66", "STEM"],
"high_school_physics":
["High School Physics", "\u9ad8\u4e2d\u7269\u7406", "STEM"],
"high_school_chemistry":
["High School Chemistry", "\u9ad8\u4e2d\u5316\u5b66", "STEM"],
"high_school_biology": [
"High School Biology", "\u9ad8\u4e2d\u751f\u7269", "STEM"
],
"middle_school_mathematics": [
"Middle School Mathematics", "\u521d\u4e2d\u6570\u5b66", "STEM"
],
"middle_school_biology": [
"Middle School Biology", "\u521d\u4e2d\u751f\u7269", "STEM"
],
"middle_school_physics": [
"Middle School Physics", "\u521d\u4e2d\u7269\u7406", "STEM"
],
"middle_school_chemistry": [
"Middle School Chemistry", "\u521d\u4e2d\u5316\u5b66", "STEM"
],
"veterinary_medicine": [
"Veterinary Medicine", "\u517d\u533b\u5b66", "STEM"
],
"college_economics": [
"College Economics", "\u5927\u5b66\u7ecf\u6d4e\u5b66", "Social Science"
],
"business_administration": [
"Business Administration", "\u5de5\u5546\u7ba1\u7406", "Social Science"
],
"marxism": [
"Marxism", "\u9a6c\u514b\u601d\u4e3b\u4e49\u57fa\u672c\u539f\u7406",
"Social Science"
],
"mao_zedong_thought": [
"Mao Zedong Thought",
"\u6bdb\u6cfd\u4e1c\u601d\u60f3\u548c\u4e2d\u56fd\u7279\u8272\u793e\u4f1a\u4e3b\u4e49\u7406\u8bba\u4f53\u7cfb\u6982\u8bba",
"Social Science"
],
"education_science": [
"Education Science", "\u6559\u80b2\u5b66", "Social Science"
],
"teacher_qualification": [
"Teacher Qualification", "\u6559\u5e08\u8d44\u683c", "Social Science"
],
"high_school_politics": [
"High School Politics", "\u9ad8\u4e2d\u653f\u6cbb", "Social Science"
],
"high_school_geography": [
"High School Geography", "\u9ad8\u4e2d\u5730\u7406", "Social Science"
],
"middle_school_politics": [
"Middle School Politics", "\u521d\u4e2d\u653f\u6cbb", "Social Science"
],
"middle_school_geography": [
"Middle School Geography", "\u521d\u4e2d\u5730\u7406", "Social Science"
],
"modern_chinese_history":
["Modern Chinese History", "\u8fd1\u4ee3\u53f2\u7eb2\u8981", "Humanities"],
"ideological_and_moral_cultivation": [
"Ideological and Moral Cultivation",
"\u601d\u60f3\u9053\u5fb7\u4fee\u517b\u4e0e\u6cd5\u5f8b\u57fa\u7840",
"Humanities"
],
"logic": ["Logic", "\u903b\u8f91\u5b66", "Humanities"],
"law": ["Law", "\u6cd5\u5b66", "Humanities"],
"chinese_language_and_literature": [
"Chinese Language and Literature",
"\u4e2d\u56fd\u8bed\u8a00\u6587\u5b66", "Humanities"
],
"art_studies": ["Art Studies", "\u827a\u672f\u5b66", "Humanities"],
"professional_tour_guide": [
"Professional Tour Guide", "\u5bfc\u6e38\u8d44\u683c", "Humanities"
],
"legal_professional": [
"Legal Professional", "\u6cd5\u5f8b\u804c\u4e1a\u8d44\u683c",
"Humanities"
],
"high_school_chinese": [
"High School Chinese", "\u9ad8\u4e2d\u8bed\u6587", "Humanities"
],
"high_school_history": [
"High School History", "\u9ad8\u4e2d\u5386\u53f2", "Humanities"
],
"middle_school_history": [
"Middle School History", "\u521d\u4e2d\u5386\u53f2", "Humanities"
],
"civil_servant": ["Civil Servant", "\u516c\u52a1\u5458", "Other"],
"sports_science": ["Sports Science", "\u4f53\u80b2\u5b66", "Other"],
"plant_protection": [
"Plant Protection", "\u690d\u7269\u4fdd\u62a4", "Other"
],
"basic_medicine": ["Basic Medicine", "\u57fa\u7840\u533b\u5b66", "Other"],
"clinical_medicine": [
"Clinical Medicine", "\u4e34\u5e8a\u533b\u5b66", "Other"
],
"urban_and_rural_planner": [
"Urban and Rural Planner",
"\u6ce8\u518c\u57ce\u4e61\u89c4\u5212\u5e08", "Other"
],
"accountant": ["Accountant", "\u6ce8\u518c\u4f1a\u8ba1\u5e08", "Other"],
"fire_engineer": [
"Fire Engineer", "\u6ce8\u518c\u6d88\u9632\u5de5\u7a0b\u5e08", "Other"
],
"environmental_impact_assessment_engineer": [
"Environmental Impact Assessment Engineer",
"\u73af\u5883\u5f71\u54cd\u8bc4\u4ef7\u5de5\u7a0b\u5e08", "Other"
],
"tax_accountant": ["Tax Accountant", "\u7a0e\u52a1\u5e08", "Other"],
"physician": ["Physician", "\u533b\u5e08\u8d44\u683c", "Other"]
}
ceval_all_sets = list(ceval_subject_mapping.keys())
ceval_datasets = []
for _split in ["val"]:
for _name in ceval_all_sets:
_ch_name = ceval_subject_mapping[_name][1]
ceval_infer_cfg = dict(
ice_template=dict(
type=PromptTemplate,
template=dict(
begin="</E>",
round=[
dict(
role="HUMAN",
prompt=
f"以下是中国关于{_ch_name}考试的单项选择题,请选出其中的正确答案。\n{{question}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n答案: "
),
dict(role="BOT", prompt="{answer}"),
]),
ice_token="</E>",
),
retriever=dict(type=FixKRetriever),
inferencer=dict(type=GenInferencer, fix_id_list=[0, 1, 2, 3, 4]),
)
ceval_eval_cfg = dict(evaluator=dict(type=AccEvaluator), )
ceval_datasets.append(
dict(
type=CEvalDataset,
path="./data/ceval/formal_ceval",
name=_name,
abbr="ceval-" + _name if _split == "val" else "ceval-test-" +
_name,
reader_cfg=dict(
input_columns=["question", "A", "B", "C", "D"],
output_column="answer",
train_split="dev",
test_split=_split),
infer_cfg=ceval_infer_cfg,
eval_cfg=ceval_eval_cfg,
))
del _split, _name, _ch_name
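# C-Eval, PPL-based evaluation: entry config re-exporting the versioned dataset
# definitions referenced below.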
from mmengine.config import read_base
with read_base():
from .ceval_ppl_275812 import ceval_datasets # noqa: F401, F403
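# C-Eval, 5-shot PPL evaluation: one template per candidate answer (A-D), with the
# option letter appended as the BOT turn whose likelihood is scored.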
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import FixKRetriever
from opencompass.openicl.icl_inferencer import PPLInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import CEvalDataset
ceval_subject_mapping = {
"computer_network":
["Computer Network", "\u8ba1\u7b97\u673a\u7f51\u7edc", "STEM"],
"operating_system":
["Operating System", "\u64cd\u4f5c\u7cfb\u7edf", "STEM"],
"computer_architecture":
["Computer Architecture", "\u8ba1\u7b97\u673a\u7ec4\u6210", "STEM"],
"college_programming":
["College Programming", "\u5927\u5b66\u7f16\u7a0b", "STEM"],
"college_physics": ["College Physics", "\u5927\u5b66\u7269\u7406", "STEM"],
"college_chemistry":
["College Chemistry", "\u5927\u5b66\u5316\u5b66", "STEM"],
"advanced_mathematics":
["Advanced Mathematics", "\u9ad8\u7b49\u6570\u5b66", "STEM"],
"probability_and_statistics":
["Probability and Statistics", "\u6982\u7387\u7edf\u8ba1", "STEM"],
"discrete_mathematics":
["Discrete Mathematics", "\u79bb\u6563\u6570\u5b66", "STEM"],
"electrical_engineer": [
"Electrical Engineer", "\u6ce8\u518c\u7535\u6c14\u5de5\u7a0b\u5e08",
"STEM"
],
"metrology_engineer":
["Metrology Engineer", "\u6ce8\u518c\u8ba1\u91cf\u5e08", "STEM"],
"high_school_mathematics":
["High School Mathematics", "\u9ad8\u4e2d\u6570\u5b66", "STEM"],
"high_school_physics":
["High School Physics", "\u9ad8\u4e2d\u7269\u7406", "STEM"],
"high_school_chemistry":
["High School Chemistry", "\u9ad8\u4e2d\u5316\u5b66", "STEM"],
"high_school_biology": [
"High School Biology", "\u9ad8\u4e2d\u751f\u7269", "STEM"
],
"middle_school_mathematics": [
"Middle School Mathematics", "\u521d\u4e2d\u6570\u5b66", "STEM"
],
"middle_school_biology": [
"Middle School Biology", "\u521d\u4e2d\u751f\u7269", "STEM"
],
"middle_school_physics": [
"Middle School Physics", "\u521d\u4e2d\u7269\u7406", "STEM"
],
"middle_school_chemistry": [
"Middle School Chemistry", "\u521d\u4e2d\u5316\u5b66", "STEM"
],
"veterinary_medicine": [
"Veterinary Medicine", "\u517d\u533b\u5b66", "STEM"
],
"college_economics": [
"College Economics", "\u5927\u5b66\u7ecf\u6d4e\u5b66", "Social Science"
],
"business_administration": [
"Business Administration", "\u5de5\u5546\u7ba1\u7406", "Social Science"
],
"marxism": [
"Marxism", "\u9a6c\u514b\u601d\u4e3b\u4e49\u57fa\u672c\u539f\u7406",
"Social Science"
],
"mao_zedong_thought": [
"Mao Zedong Thought",
"\u6bdb\u6cfd\u4e1c\u601d\u60f3\u548c\u4e2d\u56fd\u7279\u8272\u793e\u4f1a\u4e3b\u4e49\u7406\u8bba\u4f53\u7cfb\u6982\u8bba",
"Social Science"
],
"education_science": [
"Education Science", "\u6559\u80b2\u5b66", "Social Science"
],
"teacher_qualification": [
"Teacher Qualification", "\u6559\u5e08\u8d44\u683c", "Social Science"
],
"high_school_politics": [
"High School Politics", "\u9ad8\u4e2d\u653f\u6cbb", "Social Science"
],
"high_school_geography": [
"High School Geography", "\u9ad8\u4e2d\u5730\u7406", "Social Science"
],
"middle_school_politics": [
"Middle School Politics", "\u521d\u4e2d\u653f\u6cbb", "Social Science"
],
"middle_school_geography": [
"Middle School Geography", "\u521d\u4e2d\u5730\u7406", "Social Science"
],
"modern_chinese_history":
["Modern Chinese History", "\u8fd1\u4ee3\u53f2\u7eb2\u8981", "Humanities"],
"ideological_and_moral_cultivation": [
"Ideological and Moral Cultivation",
"\u601d\u60f3\u9053\u5fb7\u4fee\u517b\u4e0e\u6cd5\u5f8b\u57fa\u7840",
"Humanities"
],
"logic": ["Logic", "\u903b\u8f91\u5b66", "Humanities"],
"law": ["Law", "\u6cd5\u5b66", "Humanities"],
"chinese_language_and_literature": [
"Chinese Language and Literature",
"\u4e2d\u56fd\u8bed\u8a00\u6587\u5b66", "Humanities"
],
"art_studies": ["Art Studies", "\u827a\u672f\u5b66", "Humanities"],
"professional_tour_guide": [
"Professional Tour Guide", "\u5bfc\u6e38\u8d44\u683c", "Humanities"
],
"legal_professional": [
"Legal Professional", "\u6cd5\u5f8b\u804c\u4e1a\u8d44\u683c",
"Humanities"
],
"high_school_chinese": [
"High School Chinese", "\u9ad8\u4e2d\u8bed\u6587", "Humanities"
],
"high_school_history": [
"High School History", "\u9ad8\u4e2d\u5386\u53f2", "Humanities"
],
"middle_school_history": [
"Middle School History", "\u521d\u4e2d\u5386\u53f2", "Humanities"
],
"civil_servant": ["Civil Servant", "\u516c\u52a1\u5458", "Other"],
"sports_science": ["Sports Science", "\u4f53\u80b2\u5b66", "Other"],
"plant_protection": [
"Plant Protection", "\u690d\u7269\u4fdd\u62a4", "Other"
],
"basic_medicine": ["Basic Medicine", "\u57fa\u7840\u533b\u5b66", "Other"],
"clinical_medicine": [
"Clinical Medicine", "\u4e34\u5e8a\u533b\u5b66", "Other"
],
"urban_and_rural_planner": [
"Urban and Rural Planner",
"\u6ce8\u518c\u57ce\u4e61\u89c4\u5212\u5e08", "Other"
],
"accountant": ["Accountant", "\u6ce8\u518c\u4f1a\u8ba1\u5e08", "Other"],
"fire_engineer": [
"Fire Engineer", "\u6ce8\u518c\u6d88\u9632\u5de5\u7a0b\u5e08", "Other"
],
"environmental_impact_assessment_engineer": [
"Environmental Impact Assessment Engineer",
"\u73af\u5883\u5f71\u54cd\u8bc4\u4ef7\u5de5\u7a0b\u5e08", "Other"
],
"tax_accountant": ["Tax Accountant", "\u7a0e\u52a1\u5e08", "Other"],
"physician": ["Physician", "\u533b\u5e08\u8d44\u683c", "Other"]
}
ceval_all_sets = list(ceval_subject_mapping.keys())
ceval_datasets = []
for _split in ["val", "test"]:
for _name in ceval_all_sets:
_ch_name = ceval_subject_mapping[_name][1]
ceval_infer_cfg = dict(
ice_template=dict(
type=PromptTemplate,
template={
answer: dict(
begin="</E>",
round=[
dict(
role="HUMAN",
prompt=
f"以下是中国关于{_ch_name}考试的单项选择题,请选出其中的正确答案。\n{{question}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n答案: "
),
dict(role="BOT", prompt=answer),
])
for answer in ["A", "B", "C", "D"]
},
ice_token="</E>",
),
retriever=dict(type=FixKRetriever),
inferencer=dict(type=PPLInferencer, fix_id_list=[0, 1, 2, 3, 4]),
)
ceval_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
ceval_datasets.append(
dict(
type=CEvalDataset,
path="./data/ceval/formal_ceval",
name=_name,
abbr="ceval-" + _name if _split == "val" else "ceval-test-" +
_name,
reader_cfg=dict(
input_columns=["question", "A", "B", "C", "D"],
output_column="answer",
train_split="dev",
test_split=_split),
infer_cfg=ceval_infer_cfg,
eval_cfg=ceval_eval_cfg,
))
del _split, _name, _ch_name