Commit c289ecc0 authored by xinghao

Initial commit
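# SmolInstruct molecule captioning (MC): 1-shot generation (FixKRetriever with fixed
# example 0, drawn from the validation split), scored with METEOR.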
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever, FixKRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.datasets.smolinstruct import MeteorEvaluator
from opencompass.datasets import SmolInstructDataset
meteor_reader_cfg = dict(
input_columns=['input'],
output_column='output',
train_split='validation')
meteor_hint_dict = {
'MC': """You are an expert chemist. Given the SMILES representation of a molecule, your task is to describe the molecule in natural language.
The input contains the SMILES representation of the molecule. Your reply should contain a natural language description of the molecule. Your reply must be valid and chemically reasonable.""",
}
name_dict = {
'MC': 'molecule_captioning',
}
meteor_datasets = []
for _name in meteor_hint_dict:
_hint = meteor_hint_dict[_name]
meteor_infer_cfg = dict(
ice_template=dict(
type=PromptTemplate,
template=dict(round=[
dict(
role='HUMAN',
prompt=f'{_hint}\nQuestion: {{input}}\nAnswer: '
),
dict(role='BOT', prompt='{output}\n')
]),
),
prompt_template=dict(
type=PromptTemplate,
template=dict(
begin='</E>',
round=[
dict(
role='HUMAN',
prompt=f'{_hint}\nQuestion: {{input}}\nAnswer: '
),
],
),
ice_token='</E>',
),
retriever=dict(type=FixKRetriever, fix_id_list=[0]),
inferencer=dict(type=GenInferencer),
)
meteor_eval_cfg = dict(
evaluator=dict(type=MeteorEvaluator),
)
meteor_datasets.append(
dict(
abbr=f'{_name}',
type=SmolInstructDataset,
path='osunlp/SMolInstruct',
name=name_dict[_name],
reader_cfg=meteor_reader_cfg,
infer_cfg=meteor_infer_cfg,
eval_cfg=meteor_eval_cfg,
))
del _name, _hint
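# SmolInstruct name conversion (I2F, I2S, S2F, S2I), zero-shot instruct variant:
# molecular-formula targets (I2F, S2F) use element-level matching, the rest exact matching.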
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever, FixKRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.datasets.smolinstruct import NCExactMatchEvaluator, NCElementMatchEvaluator
from opencompass.datasets import SmolInstructDataset
from opencompass.utils.text_postprocessors import first_capital_postprocess
nc_0shot_reader_cfg = dict(
input_columns=['input'],
output_column='output',
train_split='validation')
nc_hint_dict = {
'I2F': """You are an expert chemist. Given the IUPAC representation of compounds, your task is to predict the molecular formula of the compound.
The input contains the IUPAC representation of the compound. Your reply should contain only the molecular formula of the compound wrapped in <MOLFORMULA> and </MOLFORMULA> tags and no other text. Your reply must be valid and chemically reasonable.""",
'I2S': """You are an expert chemist. Given the IUPAC representation of compounds, your task is to predict the SMILES representation of the compound.
The input contains the IUPAC representation of the compound. Your reply should contain only the SMILES representation of the compound wrapped in <SMILES> and </SMILES> tags and no other text. Your reply must be valid and chemically reasonable.""",
'S2F': """You are an expert chemist. Given the SMILES representation of compounds, your task is to predict the molecular formula of the compound.
The input contains the SMILES representation of the compound. Your reply should contain only the molecular formula of the compound wrapped in <MOLFORMULA> and </MOLFORMULA> tags and no other text. Your reply must be valid and chemically reasonable.""",
'S2I': """You are an expert chemist. Given the SMILES representation of compounds, your task is to predict the IUPAC representation of the compound.
The input contains the SMILES representation of the compound. Your reply should contain only the IUPAC representation of the compound wrapped in <IUPAC> and </IUPAC> tags and no other text. Your reply must be valid and chemically reasonable.""",
}
name_dict = {
'I2F': 'name_conversion-i2f',
'I2S': 'name_conversion-i2s',
'S2F': 'name_conversion-s2f',
'S2I': 'name_conversion-s2i',
}
nc_0shot_instruct_datasets = []
for _name in name_dict:
_hint = nc_hint_dict[_name]
nc_0shot_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template=dict(round=[
dict(role='HUMAN', prompt=f'{_hint}\nQuestion: {{input}}\nAnswer: '),
dict(role='BOT', prompt='{output}\n')
]),
# template=f'<s>[INST] {{input}} [/INST]',
),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GenInferencer),
)
if _name in ['I2F', 'S2F']:
nc_0shot_eval_cfg = dict(
evaluator=dict(type=NCElementMatchEvaluator),
)
else:
nc_0shot_eval_cfg = dict(
evaluator=dict(type=NCExactMatchEvaluator),
)
nc_0shot_instruct_datasets.append(
dict(
abbr=f'NC-{_name}-0shot-instruct',
type=SmolInstructDataset,
path='osunlp/SMolInstruct',
name=name_dict[_name],
reader_cfg=nc_0shot_reader_cfg,
infer_cfg=nc_0shot_infer_cfg,
eval_cfg=nc_0shot_eval_cfg,
))
del _name
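# SmolInstruct name conversion, 1-shot variant of the tasks above (FixKRetriever with a
# single fixed in-context example); same element-match / exact-match evaluator split.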
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever, FixKRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.datasets.smolinstruct import NCExactMatchEvaluator, NCElementMatchEvaluator
from opencompass.datasets import SmolInstructDataset
from opencompass.utils.text_postprocessors import first_capital_postprocess
nc_reader_cfg = dict(
input_columns=['input'],
output_column='output',
train_split='validation')
nc_hint_dict = {
'I2F': """You are an expert chemist. Given the IUPAC representation of compounds, your task is to predict the molecular formula of the compound.
The input contains the IUPAC representation of the compound. Your reply should contain only the molecular formula of the compound wrapped in <MOLFORMULA> and </MOLFORMULA> tags and no other text. Your reply must be valid and chemically reasonable.""",
'I2S': """You are an expert chemist. Given the IUPAC representation of compounds, your task is to predict the SMILES representation of the compound.
The input contains the IUPAC representation of the compound. Your reply should contain only the SMILES representation of the compound wrapped in <SMILES> and </SMILES> tags and no other text. Your reply must be valid and chemically reasonable.""",
'S2F': """You are an expert chemist. Given the SMILES representation of compounds, your task is to predict the molecular formula of the compound.
The input contains the SMILES representation of the compound. Your reply should contain only the molecular formula of the compound wrapped in <MOLFORMULA> and </MOLFORMULA> tags and no other text. Your reply must be valid and chemically reasonable.""",
'S2I': """You are an expert chemist. Given the SMILES representation of compounds, your task is to predict the IUPAC representation of the compound.
The input contains the SMILES representation of the compound. Your reply should contain only the IUPAC representation of the compound wrapped in <IUPAC> and </IUPAC> tags and no other text. Your reply must be valid and chemically reasonable.""",
}
name_dict = {
'I2F': 'name_conversion-i2f',
'I2S': 'name_conversion-i2s',
'S2F': 'name_conversion-s2f',
'S2I': 'name_conversion-s2i',
}
nc_datasets = []
for _name in nc_hint_dict:
_hint = nc_hint_dict[_name]
nc_infer_cfg = dict(
ice_template=dict(
type=PromptTemplate,
template=dict(round=[
dict(
role='HUMAN',
prompt=f'{_hint}\nQuestion: {{input}}\nAnswer: '
),
dict(role='BOT', prompt='{output}\n')
]),
),
prompt_template=dict(
type=PromptTemplate,
template=dict(
begin='</E>',
round=[
dict(
role='HUMAN',
prompt=f'{_hint}\nQuestion: {{input}}\nAnswer: '
),
],
),
ice_token='</E>',
),
retriever=dict(type=FixKRetriever, fix_id_list=[0]),
inferencer=dict(type=GenInferencer),
)
# nc_infer_cfg = dict(
# prompt_template=dict(
# type=PromptTemplate,
# template=dict(
# round=[
# dict(role='HUMAN', prompt=f'{_hint}\nQuestion: {{input}}\nAnswer: '),
# ],
# ),
# ),
# retriever=dict(type=ZeroRetriever),
# inferencer=dict(type=GenInferencer),
# )
if _name in ['I2F', 'S2F']:
nc_eval_cfg = dict(
evaluator=dict(type=NCElementMatchEvaluator),
)
else:
nc_eval_cfg = dict(
evaluator=dict(type=NCExactMatchEvaluator),
)
nc_datasets.append(
dict(
abbr=f'NC-{_name}',
type=SmolInstructDataset,
path='osunlp/SMolInstruct',
name=name_dict[_name],
reader_cfg=nc_reader_cfg,
infer_cfg=nc_infer_cfg,
eval_cfg=nc_eval_cfg,
))
del _name, _hint
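# SmolInstruct property prediction, classification tasks (BBBP, ClinTox, HIV, SIDER),
# zero-shot instruct: accuracy after smolinstruct_acc_0shot_postprocess.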
from opencompass.openicl import AccEvaluator
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever, FixKRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.datasets import SmolInstructDataset
from opencompass.datasets.smolinstruct import smolinstruct_acc_0shot_postprocess
pp_acc_reader_cfg = dict(
input_columns=['input'],
output_column='output',
train_split='validation')
pp_acc_hint_dict = {
'BBBP': """You are an expert chemist. Given the smiles representation of the compound, your task is to predict whether blood-brain barrier permeability (BBBP) is a property of the compound.
The input contains the compound. Your reply should only contain Yes or No. Your reply must be valid and chemically reasonable.""",
'ClinTox': """You are an expert chemist. Given the smiles representation of the compound, your task is to predict whether the compound is toxic.
The input contains the compound. Your reply should contain only Yes or No. Your reply must be valid and chemically reasonable.""",
'HIV': """You are an expert chemist. Given the smiles representation of the compound, your task is to predict whether the compound serve as an inhibitor of HIV replication.
The input contains the compound. Your reply should contain only Yes or No. Your reply must be valid and chemically reasonable.""",
'SIDER': """You are an expert chemist. Given the smiles representation of the compound, your task is to predict whether the compound has any side effects.
The input contains the compound. Your reply should contain only Yes or No. Your reply must be valid and chemically reasonable.""",
}
name_dict = {
'BBBP': 'property_prediction-bbbp',
'ClinTox': 'property_prediction-clintox',
'HIV': 'property_prediction-hiv',
'SIDER': 'property_prediction-sider',
}
pp_acc_datasets_0shot_instruct = []
for _name in pp_acc_hint_dict:
_hint = pp_acc_hint_dict[_name]
pp_acc_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template=dict(round=[
dict(role='HUMAN', prompt=f'{_hint}\nQuestion: {{input}}\nAnswer: '),
dict(role='BOT', prompt='{output}\n')
]),
# template=f'<s>[INST] {{input}} [/INST]',
),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GenInferencer),
)
pp_acc_eval_cfg = dict(
evaluator=dict(type=AccEvaluator),
pred_postprocessor=dict(type=smolinstruct_acc_0shot_postprocess)
)
pp_acc_datasets_0shot_instruct.append(
dict(
abbr=f'PP-{_name}-0shot-instruct',
type=SmolInstructDataset,
path='osunlp/SMolInstruct',
name=name_dict[_name],
reader_cfg=pp_acc_reader_cfg,
infer_cfg=pp_acc_infer_cfg,
eval_cfg=pp_acc_eval_cfg,
))
del _name, _hint
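# SmolInstruct property prediction, classification tasks, 1-shot variant:
# accuracy after smolinstruct_acc_postprocess.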
from opencompass.openicl import AccEvaluator
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever, FixKRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.datasets import SmolInstructDataset
from opencompass.datasets.smolinstruct import smolinstruct_acc_postprocess
pp_acc_reader_cfg = dict(
input_columns=['input'],
output_column='output',
train_split='validation')
pp_acc_hint_dict = {
'BBBP': """You are an expert chemist. Given the smiles representation of the compound, your task is to predict whether blood-brain barrier permeability (BBBP) is a property of the compound.
The input contains the compound. Your reply should contain only Yes or No. Your reply must be valid and chemically reasonable.""",
'ClinTox': """You are an expert chemist. Given the smiles representation of the compound, your task is to predict whether the compound is toxic.
The input contains the compound. Your reply should contain only Yes or No. Your reply must be valid and chemically reasonable.""",
'HIV': """You are an expert chemist. Given the smiles representation of the compound, your task is to predict whether the compound serve as an inhibitor of HIV replication.
The input contains the compound. Your reply should contain only Yes or No. Your reply must be valid and chemically reasonable.""",
'SIDER': """You are an expert chemist. Given the smiles representation of the compound, your task is to predict whether the compound has any side effects.
The input contains the compound. Your reply should contain only Yes or No. Your reply must be valid and chemically reasonable.""",
}
name_dict = {
'BBBP': 'property_prediction-bbbp',
'ClinTox': 'property_prediction-clintox',
'HIV': 'property_prediction-hiv',
'SIDER': 'property_prediction-sider',
}
pp_acc_datasets = []
for _name in pp_acc_hint_dict:
_hint = pp_acc_hint_dict[_name]
pp_acc_infer_cfg = dict(
ice_template=dict(
type=PromptTemplate,
template=dict(round=[
dict(
role='HUMAN',
prompt=f'{_hint}\nQuestion: {{input}}\nAnswer: '
),
dict(role='BOT', prompt='{output}\n')
]),
),
prompt_template=dict(
type=PromptTemplate,
template=dict(
begin='</E>',
round=[
dict(
role='HUMAN',
prompt=f'{_hint}\nQuestion: {{input}}\nAnswer: '
),
],
),
ice_token='</E>',
),
retriever=dict(type=FixKRetriever, fix_id_list=[0]),
inferencer=dict(type=GenInferencer),
)
pp_acc_eval_cfg = dict(
evaluator=dict(type=AccEvaluator),
pred_postprocessor=dict(type=smolinstruct_acc_postprocess)
)
pp_acc_datasets.append(
dict(
abbr=f'PP-{_name}',
type=SmolInstructDataset,
path='osunlp/SMolInstruct',
name=name_dict[_name],
reader_cfg=pp_acc_reader_cfg,
infer_cfg=pp_acc_infer_cfg,
eval_cfg=pp_acc_eval_cfg,
))
del _name, _hint
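# SmolInstruct property prediction, regression tasks (ESOL, Lipo), zero-shot instruct:
# the prompt asks for the value wrapped in \boxed{}, and predictions are scored with RMSE.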
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever, FixKRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.datasets.smolinstruct import RMSEEvaluator
from opencompass.datasets import SmolInstructDataset
pp_rmse_0shot_reader_cfg = dict(
input_columns=['input'],
output_column='output',
train_split='validation')
pp_rmse_hint_dict = {
'ESOL': """You are an expert chemist. Given the SMILES representation of compounds, your task is to predict the log solubility of the compound.
The input contains the SMILES representation of the compound. Your reply should contain the log solubility of the compound wrapped in \\boxed{}. Your reply must be valid and chemically reasonable.""",
'Lipo': """You are an expert chemist. Given the SMILES representation of compounds, your task is to predict the octanol/water partition coefficient of the compound.
The input contains the SMILES representation of the compound. Your reply should contain the octanol/water partition coefficient of the compound wrapped in \\boxed{}. Your reply must be valid and chemically reasonable."""
}
name_dict = {
'ESOL': 'property_prediction-esol',
'Lipo': 'property_prediction-lipo'
}
pp_rmse_0shot_instruct_datasets = []
for _name in name_dict:
_hint = pp_rmse_hint_dict[_name]
pp_rmse_0shot_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template=dict(round=[
dict(role='HUMAN', prompt=f'{_hint}\nQuestion: {{input}}\nAnswer: '),
dict(role='BOT', prompt='{output}\n')
]),
# template=f'<s>[INST] {{input}} [/INST]',
),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GenInferencer),
)
pp_rmse_0shot_eval_cfg = dict(
evaluator=dict(type=RMSEEvaluator),
)
pp_rmse_0shot_instruct_datasets.append(
dict(
abbr=f'PP-{_name}-0shot-instruct',
type=SmolInstructDataset,
path='osunlp/SMolInstruct',
name=name_dict[_name],
reader_cfg=pp_rmse_0shot_reader_cfg,
infer_cfg=pp_rmse_0shot_infer_cfg,
eval_cfg=pp_rmse_0shot_eval_cfg,
))
del _name
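# SmolInstruct property prediction, regression tasks, 1-shot variant:
# the value is requested inside <NUMBER> tags and scored with RMSE.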
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever, FixKRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.datasets.smolinstruct import RMSEEvaluator
from opencompass.datasets import SmolInstructDataset
pp_rmse_reader_cfg = dict(
input_columns=['input'],
output_column='output',
train_split='validation')
pp_rmse_hint_dict = {
'ESOL': """You are an expert chemist. Given the SMILES representation of compounds, your task is to predict the log solubility of the compound.
The input contains the SMILES representation of the compound. Your reply should contain the log solubility of the compound wrapped in <NUMBER> and </NUMBER> tags. Your reply must be valid and chemically reasonable.""",
'Lipo': """You are an expert chemist. Given the SMILES representation of compounds, your task is to predict the octanol/water partition coefficient of the compound.
The input contains the SMILES representation of the compound. Your reply should contain the octanol/water partition coefficient of the compound wrapped in <NUMBER> and </NUMBER> tags. Your reply must be valid and chemically reasonable."""
}
name_dict = {
'ESOL': 'property_prediction-esol',
'Lipo': 'property_prediction-lipo'
}
pp_rmse_datasets = []
for _name in pp_rmse_hint_dict:
_hint = pp_rmse_hint_dict[_name]
pp_rmse_infer_cfg = dict(
ice_template=dict(
type=PromptTemplate,
template=dict(round=[
dict(
role='HUMAN',
prompt=f'{_hint}\nQuestion: {{input}}\nAnswer: '
),
dict(role='BOT', prompt='{output}\n')
]),
),
prompt_template=dict(
type=PromptTemplate,
template=dict(
begin='</E>',
round=[
dict(
role='HUMAN',
prompt=f'{_hint}\nQuestion: {{input}}\nAnswer: '
),
],
),
ice_token='</E>',
),
retriever=dict(type=FixKRetriever, fix_id_list=[0]),
inferencer=dict(type=GenInferencer),
)
pp_rmse_eval_cfg = dict(
evaluator=dict(type=RMSEEvaluator),
)
pp_rmse_datasets.append(
dict(
abbr=f'PP-{_name}',
type=SmolInstructDataset,
path='osunlp/SMolInstruct',
name=name_dict[_name],
reader_cfg=pp_rmse_reader_cfg,
infer_cfg=pp_rmse_infer_cfg,
eval_cfg=pp_rmse_eval_cfg,
))
del _name, _hint
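# SuperGLUE AX-b: default generative entry, re-exporting AX_b_datasets from the pinned config.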
from mmengine.config import read_base
with read_base():
from .SuperGLUE_AX_b_gen_4dfefa import AX_b_datasets # noqa: F401, F403
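# SuperGLUE AX-b, generative evaluation: zero-shot A/B entailment question,
# accuracy after extracting the first A/B option from the model output.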
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import AXDatasetV2
from opencompass.utils.text_postprocessors import first_option_postprocess
AX_b_reader_cfg = dict(
input_columns=['sentence1', 'sentence2'],
output_column='label',
)
AX_b_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template=dict(round=[
dict(
role='HUMAN',
prompt=
'{sentence1}\n{sentence2}\nIs the sentence below entailed by the sentence above?\nA. Yes\nB. No\nAnswer:'
),
]),
),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GenInferencer),
)
AX_b_eval_cfg = dict(
evaluator=dict(type=AccEvaluator),
pred_role='BOT',
pred_postprocessor=dict(type=first_option_postprocess, options='AB'),
)
AX_b_datasets = [
dict(
abbr='AX_b',
type=AXDatasetV2,
path='./data/SuperGLUE/AX-b/AX-b.jsonl',
reader_cfg=AX_b_reader_cfg,
infer_cfg=AX_b_infer_cfg,
eval_cfg=AX_b_eval_cfg,
)
]
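# SuperGLUE AX-b: default perplexity entry, re-exporting AX_b_datasets from the pinned config.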
from mmengine.config import read_base
with read_base():
from .SuperGLUE_AX_b_ppl_6db806 import AX_b_datasets # noqa: F401, F403
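# SuperGLUE AX-b, perplexity evaluation with plain string templates per label.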
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import PPLInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import HFDataset
AX_b_reader_cfg = dict(
input_columns=['sentence1', 'sentence2'],
output_column='label',
test_split='train')
AX_b_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template={
'entailment': '{sentence1}?entailment, {sentence2}',
'not_entailment': '{sentence1}?not_entailment, {sentence2}'
}),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=PPLInferencer))
AX_b_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
AX_b_datasets = [
dict(
type=HFDataset,
abbr='AX_b',
path='json',
data_files='./data/SuperGLUE/AX-b/AX-b.jsonl',
split='train',
reader_cfg=AX_b_reader_cfg,
infer_cfg=AX_b_infer_cfg,
eval_cfg=AX_b_eval_cfg)
]
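# SuperGLUE AX-b, perplexity evaluation with chat-style (HUMAN/BOT) templates per label.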
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import PPLInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import HFDataset
AX_b_reader_cfg = dict(
input_columns=['sentence1', 'sentence2'],
output_column='label',
test_split='train')
AX_b_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template={
'entailment':
dict(round=[
dict(
role='HUMAN',
prompt=
'{sentence1}\n{sentence2}\nIs the sentence below entailed by the sentence above?'
),
dict(role='BOT', prompt='Yes'),
]),
'not_entailment':
dict(round=[
dict(
role='HUMAN',
prompt=
'{sentence1}\n{sentence2}\nIs the sentence below entailed by the sentence above?'
),
dict(role='BOT', prompt='No'),
])
},
),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=PPLInferencer),
)
AX_b_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
AX_b_datasets = [
dict(
type=HFDataset,
abbr='AX_b',
path='json',
data_files='./data/SuperGLUE/AX-b/AX-b.jsonl',
split='train',
reader_cfg=AX_b_reader_cfg,
infer_cfg=AX_b_infer_cfg,
eval_cfg=AX_b_eval_cfg,
)
]
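# SuperGLUE AX-g: default generative entry, re-exporting AX_g_datasets from the pinned config.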
from mmengine.config import read_base
with read_base():
from .SuperGLUE_AX_g_gen_68aac7 import AX_g_datasets # noqa: F401, F403
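# SuperGLUE AX-g, generative evaluation: zero-shot A/B entailment question,
# accuracy after extracting the first A/B option from the model output.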
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import AXDatasetV2
from opencompass.utils.text_postprocessors import first_option_postprocess
AX_g_reader_cfg = dict(
input_columns=['hypothesis', 'premise'],
output_column='label',
)
AX_g_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template=dict(round=[
dict(
role='HUMAN',
prompt=
'{premise}\n{hypothesis}\nIs the sentence below entailed by the sentence above?\nA. Yes\nB. No\nAnswer:'
),
]),
),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GenInferencer),
)
AX_g_eval_cfg = dict(
evaluator=dict(type=AccEvaluator),
pred_role='BOT',
pred_postprocessor=dict(type=first_option_postprocess, options='AB'),
)
AX_g_datasets = [
dict(
abbr='AX_g',
type=AXDatasetV2,
path='./data/SuperGLUE/AX-g/AX-g.jsonl',
reader_cfg=AX_g_reader_cfg,
infer_cfg=AX_g_infer_cfg,
eval_cfg=AX_g_eval_cfg,
)
]
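# SuperGLUE AX-g: default perplexity entry, re-exporting AX_g_datasets from the pinned config.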
from mmengine.config import read_base
with read_base():
from .SuperGLUE_AX_g_ppl_66caf3 import AX_g_datasets # noqa: F401, F403
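# SuperGLUE AX-g, perplexity evaluation with plain string templates per label.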
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import PPLInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import HFDataset
AX_g_reader_cfg = dict(
input_columns=['hypothesis', 'premise'],
output_column='label',
test_split='train')
AX_g_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template={
'entailment': '{premise}?entailment, {hypothesis}',
'not_entailment': '{premise}?not_entailment, {hypothesis}'
}),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=PPLInferencer))
AX_g_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
AX_g_datasets = [
dict(
type=HFDataset,
abbr='AX_g',
path='json',
data_files='./data/SuperGLUE/AX-g/AX-g.jsonl',
split='train',
reader_cfg=AX_g_reader_cfg,
infer_cfg=AX_g_infer_cfg,
eval_cfg=AX_g_eval_cfg)
]
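# SuperGLUE AX-g, perplexity evaluation with chat-style (HUMAN/BOT) templates per label.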
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import PPLInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import HFDataset
AX_g_reader_cfg = dict(
input_columns=['hypothesis', 'premise'],
output_column='label',
test_split='train')
AX_g_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template={
'entailment':
dict(round=[
dict(
role='HUMAN',
prompt=
'{premise}\n{hypothesis}\nIs the sentence below entailed by the sentence above?'
),
dict(role='BOT', prompt='Yes'),
]),
'not_entailment':
dict(round=[
dict(
role='HUMAN',
prompt=
'{premise}\n{hypothesis}\nIs the sentence below entailed by the sentence above?'
),
dict(role='BOT', prompt='No'),
])
},
),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=PPLInferencer),
)
AX_g_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
AX_g_datasets = [
dict(
type=HFDataset,
abbr='AX_g',
path='json',
data_files='./data/SuperGLUE/AX-g/AX-g.jsonl',
split='train',
reader_cfg=AX_g_reader_cfg,
infer_cfg=AX_g_infer_cfg,
eval_cfg=AX_g_eval_cfg,
)
]
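# SuperGLUE BoolQ, generative evaluation: zero-shot, chain-of-thought-style prompt that
# asks for a final 'ANSWER: $LETTER' line; accuracy after extracting the first A/B option.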
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import BoolQDatasetV2
from opencompass.utils.text_postprocessors import (
first_option_postprocess,
)
QUERY_TEMPLATE = """
Answer the following question. The last line of your response should be of the following format: 'ANSWER: $LETTER' (without quotes) where LETTER is one of AB. Think step by step before answering.
Passage: {passage}
Question: {question}
A. Yes
B. No
""".strip()
BoolQ_reader_cfg = dict(
input_columns=['question', 'passage'],
output_column='label',
)
BoolQ_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template=dict(
round=[
dict(role='HUMAN', prompt=QUERY_TEMPLATE),
]
),
),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GenInferencer),
)
BoolQ_eval_cfg = dict(
evaluator=dict(type=AccEvaluator),
pred_role='BOT',
pred_postprocessor=dict(type=first_option_postprocess, options='AB'),
)
BoolQ_datasets = [
dict(
abbr='BoolQ',
type=BoolQDatasetV2,
path='opencompass/boolq',
reader_cfg=BoolQ_reader_cfg,
infer_cfg=BoolQ_infer_cfg,
eval_cfg=BoolQ_eval_cfg,
)
]
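# SuperGLUE BoolQ, few-shot generative evaluation: 5 fixed in-context examples
# (FixKRetriever), accuracy after first-capital postprocessing.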
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever, FixKRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import BoolQDatasetV2
from opencompass.utils.text_postprocessors import first_capital_postprocess
BoolQ_reader_cfg = dict(
input_columns=['question', 'passage'],
output_column='label',
)
BoolQ_infer_cfg = dict(
ice_template=dict(
type=PromptTemplate,
template=dict(
begin='</E>',
round=[
dict(
role='HUMAN',
prompt='{passage}\nQuestion: {question}\nA. Yes\nB. No\nAnswer:',
),
dict(role='BOT', prompt='{label}'),
],
),
ice_token='</E>',
),
retriever=dict(type=FixKRetriever, fix_id_list=[0, 2, 4, 6, 8]),
inferencer=dict(type=GenInferencer, max_out_len=50),
)
BoolQ_eval_cfg = dict(
evaluator=dict(type=AccEvaluator),
pred_role='BOT',
pred_postprocessor=dict(type=first_capital_postprocess),
)
BoolQ_datasets = [
dict(
abbr='BoolQ',
type=BoolQDatasetV2,
path='opencompass/boolq',
reader_cfg=BoolQ_reader_cfg,
infer_cfg=BoolQ_infer_cfg,
eval_cfg=BoolQ_eval_cfg,
)
]
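# SuperGLUE BoolQ, few-shot perplexity evaluation over Yes/No continuations.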
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever, FixKRetriever
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import BoolQDatasetV2
from opencompass.openicl.icl_inferencer import PPLInferencer
BoolQ_reader_cfg = dict(
input_columns=['question', 'passage'],
output_column='label',
)
BoolQ_infer_cfg = dict(
ice_template=dict(
type=PromptTemplate,
template={
'B': dict(
round=[
dict(role='HUMAN', prompt='{passage}\nQuestion: {question}?'),
dict(role='BOT', prompt='No'),
]
),
'A': dict(
round=[
dict(role='HUMAN', prompt='{passage}\nQuestion: {question}?'),
dict(role='BOT', prompt='Yes'),
]
),
},
ice_token='</E>',
),
retriever=dict(type=FixKRetriever, fix_id_list=[0, 2, 4, 6, 8]),
inferencer=dict(type=PPLInferencer, max_out_len=50),
)
BoolQ_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
BoolQ_datasets = [
dict(
abbr='BoolQ',
type=BoolQDatasetV2,
path='opencompass/boolq',
reader_cfg=BoolQ_reader_cfg,
infer_cfg=BoolQ_infer_cfg,
eval_cfg=BoolQ_eval_cfg,
)
]
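# Minimal usage sketch (not part of this commit): OpenCompass dataset configs such as the
# ones above are normally pulled into a top-level eval config with mmengine's read_base()
# and concatenated into a single `datasets` list, which is then run with
# `python run.py <config>.py`. The relative module paths and the model config imported
# below are illustrative placeholders, not files defined here.
#
# from mmengine.config import read_base
#
# with read_base():
#     from .smolinstruct_nc_gen import nc_datasets        # hypothetical filename
#     from .SuperGLUE_BoolQ_gen import BoolQ_datasets      # hypothetical filename
#     from .hf_internlm2_chat_7b import models             # hypothetical model config
#
# datasets = [*nc_datasets, *BoolQ_datasets]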