"src/targets/vscode:/vscode.git/clone" did not exist on "506a73ec0a6510da11eb92443d548c9e500179bc"
Commit c289ecc0 authored by xinghao's avatar xinghao
Browse files

Initial commit

parents
Pipeline #3004 canceled with stages
# Alias config: re-export the CMO-FIB generation datasets from the pinned
# base config so callers can depend on a stable module name.
from mmengine.config import read_base

with read_base():
    from .cmo_fib_gen_ace24b import cmo_fib_datasets  # noqa: F401, F403
\ No newline at end of file
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.datasets import CMOFibDataset, MATHEvaluator, math_postprocess_v2

# CMO-FIB (Chinese Math Olympiad fill-in-the-blank), zero-shot generation:
# the model sees the raw problem and is asked for step-by-step reasoning
# ending in a \boxed{} answer, scored by the v2 MATH evaluator.
cmo_fib_reader_cfg = dict(
    input_columns=['question'],
    output_column='answer'
)

# Single HUMAN turn; prompt text is Chinese because the benchmark is Chinese.
_round = [
    dict(role='HUMAN',
         prompt='{question}\n请一步一步地推理,并将最终答案写入\\boxed{}.'),
]

cmo_fib_infer_cfg = dict(
    prompt_template=dict(type=PromptTemplate, template=dict(round=_round)),
    retriever=dict(type=ZeroRetriever),
    inferencer=dict(type=GenInferencer)
)

cmo_fib_eval_cfg = dict(
    evaluator=dict(type=MATHEvaluator, version='v2'),
    pred_postprocessor=dict(type=math_postprocess_v2)
)

cmo_fib_datasets = [
    dict(
        abbr='cmo_fib',
        type=CMOFibDataset,
        path='opencompass/cmo_fib',
        reader_cfg=cmo_fib_reader_cfg,
        infer_cfg=cmo_fib_infer_cfg,
        eval_cfg=cmo_fib_eval_cfg
    )
]

del _round
\ No newline at end of file
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.datasets import CMOFibDataset, MATHEvaluator, math_postprocess_v2

# CMO-FIB zero-shot generation config with an explicit 2048-token budget
# for the chain-of-thought answer; scored by the v2 MATH evaluator.
cmo_fib_reader_cfg = dict(
    input_columns=['question'],
    output_column='answer'
)

# Zero-shot, single-turn prompt asking for step-by-step reasoning plus a
# final \boxed{} answer (Chinese benchmark, Chinese instructions).
_prompt_template = dict(
    type=PromptTemplate,
    template=dict(round=[
        dict(role='HUMAN',
             prompt='{question}\n请一步一步地推理,并将最终答案写入\\boxed{}.'),
    ]),
)

cmo_fib_infer_cfg = dict(
    prompt_template=_prompt_template,
    retriever=dict(type=ZeroRetriever),
    # max_out_len=2048 keeps long reasoning chains from being truncated by a
    # smaller backend default.
    inferencer=dict(type=GenInferencer, max_out_len=2048)
)

cmo_fib_eval_cfg = dict(
    evaluator=dict(type=MATHEvaluator, version='v2'),
    pred_postprocessor=dict(type=math_postprocess_v2)
)

cmo_fib_datasets = [
    dict(
        abbr='cmo_fib',
        type=CMOFibDataset,
        path='opencompass/cmo_fib',
        reader_cfg=cmo_fib_reader_cfg,
        infer_cfg=cmo_fib_infer_cfg,
        eval_cfg=cmo_fib_eval_cfg
    )
]

del _prompt_template
\ No newline at end of file
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.datasets import CodeCompassCodeGenerationDataset

# CodeCompass (C++) code generation: the stored problem prompt is forwarded
# verbatim to the model and submissions are judged by the CodeCompass
# execution-based evaluator.

# Reader Config
codecompass_reader_cfg = dict(
    input_columns=['prompt'],
    output_column='metadata',
    train_split='test'
)

# Inference Config: zero-shot, prompt passed through unchanged, generation
# capped at 2048 tokens.
codecompass_infer_cfg = dict(
    prompt_template=dict(
        type=PromptTemplate,
        template=dict(round=[dict(role='HUMAN', prompt='{prompt}')])
    ),
    retriever=dict(type=ZeroRetriever),
    inferencer=dict(type=GenInferencer, max_out_len=2048)
)

# Execution-based grading: 16 parallel workers, 15 s per-submission timeout,
# reported as pass@1.
_evaluator = dict(
    type='CodeCompassEvaluator',
    num_process_evaluate=16,
    timeout=15,
    k_list=[1],
    dataset_path='opencompass/CodeCompass'
)

codecompass_eval_cfg = dict(
    evaluator=_evaluator,
    pred_role='BOT',
)

codecompass_datasets = [
    dict(
        type=CodeCompassCodeGenerationDataset,
        abbr='codecompass_gen_cpp',
        path='opencompass/CodeCompass',
        reader_cfg=codecompass_reader_cfg,
        infer_cfg=codecompass_infer_cfg,
        eval_cfg=codecompass_eval_cfg
    )
]

del _evaluator
\ No newline at end of file
# Collection config: pulls a pinned set of per-benchmark dataset configs via
# mmengine's read_base and flattens every imported `*_datasets` list into a
# single `datasets` list for the evaluation runner.
from mmengine.config import read_base

with read_base():
    from ..mmlu.mmlu_ppl_ac766d import mmlu_datasets
    from ..cmmlu.cmmlu_ppl_041cbf import cmmlu_datasets
    from ..ceval.ceval_ppl_1cd8bf import ceval_datasets
    from ..GaokaoBench.GaokaoBench_no_subjective_gen_d21e37 import GaokaoBench_datasets
    from ..triviaqa.triviaqa_wiki_1shot_gen_20a989 import triviaqa_datasets
    from ..nq.nq_open_1shot_gen_20a989 import nq_datasets
    from ..race.race_ppl_abed12 import race_datasets
    from ..winogrande.winogrande_5shot_ll_252f01 import winogrande_datasets
    from ..hellaswag.hellaswag_10shot_ppl_59c85e import hellaswag_datasets
    from ..bbh.bbh_gen_98fba6 import bbh_datasets
    from ..gsm8k.gsm8k_gen_ee684f import gsm8k_datasets
    from ..math.math_evaluatorv2_gen_2f4a71 import math_datasets
    from ..TheoremQA.TheoremQA_post_v2_gen_2c2583 import TheoremQA_datasets
    from ..humaneval.deprecated_humaneval_gen_d2537e import humaneval_datasets
    from ..mbpp.deprecated_sanitized_mbpp_gen_cb43ef import sanitized_mbpp_datasets

# Concatenate every `*_datasets` variable imported above, in import order.
datasets = sum((v for k, v in locals().items() if k.endswith('_datasets')), [])
# Collection config (mixed PPL/gen variants): aggregates many per-benchmark
# dataset configs via read_base and flattens them into one `datasets` list.
from mmengine.config import read_base

with read_base():
    from ..mmlu.mmlu_ppl_ac766d import mmlu_datasets
    from ..ceval.ceval_ppl_578f8d import ceval_datasets
    from ..agieval.agieval_mixed_713d14 import agieval_datasets
    from ..GaokaoBench.GaokaoBench_mixed_9af5ee import GaokaoBench_datasets
    from ..bbh.bbh_gen_5b92b0 import bbh_datasets
    from ..humaneval.humaneval_gen_8e312c import humaneval_datasets
    from ..mbpp.deprecated_mbpp_gen_1e1056 import mbpp_datasets
    from ..CLUE_C3.CLUE_C3_ppl_e24a31 import C3_datasets
    from ..CLUE_CMRC.CLUE_CMRC_gen_1bd3c8 import CMRC_datasets
    from ..CLUE_DRCD.CLUE_DRCD_gen_1bd3c8 import DRCD_datasets
    from ..CLUE_afqmc.CLUE_afqmc_ppl_6507d7 import afqmc_datasets
    from ..CLUE_cmnli.CLUE_cmnli_ppl_fdc6de import cmnli_datasets
    from ..CLUE_ocnli.CLUE_ocnli_ppl_fdc6de import ocnli_datasets
    from ..FewCLUE_bustm.FewCLUE_bustm_ppl_e53034 import bustm_datasets
    from ..FewCLUE_chid.FewCLUE_chid_ppl_8f2872 import chid_datasets
    from ..FewCLUE_cluewsc.FewCLUE_cluewsc_ppl_4284a0 import cluewsc_datasets
    from ..FewCLUE_csl.FewCLUE_csl_ppl_841b62 import csl_datasets
    from ..FewCLUE_eprstmt.FewCLUE_eprstmt_ppl_f1e631 import eprstmt_datasets
    from ..FewCLUE_ocnli_fc.FewCLUE_ocnli_fc_ppl_c08300 import ocnli_fc_datasets
    from ..FewCLUE_tnews.FewCLUE_tnews_ppl_d10e8a import tnews_datasets
    from ..lcsts.lcsts_gen_8ee1fe import lcsts_datasets
    from ..lambada.lambada_gen_217e11 import lambada_datasets
    from ..storycloze.storycloze_ppl_496661 import storycloze_datasets
    from ..SuperGLUE_AX_b.SuperGLUE_AX_b_ppl_6db806 import AX_b_datasets
    from ..SuperGLUE_AX_g.SuperGLUE_AX_g_ppl_66caf3 import AX_g_datasets
    from ..SuperGLUE_BoolQ.SuperGLUE_BoolQ_ppl_314b96 import BoolQ_datasets
    from ..SuperGLUE_CB.SuperGLUE_CB_ppl_0143fe import CB_datasets
    from ..SuperGLUE_COPA.SuperGLUE_COPA_ppl_9f3618 import COPA_datasets
    from ..SuperGLUE_MultiRC.SuperGLUE_MultiRC_ppl_ced824 import MultiRC_datasets
    from ..SuperGLUE_RTE.SuperGLUE_RTE_ppl_66caf3 import RTE_datasets
    from ..SuperGLUE_ReCoRD.SuperGLUE_ReCoRD_gen_30dea0 import ReCoRD_datasets
    from ..SuperGLUE_WiC.SuperGLUE_WiC_ppl_312de9 import WiC_datasets
    from ..SuperGLUE_WSC.SuperGLUE_WSC_ppl_003529 import WSC_datasets
    from ..race.race_ppl_a138cd import race_datasets
    from ..Xsum.Xsum_gen_31397e import Xsum_datasets
    from ..gsm8k.gsm8k_gen_1d7fe4 import gsm8k_datasets
    from ..summedits.summedits_ppl_1fbeb6 import summedits_datasets
    from ..math.math_gen_265cce import math_datasets
    from ..TheoremQA.TheoremQA_gen_ef26ca import TheoremQA_datasets
    from ..hellaswag.hellaswag_ppl_47bff9 import hellaswag_datasets
    from ..ARC_e.ARC_e_ppl_a450bd import ARC_e_datasets
    from ..ARC_c.ARC_c_ppl_a450bd import ARC_c_datasets
    from ..commonsenseqa.commonsenseqa_ppl_5545e2 import commonsenseqa_datasets
    from ..piqa.piqa_ppl_1cf9f0 import piqa_datasets
    from ..siqa.siqa_ppl_ced5f6 import siqa_datasets
    from ..strategyqa.strategyqa_gen_1180a7 import strategyqa_datasets
    from ..winogrande.winogrande_ll_c5cf57 import winogrande_datasets
    from ..obqa.obqa_ppl_c7c154 import obqa_datasets
    from ..nq.nq_gen_c788f6 import nq_datasets
    from ..triviaqa.triviaqa_gen_2121ce import triviaqa_datasets
    from ..flores.flores_gen_806ede import flores_datasets

# Concatenate every `*_datasets` variable imported above, in import order.
datasets = sum((v for k, v in locals().items() if k.endswith('_datasets')), [])
# Collection config: same benchmark suite as its sibling collections but with
# different pinned per-dataset config variants (note differing hash suffixes).
from mmengine.config import read_base

with read_base():
    from ..mmlu.mmlu_ppl_ac766d import mmlu_datasets
    from ..ceval.ceval_ppl_578f8d import ceval_datasets
    from ..agieval.agieval_mixed_713d14 import agieval_datasets
    from ..GaokaoBench.GaokaoBench_mixed_9af5ee import GaokaoBench_datasets
    from ..bbh.bbh_gen_5b92b0 import bbh_datasets
    from ..humaneval.deprecated_humaneval_gen_a82cae import humaneval_datasets
    from ..mbpp.deprecated_mbpp_gen_1e1056 import mbpp_datasets
    from ..CLUE_C3.CLUE_C3_ppl_e24a31 import C3_datasets
    from ..CLUE_CMRC.CLUE_CMRC_gen_1bd3c8 import CMRC_datasets
    from ..CLUE_DRCD.CLUE_DRCD_gen_1bd3c8 import DRCD_datasets
    from ..CLUE_afqmc.CLUE_afqmc_ppl_6507d7 import afqmc_datasets
    from ..CLUE_cmnli.CLUE_cmnli_ppl_fdc6de import cmnli_datasets
    from ..CLUE_ocnli.CLUE_ocnli_ppl_fdc6de import ocnli_datasets
    from ..FewCLUE_bustm.FewCLUE_bustm_ppl_e53034 import bustm_datasets
    from ..FewCLUE_chid.FewCLUE_chid_ppl_8f2872 import chid_datasets
    from ..FewCLUE_cluewsc.FewCLUE_cluewsc_ppl_4284a0 import cluewsc_datasets
    from ..FewCLUE_csl.FewCLUE_csl_ppl_841b62 import csl_datasets
    from ..FewCLUE_eprstmt.FewCLUE_eprstmt_ppl_f1e631 import eprstmt_datasets
    from ..FewCLUE_ocnli_fc.FewCLUE_ocnli_fc_ppl_c08300 import ocnli_fc_datasets
    from ..FewCLUE_tnews.FewCLUE_tnews_ppl_d10e8a import tnews_datasets
    from ..lcsts.lcsts_gen_8ee1fe import lcsts_datasets
    from ..lambada.lambada_gen_217e11 import lambada_datasets
    from ..storycloze.storycloze_ppl_496661 import storycloze_datasets
    from ..SuperGLUE_AX_b.SuperGLUE_AX_b_ppl_6db806 import AX_b_datasets
    from ..SuperGLUE_AX_g.SuperGLUE_AX_g_ppl_66caf3 import AX_g_datasets
    from ..SuperGLUE_BoolQ.SuperGLUE_BoolQ_ppl_314797 import BoolQ_datasets
    from ..SuperGLUE_CB.SuperGLUE_CB_ppl_0143fe import CB_datasets
    from ..SuperGLUE_COPA.SuperGLUE_COPA_ppl_9f3618 import COPA_datasets
    from ..SuperGLUE_MultiRC.SuperGLUE_MultiRC_ppl_ced824 import MultiRC_datasets
    from ..SuperGLUE_RTE.SuperGLUE_RTE_ppl_66caf3 import RTE_datasets
    from ..SuperGLUE_ReCoRD.SuperGLUE_ReCoRD_gen_30dea0 import ReCoRD_datasets
    from ..SuperGLUE_WiC.SuperGLUE_WiC_ppl_312de9 import WiC_datasets
    from ..SuperGLUE_WSC.SuperGLUE_WSC_ppl_003529 import WSC_datasets
    from ..race.race_ppl_5831a0 import race_datasets
    from ..Xsum.Xsum_gen_31397e import Xsum_datasets
    from ..gsm8k.gsm8k_gen_1d7fe4 import gsm8k_datasets
    from ..summedits.summedits_ppl_1fbeb6 import summedits_datasets
    from ..math.math_gen_265cce import math_datasets
    from ..TheoremQA.TheoremQA_gen_ef26ca import TheoremQA_datasets
    from ..hellaswag.hellaswag_ppl_a6e128 import hellaswag_datasets
    from ..ARC_e.ARC_e_ppl_2ef631 import ARC_e_datasets
    from ..ARC_c.ARC_c_ppl_2ef631 import ARC_c_datasets
    from ..commonsenseqa.commonsenseqa_ppl_5545e2 import commonsenseqa_datasets
    from ..piqa.piqa_ppl_0cfff2 import piqa_datasets
    from ..siqa.siqa_ppl_e8d8c5 import siqa_datasets
    from ..strategyqa.strategyqa_gen_1180a7 import strategyqa_datasets
    from ..winogrande.winogrande_ll_c5cf57 import winogrande_datasets
    from ..obqa.obqa_ppl_6aac9e import obqa_datasets
    from ..nq.nq_gen_0356ec import nq_datasets
    from ..triviaqa.triviaqa_gen_0356ec import triviaqa_datasets
    from ..flores.flores_gen_806ede import flores_datasets

# Concatenate every `*_datasets` variable imported above, in import order.
datasets = sum((v for k, v in locals().items() if k.endswith('_datasets')), [])
# Reduced collection config: a smaller benchmark subset (mostly Chinese +
# SuperGLUE tasks) aggregated into one `datasets` list.
from mmengine.config import read_base

with read_base():
    from ..ceval.ceval_ppl_578f8d import ceval_datasets
    from ..bbh.bbh_gen_5b92b0 import bbh_datasets
    from ..CLUE_CMRC.CLUE_CMRC_gen_1bd3c8 import CMRC_datasets
    from ..CLUE_DRCD.CLUE_DRCD_gen_1bd3c8 import DRCD_datasets
    from ..CLUE_afqmc.CLUE_afqmc_ppl_6507d7 import afqmc_datasets
    from ..FewCLUE_bustm.FewCLUE_bustm_ppl_e53034 import bustm_datasets
    from ..FewCLUE_chid.FewCLUE_chid_ppl_8f2872 import chid_datasets
    from ..FewCLUE_cluewsc.FewCLUE_cluewsc_ppl_868415 import cluewsc_datasets
    from ..FewCLUE_eprstmt.FewCLUE_eprstmt_ppl_f1e631 import eprstmt_datasets
    from ..humaneval.humaneval_gen_8e312c import humaneval_datasets
    from ..mbpp.deprecated_mbpp_gen_1e1056 import mbpp_datasets
    from ..lambada.lambada_gen_217e11 import lambada_datasets
    from ..storycloze.storycloze_ppl_496661 import storycloze_datasets
    from ..SuperGLUE_AX_b.SuperGLUE_AX_b_ppl_6db806 import AX_b_datasets
    from ..SuperGLUE_AX_g.SuperGLUE_AX_g_ppl_66caf3 import AX_g_datasets
    from ..SuperGLUE_BoolQ.SuperGLUE_BoolQ_ppl_314b96 import BoolQ_datasets
    from ..SuperGLUE_CB.SuperGLUE_CB_ppl_0143fe import CB_datasets
    from ..SuperGLUE_COPA.SuperGLUE_COPA_ppl_9f3618 import COPA_datasets
    from ..SuperGLUE_MultiRC.SuperGLUE_MultiRC_ppl_ced824 import MultiRC_datasets
    from ..SuperGLUE_RTE.SuperGLUE_RTE_ppl_66caf3 import RTE_datasets
    from ..SuperGLUE_ReCoRD.SuperGLUE_ReCoRD_gen_30dea0 import ReCoRD_datasets
    from ..SuperGLUE_WiC.SuperGLUE_WiC_ppl_312de9 import WiC_datasets
    from ..SuperGLUE_WSC.SuperGLUE_WSC_ppl_d0f531 import WSC_datasets
    from ..race.race_ppl_a138cd import race_datasets
    from ..math.math_gen_265cce import math_datasets
    from ..gsm8k.gsm8k_gen_1d7fe4 import gsm8k_datasets
    from ..summedits.summedits_ppl_1fbeb6 import summedits_datasets
    from ..hellaswag.hellaswag_ppl_47bff9 import hellaswag_datasets
    from ..piqa.piqa_ppl_1cf9f0 import piqa_datasets
    from ..winogrande.winogrande_ll_c5cf57 import winogrande_datasets
    from ..obqa.obqa_ppl_c7c154 import obqa_datasets
    from ..nq.nq_gen_c788f6 import nq_datasets
    from ..triviaqa.triviaqa_gen_2121ce import triviaqa_datasets

# Concatenate every `*_datasets` variable imported above, in import order.
datasets = sum((v for k, v in locals().items() if k.endswith('_datasets')), [])
# Collection config (generation-style variants of the core benchmark set),
# flattened into one `datasets` list.
from mmengine.config import read_base

with read_base():
    from ..mmlu.mmlu_gen_4d595a import mmlu_datasets
    from ..cmmlu.cmmlu_gen_c13365 import cmmlu_datasets
    from ..ceval.ceval_internal_gen_2daf24 import ceval_datasets
    from ..GaokaoBench.GaokaoBench_no_subjective_gen_4c31db import GaokaoBench_datasets
    from ..triviaqa.triviaqa_wiki_1shot_gen_bc5f21 import triviaqa_datasets
    from ..nq.nq_open_1shot_gen_2e45e5 import nq_datasets
    from ..race.race_gen_69ee4f import race_datasets
    from ..winogrande.winogrande_5shot_gen_6447e6 import winogrande_datasets
    from ..hellaswag.hellaswag_10shot_gen_e42710 import hellaswag_datasets
    from ..bbh.bbh_gen_5b92b0 import bbh_datasets
    from ..gsm8k.gsm8k_gen_1d7fe4 import gsm8k_datasets
    from ..math.math_evaluatorv2_gen_cecb31 import math_datasets
    from ..TheoremQA.TheoremQA_post_v2_gen_ef26ca import TheoremQA_datasets
    from ..humaneval.humaneval_gen_8e312c import humaneval_datasets
    from ..mbpp.deprecated_sanitized_mbpp_gen_1e1056 import sanitized_mbpp_datasets

# Concatenate every `*_datasets` variable imported above, in import order.
datasets = sum((v for k, v in locals().items() if k.endswith('_datasets')), [])
# Full generation-style collection config: every benchmark uses a gen variant,
# aggregated into one `datasets` list.
from mmengine.config import read_base

with read_base():
    from ..mmlu.mmlu_gen_4d595a import mmlu_datasets
    from ..ceval.ceval_gen_5f30c7 import ceval_datasets
    from ..agieval.agieval_gen_64afd3 import agieval_datasets
    from ..GaokaoBench.GaokaoBench_gen_5cfe9e import GaokaoBench_datasets
    from ..bbh.bbh_gen_5b92b0 import bbh_datasets
    from ..humaneval.humaneval_gen_8e312c import humaneval_datasets
    from ..mbpp.deprecated_mbpp_gen_1e1056 import mbpp_datasets
    from ..CLUE_C3.CLUE_C3_gen_8c358f import C3_datasets
    from ..CLUE_CMRC.CLUE_CMRC_gen_1bd3c8 import CMRC_datasets
    from ..CLUE_DRCD.CLUE_DRCD_gen_1bd3c8 import DRCD_datasets
    from ..CLUE_afqmc.CLUE_afqmc_gen_901306 import afqmc_datasets
    from ..CLUE_cmnli.CLUE_cmnli_gen_1abf97 import cmnli_datasets
    from ..CLUE_ocnli.CLUE_ocnli_gen_c4cb6c import ocnli_datasets
    from ..FewCLUE_bustm.FewCLUE_bustm_gen_634f41 import bustm_datasets
    from ..FewCLUE_chid.FewCLUE_chid_gen_0a29a2 import chid_datasets
    from ..FewCLUE_cluewsc.FewCLUE_cluewsc_gen_c68933 import cluewsc_datasets
    from ..FewCLUE_csl.FewCLUE_csl_gen_28b223 import csl_datasets
    from ..FewCLUE_eprstmt.FewCLUE_eprstmt_gen_740ea0 import eprstmt_datasets
    from ..FewCLUE_ocnli_fc.FewCLUE_ocnli_fc_gen_f97a97 import ocnli_fc_datasets
    from ..FewCLUE_tnews.FewCLUE_tnews_gen_b90e4a import tnews_datasets
    from ..lcsts.lcsts_gen_8ee1fe import lcsts_datasets
    from ..lambada.lambada_gen_217e11 import lambada_datasets
    from ..storycloze.storycloze_gen_7f656a import storycloze_datasets
    from ..SuperGLUE_AX_b.SuperGLUE_AX_b_gen_4dfefa import AX_b_datasets
    from ..SuperGLUE_AX_g.SuperGLUE_AX_g_gen_68aac7 import AX_g_datasets
    from ..SuperGLUE_BoolQ.SuperGLUE_BoolQ_gen_883d50 import BoolQ_datasets
    from ..SuperGLUE_CB.SuperGLUE_CB_gen_854c6c import CB_datasets
    from ..SuperGLUE_COPA.SuperGLUE_COPA_gen_91ca53 import COPA_datasets
    from ..SuperGLUE_MultiRC.SuperGLUE_MultiRC_gen_27071f import MultiRC_datasets
    from ..SuperGLUE_RTE.SuperGLUE_RTE_gen_68aac7 import RTE_datasets
    from ..SuperGLUE_ReCoRD.SuperGLUE_ReCoRD_gen_30dea0 import ReCoRD_datasets
    from ..SuperGLUE_WiC.SuperGLUE_WiC_gen_d06864 import WiC_datasets
    from ..SuperGLUE_WSC.SuperGLUE_WSC_gen_7902a7 import WSC_datasets
    from ..race.race_gen_69ee4f import race_datasets
    from ..Xsum.Xsum_gen_31397e import Xsum_datasets
    from ..gsm8k.gsm8k_gen_1d7fe4 import gsm8k_datasets
    from ..summedits.summedits_gen_315438 import summedits_datasets
    from ..math.math_gen_265cce import math_datasets
    from ..TheoremQA.TheoremQA_gen_7009de import TheoremQA_datasets
    from ..hellaswag.hellaswag_gen_6faab5 import hellaswag_datasets
    from ..ARC_e.ARC_e_gen_1e0de5 import ARC_e_datasets
    from ..ARC_c.ARC_c_gen_1e0de5 import ARC_c_datasets
    from ..commonsenseqa.commonsenseqa_gen_c946f2 import commonsenseqa_datasets
    from ..piqa.piqa_gen_1194eb import piqa_datasets
    from ..siqa.siqa_gen_e78df3 import siqa_datasets
    from ..strategyqa.strategyqa_gen_1180a7 import strategyqa_datasets
    from ..winogrande.deprecated_winogrande_gen_a9ede5 import winogrande_datasets
    from ..obqa.obqa_gen_9069e4 import obqa_datasets
    from ..nq.nq_gen_c788f6 import nq_datasets
    from ..triviaqa.triviaqa_gen_2121ce import triviaqa_datasets
    from ..flores.flores_gen_806ede import flores_datasets

# Concatenate every `*_datasets` variable imported above, in import order.
datasets = sum((v for k, v in locals().items() if k.endswith('_datasets')), [])
# Reduced generation-style collection config, flattened into one `datasets`
# list.
from mmengine.config import read_base

with read_base():
    from ..mmlu.mmlu_gen_4d595a import mmlu_datasets
    from ..ceval.ceval_gen_5f30c7 import ceval_datasets
    from ..bbh.bbh_gen_5b92b0 import bbh_datasets
    from ..CLUE_CMRC.CLUE_CMRC_gen_1bd3c8 import CMRC_datasets
    from ..CLUE_DRCD.CLUE_DRCD_gen_1bd3c8 import DRCD_datasets
    from ..CLUE_afqmc.CLUE_afqmc_gen_901306 import afqmc_datasets
    from ..FewCLUE_bustm.FewCLUE_bustm_gen_634f41 import bustm_datasets
    from ..FewCLUE_chid.FewCLUE_chid_gen_0a29a2 import chid_datasets
    from ..FewCLUE_cluewsc.FewCLUE_cluewsc_gen_c68933 import cluewsc_datasets
    from ..FewCLUE_eprstmt.FewCLUE_eprstmt_gen_740ea0 import eprstmt_datasets
    from ..humaneval.humaneval_gen_8e312c import humaneval_datasets
    from ..mbpp.deprecated_mbpp_gen_1e1056 import mbpp_datasets
    from ..lambada.lambada_gen_217e11 import lambada_datasets
    from ..storycloze.storycloze_gen_7f656a import storycloze_datasets
    from ..SuperGLUE_AX_b.SuperGLUE_AX_b_gen_4dfefa import AX_b_datasets
    from ..SuperGLUE_AX_g.SuperGLUE_AX_g_gen_68aac7 import AX_g_datasets
    from ..SuperGLUE_BoolQ.SuperGLUE_BoolQ_gen_883d50 import BoolQ_datasets
    from ..SuperGLUE_CB.SuperGLUE_CB_gen_854c6c import CB_datasets
    from ..SuperGLUE_COPA.SuperGLUE_COPA_gen_91ca53 import COPA_datasets
    from ..SuperGLUE_MultiRC.SuperGLUE_MultiRC_gen_27071f import MultiRC_datasets
    from ..SuperGLUE_RTE.SuperGLUE_RTE_gen_68aac7 import RTE_datasets
    from ..SuperGLUE_ReCoRD.SuperGLUE_ReCoRD_gen_30dea0 import ReCoRD_datasets
    from ..SuperGLUE_WiC.SuperGLUE_WiC_gen_d06864 import WiC_datasets
    from ..SuperGLUE_WSC.SuperGLUE_WSC_gen_fe4bf3 import WSC_datasets
    from ..race.race_gen_69ee4f import race_datasets
    from ..math.math_gen_265cce import math_datasets
    from ..gsm8k.gsm8k_gen_1d7fe4 import gsm8k_datasets
    from ..summedits.summedits_gen_315438 import summedits_datasets
    from ..hellaswag.hellaswag_gen_6faab5 import hellaswag_datasets
    from ..piqa.piqa_gen_1194eb import piqa_datasets
    from ..winogrande.deprecated_winogrande_gen_a9ede5 import winogrande_datasets
    from ..obqa.obqa_gen_9069e4 import obqa_datasets
    from ..nq.nq_gen_c788f6 import nq_datasets
    from ..triviaqa.triviaqa_gen_2121ce import triviaqa_datasets

# Concatenate every `*_datasets` variable imported above, in import order.
datasets = sum((v for k, v in locals().items() if k.endswith('_datasets')), [])
# Minimal example collection: just PIQA and Natural Questions, flattened into
# one `datasets` list.
from mmengine.config import read_base

with read_base():
    from ..piqa.piqa_gen_1194eb import piqa_datasets
    from ..nq.nq_gen_c788f6 import nq_datasets

# Concatenate every `*_datasets` variable imported above, in import order.
datasets = sum((v for k, v in locals().items() if k.endswith('_datasets')), [])
# Collection config (three-dots relative imports: this file lives one package
# level deeper than its siblings), flattened into one `datasets` list.
from mmengine.config import read_base

with read_base():
    from ...ceval.ceval_ppl_578f8d import ceval_datasets
    from ...agieval.agieval_mixed_713d14 import agieval_datasets
    from ...mmlu.mmlu_ppl_ac766d import mmlu_datasets
    from ...cmmlu.cmmlu_ppl_8b9c76 import cmmlu_datasets
    from ...GaokaoBench.GaokaoBench_mixed_9af5ee import GaokaoBench_datasets
    from ...ARC_c.ARC_c_gen_1e0de5 import ARC_c_datasets
    from ...ARC_e.ARC_e_gen_1e0de5 import ARC_e_datasets
    from ...SuperGLUE_WiC.SuperGLUE_WiC_ppl_312de9 import WiC_datasets
    from ...FewCLUE_chid.FewCLUE_chid_ppl_8f2872 import chid_datasets
    from ...CLUE_afqmc.CLUE_afqmc_ppl_6507d7 import afqmc_datasets
    from ...SuperGLUE_WSC.SuperGLUE_WSC_ppl_003529 import WSC_datasets
    from ...tydiqa.tydiqa_gen_978d2a import tydiqa_datasets
    from ...flores.flores_gen_806ede import flores_datasets
    from ...SuperGLUE_BoolQ.SuperGLUE_BoolQ_ppl_314797 import BoolQ_datasets
    from ...commonsenseqa.commonsenseqa_ppl_5545e2 import commonsenseqa_datasets
    from ...triviaqa.triviaqa_gen_0356ec import triviaqa_datasets
    from ...nq.nq_gen_0356ec import nq_datasets
    from ...CLUE_C3.CLUE_C3_gen_8c358f import C3_datasets
    from ...race.race_ppl_5831a0 import race_datasets
    from ...obqa.obqa_gen_9069e4 import obqa_datasets
    from ...FewCLUE_csl.FewCLUE_csl_ppl_841b62 import csl_datasets
    from ...lcsts.lcsts_gen_8ee1fe import lcsts_datasets
    from ...Xsum.Xsum_gen_31397e import Xsum_datasets
    from ...FewCLUE_eprstmt.FewCLUE_eprstmt_gen_740ea0 import eprstmt_datasets
    from ...lambada.lambada_gen_217e11 import lambada_datasets
    from ...CLUE_cmnli.CLUE_cmnli_ppl_fdc6de import cmnli_datasets
    from ...CLUE_ocnli.CLUE_ocnli_gen_c4cb6c import ocnli_datasets
    from ...SuperGLUE_AX_b.SuperGLUE_AX_b_gen_4dfefa import AX_b_datasets
    from ...SuperGLUE_AX_g.SuperGLUE_AX_g_gen_68aac7 import AX_g_datasets
    from ...SuperGLUE_RTE.SuperGLUE_RTE_gen_68aac7 import RTE_datasets
    from ...SuperGLUE_COPA.SuperGLUE_COPA_gen_91ca53 import COPA_datasets
    from ...SuperGLUE_ReCoRD.SuperGLUE_ReCoRD_gen_a69961 import ReCoRD_datasets
    from ...hellaswag.hellaswag_gen_6faab5 import hellaswag_datasets
    from ...piqa.piqa_gen_1194eb import piqa_datasets
    from ...siqa.siqa_ppl_e8d8c5 import siqa_datasets
    from ...math.math_gen_265cce import math_datasets
    from ...gsm8k.gsm8k_gen_1d7fe4 import gsm8k_datasets
    from ...drop.deprecated_drop_gen_8a9ed9 import drop_datasets
    from ...humaneval.deprecated_humaneval_gen_a82cae import humaneval_datasets
    from ...mbpp.deprecated_mbpp_gen_1e1056 import mbpp_datasets
    from ...bbh.bbh_gen_5bf00b import bbh_datasets

# Concatenate every `*_datasets` variable imported above, in import order.
datasets = sum((v for k, v in locals().items() if k.endswith('_datasets')), [])
# Sibling collection config to the one above with different pinned per-dataset
# variants, flattened into one `datasets` list.
from mmengine.config import read_base

with read_base():
    from ...ceval.ceval_gen_5f30c7 import ceval_datasets
    from ...agieval.agieval_mixed_713d14 import agieval_datasets
    from ...mmlu.mmlu_gen_4d595a import mmlu_datasets
    from ...cmmlu.cmmlu_gen_c13365 import cmmlu_datasets
    from ...GaokaoBench.GaokaoBench_gen_5cfe9e import GaokaoBench_datasets
    from ...ARC_c.ARC_c_ppl_2ef631 import ARC_c_datasets
    from ...ARC_e.ARC_e_ppl_2ef631 import ARC_e_datasets
    from ...SuperGLUE_WiC.SuperGLUE_WiC_gen_d06864 import WiC_datasets
    from ...FewCLUE_chid.FewCLUE_chid_ppl_8f2872 import chid_datasets
    from ...CLUE_afqmc.CLUE_afqmc_gen_901306 import afqmc_datasets
    from ...SuperGLUE_WSC.SuperGLUE_WSC_ppl_003529 import WSC_datasets
    from ...tydiqa.tydiqa_gen_978d2a import tydiqa_datasets
    from ...flores.flores_gen_806ede import flores_datasets
    from ...SuperGLUE_BoolQ.SuperGLUE_BoolQ_ppl_314797 import BoolQ_datasets
    from ...commonsenseqa.commonsenseqa_gen_c946f2 import commonsenseqa_datasets
    from ...triviaqa.triviaqa_gen_2121ce import triviaqa_datasets
    from ...nq.nq_gen_c788f6 import nq_datasets
    from ...CLUE_C3.CLUE_C3_gen_8c358f import C3_datasets
    from ...race.race_gen_69ee4f import race_datasets
    from ...obqa.obqa_ppl_6aac9e import obqa_datasets
    from ...FewCLUE_csl.FewCLUE_csl_ppl_841b62 import csl_datasets
    from ...lcsts.lcsts_gen_8ee1fe import lcsts_datasets
    from ...Xsum.Xsum_gen_31397e import Xsum_datasets
    from ...FewCLUE_eprstmt.FewCLUE_eprstmt_ppl_f1e631 import eprstmt_datasets
    from ...lambada.lambada_gen_217e11 import lambada_datasets
    from ...CLUE_cmnli.CLUE_cmnli_ppl_fdc6de import cmnli_datasets
    from ...CLUE_ocnli.CLUE_ocnli_ppl_fdc6de import ocnli_datasets
    from ...SuperGLUE_AX_b.SuperGLUE_AX_b_ppl_6db806 import AX_b_datasets
    from ...SuperGLUE_AX_g.SuperGLUE_AX_g_ppl_66caf3 import AX_g_datasets
    from ...SuperGLUE_RTE.SuperGLUE_RTE_ppl_66caf3 import RTE_datasets
    from ...SuperGLUE_COPA.SuperGLUE_COPA_gen_91ca53 import COPA_datasets
    from ...SuperGLUE_ReCoRD.SuperGLUE_ReCoRD_gen_30dea0 import ReCoRD_datasets
    from ...hellaswag.hellaswag_ppl_a6e128 import hellaswag_datasets
    from ...piqa.piqa_ppl_0cfff2 import piqa_datasets
    from ...siqa.siqa_ppl_e8d8c5 import siqa_datasets
    from ...math.math_gen_265cce import math_datasets
    from ...gsm8k.gsm8k_gen_1d7fe4 import gsm8k_datasets
    from ...drop.deprecated_drop_gen_8a9ed9 import drop_datasets
    from ...humaneval.deprecated_humaneval_gen_a82cae import humaneval_datasets
    from ...mbpp.deprecated_mbpp_gen_1e1056 import mbpp_datasets
    from ...bbh.bbh_gen_5b92b0 import bbh_datasets

# Concatenate every `*_datasets` variable imported above, in import order.
datasets = sum((v for k, v in locals().items() if k.endswith('_datasets')), [])
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import commonsenseqaDataset
from opencompass.utils.text_postprocessors import (
    match_answer_pattern,
)

# CommonsenseQA with 7 chain-of-thought exemplars baked into the template.
# The model answers the final question; the choice letter is extracted from
# "So the answer is X" and scored by exact-match accuracy.
commonsenseqa_reader_cfg = dict(
    input_columns=['question', 'A', 'B', 'C', 'D', 'E'],
    output_column='answerKey',
    test_split='validation',
)

_ice_template = dict(
    type=PromptTemplate,
    template=dict(
        begin='</E>',
        round=[
            dict(
                role='HUMAN',
                prompt='Q: What do people use to absorb extra ink from a fountain pen? Answer Choices: A.shirt pocket B.calligrapher’s hand C.inkwell D.desk drawer E.blotter',
            ),
            dict(
                role='BOT',
                prompt='A: The answer must be an item that can absorb ink. Of the above choices, only blotters are used to absorb ink. So the answer is E.',
            ),
            dict(
                role='HUMAN',
                # Fix: missing space before "Answer Choices".
                prompt='Q: What home entertainment equipment requires cable? Answer Choices: A.radio shack B.substation C.television D.cabinet',
            ),
            dict(
                role='BOT',
                prompt='A: The answer must require cable. Of the above choices, only television requires cable. So the answer is C.',
            ),
            dict(
                role='HUMAN',
                prompt='Q: The fox walked from the city into the forest, what was it looking for? Answer Choices: A.pretty flowers B.hen house C.natural habitat D.storybook',
            ),
            dict(
                role='BOT',
                # Fix: "natural habitat" is choice C, but the exemplar said
                # "the answer is B", contradicting its own reasoning.
                prompt='A: The answer must be something in the forest. Of the above choices, only natural habitat is in the forest. So the answer is C.',
            ),
            dict(
                role='HUMAN',
                prompt='Q: Sammy wanted to go to where the people were. Where might he go? Answer Choices: A.populated areas B.race track C.desert D.apartment E.roadblock',
            ),
            dict(
                role='BOT',
                prompt='A: The answer must be a place with a lot of people. Of the above choices, only populated areas have a lot of people. So the answer is A.',
            ),
            dict(
                role='HUMAN',
                # Fix: "Csuper market" was missing the dot after the letter.
                prompt='Q: Where do you put your grapes just before checking out? Answer Choices: A.mouth B.grocery cart C.super market D.fruit basket E.fruit market',
            ),
            dict(
                role='BOT',
                prompt='A: The answer should be the place where grocery items are placed before checking out. Of the above choices, grocery cart makes the most sense for holding grocery items. So the answer is B.',
            ),
            dict(
                role='HUMAN',
                prompt='Q: Google Maps and other highway and street GPS services have replaced what? Answer Choices: A.united states B.mexico C.countryside D.atlas',
            ),
            dict(
                role='BOT',
                prompt='A: The answer must be something that used to do what Google Maps and GPS services do, which is to give directions. Of the above choices, only atlases are used to give directions. So the answer is D.',
            ),
            dict(
                role='HUMAN',
                prompt='Q: Before getting a divorce, what did the wife feel who was doing all the work? Answer Choices: A.harder B.anguish C.bitterness D.tears E.sadness',
            ),
            dict(
                role='BOT',
                prompt='A: The answer should be the feeling of someone getting divorced who was doing all the work. Of the above choices, the closest feeling is bitterness. So the answer is C.',
            ),
            dict(
                role='HUMAN',
                prompt='Q:{question} Answer Choices: A. {A}\nB. {B}\nC. {C}\nD. {D}\nE. {E}\nA:',
            ),
            dict(
                role='BOT',
                prompt='{answerKey}',
            ),
        ],
    ),
    ice_token='</E>',
)

commonsenseqa_infer_cfg = dict(
    ice_template=_ice_template,
    retriever=dict(type=ZeroRetriever),
    inferencer=dict(type=GenInferencer),
)

commonsenseqa_eval_cfg = dict(
    evaluator=dict(type=AccEvaluator),
    pred_postprocessor=dict(
        # Fix: choices are A-E only; the previous [A-P] range also matched
        # letters that can never be a valid answer key.
        type=match_answer_pattern, answer_pattern=r'(?i)so the answer is\s*([A-E])'
    ),
)

commonsenseqa_datasets = [
    dict(
        abbr='commonsense_qa',
        type=commonsenseqaDataset,
        path='opencompass/commonsense_qa',
        reader_cfg=commonsenseqa_reader_cfg,
        infer_cfg=commonsenseqa_infer_cfg,
        eval_cfg=commonsenseqa_eval_cfg,
    )
]

del _ice_template
# Alias config: re-export the CommonsenseQA generation datasets from the
# pinned base config so callers can depend on a stable module name.
from mmengine.config import read_base

with read_base():
    from .commonsenseqa_gen_c946f2 import commonsenseqa_datasets  # noqa: F401, F403
# Use FixKRetriever to avoid hang caused by the Huggingface
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import FixKRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import commonsenseqaDataset
from opencompass.utils.text_postprocessors import first_capital_postprocess

# CommonsenseQA, 8-shot generation: the first eight training examples are
# always used as in-context demonstrations; the model's first capital letter
# is taken as its choice and scored by accuracy.
commonsenseqa_reader_cfg = dict(
    input_columns=['question', 'A', 'B', 'C', 'D', 'E'],
    output_column='answerKey',
    test_split='validation')

# Question/answer exchange reused both as an in-context example and as the
# final query; `</E>` marks where retrieved examples are spliced in.
_qa_round = [
    dict(role='HUMAN',
         prompt='{question}\nA. {A}\nB. {B}\nC. {C}\nD. {D}\nE. {E}\nAnswer:'),
    dict(role='BOT', prompt='{answerKey}'),
]
_ice_template = dict(
    type=PromptTemplate,
    template=dict(begin='</E>', round=_qa_round),
    ice_token='</E>',
)

commonsenseqa_infer_cfg = dict(
    ice_template=_ice_template,
    # Fixed example IDs make the prompt deterministic and sidestep the
    # retriever hang mentioned above.
    retriever=dict(type=FixKRetriever, fix_id_list=[0, 1, 2, 3, 4, 5, 6, 7]),
    inferencer=dict(type=GenInferencer),
)

commonsenseqa_eval_cfg = dict(
    evaluator=dict(type=AccEvaluator),
    pred_postprocessor=dict(type=first_capital_postprocess),
)

commonsenseqa_datasets = [
    dict(
        abbr='commonsense_qa',
        type=commonsenseqaDataset,
        path='opencompass/commonsense_qa',
        reader_cfg=commonsenseqa_reader_cfg,
        infer_cfg=commonsenseqa_infer_cfg,
        eval_cfg=commonsenseqa_eval_cfg,
    )
]

del _qa_round
del _ice_template
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import MDLRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import commonsenseqaDataset
from opencompass.utils.text_postprocessors import first_capital_postprocess

# CommonsenseQA, 8-shot generation with MDL-selected in-context examples;
# the model's first capital letter is taken as its choice and scored by
# accuracy.
commonsenseqa_reader_cfg = dict(
    input_columns=['question', 'A', 'B', 'C', 'D', 'E'],
    output_column='answerKey',
    test_split='validation')

# Shared question/answer template; `</E>` marks where retrieved in-context
# examples are spliced in.
_ice_template = dict(
    type=PromptTemplate,
    template=dict(
        begin='</E>',
        round=[
            dict(role='HUMAN',
                 prompt='{question}\nA. {A}\nB. {B}\nC. {C}\nD. {D}\nE. {E}\nAnswer:'),
            dict(role='BOT', prompt='{answerKey}'),
        ],
    ),
    ice_token='</E>',
)

# MDLRetriever: picks 8 of 30 candidate examples by minimum description
# length, re-sampling 10 times with a fixed seed for reproducibility.
_retriever = dict(
    type=MDLRetriever,
    ice_num=8,
    candidate_num=30,
    select_time=10,
    seed=1,
    batch_size=12,
    ice_template=_ice_template,
)

commonsenseqa_infer_cfg = dict(
    ice_template=_ice_template,
    retriever=_retriever,
    inferencer=dict(type=GenInferencer),
)

commonsenseqa_eval_cfg = dict(
    evaluator=dict(type=AccEvaluator),
    pred_postprocessor=dict(type=first_capital_postprocess),
)

commonsenseqa_datasets = [
    dict(
        abbr='commonsense_qa',
        type=commonsenseqaDataset,
        path='opencompass/commonsense_qa',
        reader_cfg=commonsenseqa_reader_cfg,
        infer_cfg=commonsenseqa_infer_cfg,
        eval_cfg=commonsenseqa_eval_cfg,
    )
]

del _retriever
del _ice_template
# Alias config: re-export the CommonsenseQA PPL datasets from the pinned base
# config so callers can depend on a stable module name.
from mmengine.config import read_base

with read_base():
    from .commonsenseqa_ppl_5545e2 import commonsenseqa_datasets  # noqa: F401, F403
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import MDLRetriever
from opencompass.openicl.icl_inferencer import PPLInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import commonsenseqaDataset

# CommonsenseQA scored by perplexity: the template below builds one prompt
# variant per answer choice and PPLInferencer picks the lowest-perplexity
# completion; accuracy against `answerKey` is the metric.
commonsenseqa_reader_cfg = dict(
    input_columns=['question', 'A', 'B', 'C', 'D', 'E'],
    output_column='answerKey',
    test_split='validation')

# One template per candidate letter A-E: same SYSTEM preamble and question,
# BOT replies with that choice's text ({A}..{E}); `</E>` marks where
# retrieved in-context examples are spliced in.
_ice_template = dict(
    type=PromptTemplate,
    template={
        ans: dict(
            begin=[
                dict(
                    role='SYSTEM',
                    fallback_role='HUMAN',
                    prompt=f'Answer the following question:'), '</E>'
            ],
            round=[
                dict(role='HUMAN', prompt='{question}'),
                dict(role='BOT', prompt=ans_token),
            ])
        for ans, ans_token in [['A', '{A}'], ['B', '{B}'],
                               ['C', '{C}'], ['D', '{D}'],
                               ['E', '{E}']]
    },
    ice_token='</E>')

commonsenseqa_infer_cfg = dict(
    ice_template=_ice_template,
    # MDLRetriever: picks 8 of 30 candidate examples by minimum description
    # length, re-sampling 10 times with a fixed seed for reproducibility.
    retriever=dict(
        type=MDLRetriever,
        ice_num=8,
        candidate_num=30,
        select_time=10,
        seed=1,
        batch_size=12,
        ice_template=_ice_template),
    inferencer=dict(type=PPLInferencer))

commonsenseqa_eval_cfg = dict(evaluator=dict(type=AccEvaluator))

commonsenseqa_datasets = [
    dict(
        abbr='commonsense_qa',
        type=commonsenseqaDataset,
        path='opencompass/commonsense_qa',
        reader_cfg=commonsenseqa_reader_cfg,
        infer_cfg=commonsenseqa_infer_cfg,
        eval_cfg=commonsenseqa_eval_cfg)
]

# Template dict is only referenced inside the configs above; drop the module
# name so it is not exported.
del _ice_template
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment