"test/git@developer.sourcefind.cn:change/sglang.git" did not exist on "c877292cc12a61011694d7d0ea53c05f247003f6"
Commit 7d346000 authored by gaotongxiao

initial commit
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import FixKRetriever
from opencompass.openicl.icl_inferencer import PPLInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import CEvalDataset
ceval_subject_mapping = {
    "computer_network": ["Computer Network", "计算机网络", "STEM"],
    "operating_system": ["Operating System", "操作系统", "STEM"],
    "computer_architecture": ["Computer Architecture", "计算机组成", "STEM"],
    "college_programming": ["College Programming", "大学编程", "STEM"],
    "college_physics": ["College Physics", "大学物理", "STEM"],
    "college_chemistry": ["College Chemistry", "大学化学", "STEM"],
    "advanced_mathematics": ["Advanced Mathematics", "高等数学", "STEM"],
    "probability_and_statistics": ["Probability and Statistics", "概率统计", "STEM"],
    "discrete_mathematics": ["Discrete Mathematics", "离散数学", "STEM"],
    "electrical_engineer": ["Electrical Engineer", "注册电气工程师", "STEM"],
    "metrology_engineer": ["Metrology Engineer", "注册计量师", "STEM"],
    "high_school_mathematics": ["High School Mathematics", "高中数学", "STEM"],
    "high_school_physics": ["High School Physics", "高中物理", "STEM"],
    "high_school_chemistry": ["High School Chemistry", "高中化学", "STEM"],
    "high_school_biology": ["High School Biology", "高中生物", "STEM"],
    "middle_school_mathematics": ["Middle School Mathematics", "初中数学", "STEM"],
    "middle_school_biology": ["Middle School Biology", "初中生物", "STEM"],
    "middle_school_physics": ["Middle School Physics", "初中物理", "STEM"],
    "middle_school_chemistry": ["Middle School Chemistry", "初中化学", "STEM"],
    "veterinary_medicine": ["Veterinary Medicine", "兽医学", "STEM"],
    "college_economics": ["College Economics", "大学经济学", "Social Science"],
    "business_administration": ["Business Administration", "工商管理", "Social Science"],
    "marxism": ["Marxism", "马克思主义基本原理", "Social Science"],
    "mao_zedong_thought": ["Mao Zedong Thought", "毛泽东思想和中国特色社会主义理论体系概论", "Social Science"],
    "education_science": ["Education Science", "教育学", "Social Science"],
    "teacher_qualification": ["Teacher Qualification", "教师资格", "Social Science"],
    "high_school_politics": ["High School Politics", "高中政治", "Social Science"],
    "high_school_geography": ["High School Geography", "高中地理", "Social Science"],
    "middle_school_politics": ["Middle School Politics", "初中政治", "Social Science"],
    "middle_school_geography": ["Middle School Geography", "初中地理", "Social Science"],
    "modern_chinese_history": ["Modern Chinese History", "近代史纲要", "Humanities"],
    "ideological_and_moral_cultivation": ["Ideological and Moral Cultivation", "思想道德修养与法律基础", "Humanities"],
    "logic": ["Logic", "逻辑学", "Humanities"],
    "law": ["Law", "法学", "Humanities"],
    "chinese_language_and_literature": ["Chinese Language and Literature", "中国语言文学", "Humanities"],
    "art_studies": ["Art Studies", "艺术学", "Humanities"],
    "professional_tour_guide": ["Professional Tour Guide", "导游资格", "Humanities"],
    "legal_professional": ["Legal Professional", "法律职业资格", "Humanities"],
    "high_school_chinese": ["High School Chinese", "高中语文", "Humanities"],
    "high_school_history": ["High School History", "高中历史", "Humanities"],
    "middle_school_history": ["Middle School History", "初中历史", "Humanities"],
    "civil_servant": ["Civil Servant", "公务员", "Other"],
    "sports_science": ["Sports Science", "体育学", "Other"],
    "plant_protection": ["Plant Protection", "植物保护", "Other"],
    "basic_medicine": ["Basic Medicine", "基础医学", "Other"],
    "clinical_medicine": ["Clinical Medicine", "临床医学", "Other"],
    "urban_and_rural_planner": ["Urban and Rural Planner", "注册城乡规划师", "Other"],
    "accountant": ["Accountant", "注册会计师", "Other"],
    "fire_engineer": ["Fire Engineer", "注册消防工程师", "Other"],
    "environmental_impact_assessment_engineer": ["Environmental Impact Assessment Engineer", "环境影响评价工程师", "Other"],
    "tax_accountant": ["Tax Accountant", "税务师", "Other"],
    "physician": ["Physician", "医师资格", "Other"],
}
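# Each entry above maps a subject key to [English name, Chinese name, category].
# As a quick illustration (not part of the original config), the category
# breakdown of the 52 subjects can be tallied directly from the mapping:
#
#     from collections import Counter
#     Counter(v[2] for v in ceval_subject_mapping.values())
#     # Counter({'STEM': 20, 'Humanities': 11, 'Other': 11, 'Social Science': 10})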
ceval_all_sets = list(ceval_subject_mapping.keys())
ceval_datasets = []
for _split in ["val"]:
for _name in ceval_all_sets:
_ch_name = ceval_subject_mapping[_name][1]
ceval_infer_cfg = dict(
ice_template=dict(
type=PromptTemplate,
template={
answer: dict(
begin="</E>",
round=[
dict(
role="HUMAN",
prompt=
f"以下是中国关于{_ch_name}考试的单项选择题,请选出其中的正确答案。\n{{question}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n答案: "
),
dict(role="BOT", prompt=answer),
])
for answer in ["A", "B", "C", "D"]
},
ice_token="</E>",
),
retriever=dict(type=FixKRetriever),
inferencer=dict(type=PPLInferencer, fix_id_list=[0, 1, 2, 3, 4]),
)
ceval_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
ceval_datasets.append(
dict(
type=CEvalDataset,
path="./data/ceval/formal_ceval",
name=_name,
abbr="ceval-" + _name if _split == "val" else "ceval-test-" +
_name,
reader_cfg=dict(
input_columns=["question", "A", "B", "C", "D"],
output_column="answer",
train_split="dev",
test_split=_split),
infer_cfg=ceval_infer_cfg,
eval_cfg=ceval_eval_cfg,
))
del _split, _name, _ch_name
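# Illustration (not part of the config): the template dict above is built by a
# comprehension keyed on each candidate answer, so PPLInferencer scores four
# completed prompts per question and predicts the lowest-perplexity one.
# Conceptually:
#
#     templates = {ans: ice + question_prompt + f"答案: {ans}" for ans in "ABCD"}
#     # prediction = min(templates, key=lambda ans: ppl(templates[ans]))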
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import MDLRetriever
from opencompass.openicl.icl_inferencer import PPLInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import commonsenseqaDataset
commonsenseqa_reader_cfg = dict(
input_columns=['question', 'A', 'B', 'C', 'D', 'E'],
output_column='answerKey',
test_split='validation')
_ice_template = dict(
type=PromptTemplate,
template={
ans: dict(
begin='</E>',
round=[
dict(role="HUMAN", prompt="Question: {question}\nAnswer: "),
dict(role="BOT", prompt=ans_token),
])
for ans, ans_token in [["A", "{A}"], ["B", "{B}"],
["C", "{C}"], ["D", "{D}"],
["E", "{E}"]]
},
ice_token='</E>')
commonsenseqa_infer_cfg = dict(
ice_template=_ice_template,
retriever=dict(
type=MDLRetriever,
ice_num=8,
candidate_num=30,
select_time=10,
seed=1,
batch_size=12,
ice_template=_ice_template),
inferencer=dict(type=PPLInferencer))
commonsenseqa_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
commonsenseqa_datasets = [
dict(
type=commonsenseqaDataset,
path='commonsense_qa',
reader_cfg=commonsenseqa_reader_cfg,
infer_cfg=commonsenseqa_infer_cfg,
eval_cfg=commonsenseqa_eval_cfg)
]
del _ice_template
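# Note on the retriever above (a rough description, not a spec): MDLRetriever
# first gathers `candidate_num` nearby examples per test item, then tries
# `select_time` random draws of `ice_num` of them and keeps the draw whose
# in-context sequence minimizes description length (model loss), with the
# fixed `seed` for reproducibility.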
from mmengine.config import read_base
with read_base():
from .crowspairs_ppl_f60797 import crowspairs_datasets # noqa: F401, F403
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import EMEvaluator
from opencompass.datasets import dropDataset
drop_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template=
'''Text: In the county, the population was spread out with 23.50% under the age of 18, 8.70% from 18 to 24, 29.70% from 25 to 44, 24.70% from 45 to 64, and 13.30% who were 65 years of age or older.
Question: How many more percent are under the age of 18 compared to the 18 to 24 group?
Answer: According to the text, 23.5% are under the age of 18, and 8.7% are from ages 18 to 24. 23.5%-8.7%=14.8%. So the answer is 14.8.
Text: Playing in their second straight Thanksgiving game, the Eagles struggled especially on defense, where they were unable to stop the much-hyped Lions offense. The worst of it all was how unproven rookie Eric Rowe was tasked with covering wide receiver Calvin Johnson, leading to Johnson catching 3 touchdowns. Stafford’s five passing touchdowns, including three of them to Johnson was too much for the Eagles to overcome and for the second consecutive time this season, the Eagles gave up 45 points in a game. With the loss, the Eagles drop to 4-7 on the season and 6-1 when playing on Thanksgiving.
Question: How many TD passes did Stafford throw other than to Johnson?
Answer: According to the text, Stafford threw 5 TD passes, 3 of which were to Johnson. 5-3=2. So the answer is 2.
Text: {prompt}
Question: {question}
Answer:'''),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GenInferencer))
drop_eval_cfg = dict(
    evaluator=dict(type=EMEvaluator),
    # reuse the gsm8k postprocessor to extract the final numeric answer
    pred_postprocessor=dict(type='gsm8k'))
drop_datasets = [
dict(
abbr='drop',
type=dropDataset,
path='drop',
reader_cfg=dict(
input_columns=['prompt'],
output_column='answers',
train_split='validation',
test_split='validation',
),
infer_cfg=drop_infer_cfg,
eval_cfg=drop_eval_cfg)
]
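# Sketch (an assumption, not the exact OpenCompass implementation): a
# gsm8k-style pred_postprocessor works for drop because the few-shot answers
# end with "So the answer is X." -- pulling the final number out of the
# generation is enough for EM scoring. Roughly:
#
#     import re
#     def extract_final_number(text: str) -> str:
#         nums = re.findall(r'-?\d+(?:\.\d+)?', text.replace(',', ''))
#         return nums[-1] if nums else ''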
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import TopkRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import BleuEvaluator
from opencompass.datasets import FloresFirst100Dataset
_flores_lang_map = [
["eng", "eng_Latn", "English", "Indo-European-Germanic"],
["afr", "afr_Latn", "Afrikaans", "Indo-European-Germanic"],
["dan", "dan_Latn", "Danish", "Indo-European-Germanic"],
["deu", "deu_Latn", "German", "Indo-European-Germanic"],
["isl", "isl_Latn", "Icelandic", "Indo-European-Germanic"],
["ltz", "ltz_Latn", "Luxembourgish", "Indo-European-Germanic"],
["nld", "nld_Latn", "Dutch", "Indo-European-Germanic"],
["nob", "nob_Latn", "Norwegian", "Indo-European-Germanic"],
["swe", "swe_Latn", "Swedish", "Indo-European-Germanic"],
["ast", "ast_Latn", "Asturian", "Indo-European-Romance"],
["cat", "cat_Latn", "Catalan", "Indo-European-Romance"],
["fra", "fra_Latn", "French", "Indo-European-Romance"],
["glg", "glg_Latn", "Galician", "Indo-European-Romance"],
["oci", "oci_Latn", "Occitan", "Indo-European-Romance"],
["por", "por_Latn", "Portuguese", "Indo-European-Romance"],
["ron", "ron_Latn", "Romanian", "Indo-European-Romance"],
["spa", "spa_Latn", "Spanish", "Indo-European-Romance"],
["bel", "bel_Cyrl", "Belarusian", "Indo-European-Slavic"],
["bos", "bos_Latn", "Bosnian", "Indo-European-Slavic"],
["bul", "bul_Cyrl", "Bulgarian", "Indo-European-Slavic"],
["ces", "ces_Latn", "Czech", "Indo-European-Slavic"],
["hrv", "hrv_Latn", "Croatian", "Indo-European-Slavic"],
["mkd", "mkd_Cyrl", "Macedonian", "Indo-European-Slavic"],
["pol", "pol_Latn", "Polish", "Indo-European-Slavic"],
["rus", "rus_Cyrl", "Russian", "Indo-European-Slavic"],
["slk", "slk_Latn", "Slovak", "Indo-European-Slavic"],
["slv", "slv_Latn", "Slovenian", "Indo-European-Slavic"],
["srp", "srp_Cyrl", "Serbian", "Indo-European-Slavic"],
["ukr", "ukr_Cyrl", "Ukrainian", "Indo-European-Slavic"],
["asm", "asm_Beng", "Assamese", "Indo-European-Indo-Aryan"],
["ben", "ben_Beng", "Bengali", "Indo-European-Indo-Aryan"],
["guj", "guj_Gujr", "Gujarati", "Indo-European-Indo-Aryan"],
["hin", "hin_Deva", "Hindi", "Indo-European-Indo-Aryan"],
["mar", "mar_Deva", "Marathi", "Indo-European-Indo-Aryan"],
["npi", "npi_Deva", "Nepali", "Indo-European-Indo-Aryan"],
["ory", "ory_Orya", "Oriya", "Indo-European-Indo-Aryan"],
["pan", "pan_Guru", "Punjabi", "Indo-European-Indo-Aryan"],
["snd", "snd_Arab", "Sindhi", "Indo-European-Indo-Aryan"],
["urd", "urd_Arab", "Urdu", "Indo-European-Indo-Aryan"],
["ckb", "ckb_Arab", "Kurdish", "Indo-European-Other"],
["cym", "cym_Latn", "Welsh", "Indo-European-Other"],
["ell", "ell_Grek", "Greek", "Indo-European-Other"],
["fas", "pes_Arab", "Persian", "Indo-European-Other"],
["gle", "gle_Latn", "Irish", "Indo-European-Other"],
["hye", "hye_Armn", "Armenian", "Indo-European-Other"],
["ita", "ita_Latn", "Italian", "Indo-European-Other"],
["lav", "lvs_Latn", "Latvian", "Indo-European-Other"],
["lit", "lit_Latn", "Lithuanian", "Indo-European-Other"],
["pus", "pbt_Arab", "Pashto", "Indo-European-Other"],
["tgk", "tgk_Cyrl", "Tajik", "Indo-European-Other"],
["ceb", "ceb_Latn", "Cebuano", "Austronesian"],
["ind", "ind_Latn", "Indonesian", "Austronesian"],
["jav", "jav_Latn", "Javanese", "Austronesian"],
["mri", "mri_Latn", "Maori", "Austronesian"],
["msa", "zsm_Latn", "Malay", "Austronesian"],
["tgl", "tgl_Latn", "Tagalog", "Austronesian"],
["ibo", "ibo_Latn", "Igbo", "Atlantic-Congo"],
["kam", "kam_Latn", "Kamba", "Atlantic-Congo"],
["kea", "kea_Latn", "Kabuverdianu", "Atlantic-Congo"],
["lin", "lin_Latn", "Lingala", "Atlantic-Congo"],
["lug", "lug_Latn", "Luganda", "Atlantic-Congo"],
["nso", "nso_Latn", "Northern Sotho", "Atlantic-Congo"],
["nya", "nya_Latn", "Nyanja", "Atlantic-Congo"],
["sna", "sna_Latn", "Shona", "Atlantic-Congo"],
["swh", "swh_Latn", "Swahili", "Atlantic-Congo"],
["umb", "umb_Latn", "Umbundu", "Atlantic-Congo"],
["wol", "wol_Latn", "Wolof", "Atlantic-Congo"],
["xho", "xho_Latn", "Xhosa", "Atlantic-Congo"],
["yor", "yor_Latn", "Yoruba", "Atlantic-Congo"],
["zul", "zul_Latn", "Zulu", "Atlantic-Congo"],
["amh", "amh_Ethi", "Amharic", "Afro-Asiatic"],
["ara", "arb_Arab", "Arabic", "Afro-Asiatic"],
["ful", "fuv_Latn", "Fulah", "Afro-Asiatic"],
["mlt", "mlt_Latn", "Maltese", "Afro-Asiatic"],
["orm", "gaz_Latn", "Oromo", "Afro-Asiatic"],
["som", "som_Latn", "Somali", "Afro-Asiatic"],
["azj", "azj_Latn", "Azerbaijani", "Turkic"],
["kaz", "kaz_Cyrl", "Kazakh", "Turkic"],
["kir", "kir_Cyrl", "Kyrgyz", "Turkic"],
["tur", "tur_Latn", "Turkish", "Turkic"],
["uzb", "uzn_Latn", "Uzbek", "Turkic"],
["kan", "kan_Knda", "Kannada", "Dravidian"],
["mal", "mal_Mlym", "Malayalam", "Dravidian"],
["tam", "tam_Taml", "Tamil", "Dravidian"],
["tel", "tel_Telu", "Telugu", "Dravidian"],
["mya", "mya_Mymr", "Burmese", "Sino-Tibetan"],
["zho_simpl", "zho_Hans", "Chinese (Simpl)", "Sino-Tibetan"],
["zho_trad", "zho_Hant", "Chinese (Trad)", "Sino-Tibetan"],
["est", "est_Latn", "Estonian", "Other"],
["fin", "fin_Latn", "Finnish", "Other"],
["hau", "hau_Latn", "Hausa", "Other"],
["heb", "heb_Hebr", "Hebrew", "Other"],
["hun", "hun_Latn", "Hungarian", "Other"],
["jpn", "jpn_Jpan", "Japanese", "Other"],
["kat", "kat_Geor", "Georgian", "Other"],
["khm", "khm_Khmr", "Khmer", "Other"],
["kor", "kor_Hang", "Korean", "Other"],
["lao", "lao_Laoo", "Lao", "Other"],
["luo", "luo_Latn", "Luo", "Other"],
["mon", "khk_Cyrl", "Mongolian", "Other"],
["tha", "tha_Thai", "Thai", "Other"],
["vie", "vie_Latn", "Vietnamese", "Other"],
]
flores_lang_map = {i[0]: i for i in _flores_lang_map}
_flores_subtasks = [f"eng-{i}" for i in flores_lang_map if i != "eng"
] + [f"{i}-eng" for i in flores_lang_map if i != "eng"]
flores_datasets = []
for _flores_subtask in _flores_subtasks:
_src, _tgt = _flores_subtask.split("-")
_, _flores_source, _src_inst, _ = flores_lang_map[_src]
_, _flores_target, _tgt_inst, _ = flores_lang_map[_tgt]
flores_infer_cfg = dict(
ice_template=dict(
type=PromptTemplate,
template=f"</E>{{sentence_{_flores_source}}} = {{sentence_{_flores_target}}}" if _flores_subtask != "zho_simpl-eng"
else f"</E>Chinese: {{sentence_{_flores_source}}}\nEnglish: {{sentence_{_flores_target}}}",
ice_token="</E>",
),
retriever=dict(type=TopkRetriever, ice_num=8),
inferencer=dict(type=GenInferencer),
)
flores_eval_cfg = dict(
evaluator=dict(type=BleuEvaluator),
pred_role="BOT",
pred_postprocessor=dict(type="flores"),
dataset_postprocessor=dict(type="flores"),
)
if _tgt == "zho_simpl":
flores_eval_cfg["pred_postprocessor"] = dict(type="flores-chinese")
flores_eval_cfg["dataset_postprocessor"] = dict(type="flores-chinese")
flores_datasets.append(
dict(
type=FloresFirst100Dataset,
abbr=f"flores_100_{_src}-{_tgt}",
name=f"{_flores_source}-{_flores_target}",
reader_cfg=dict(
input_columns=f"sentence_{_flores_source}",
output_column=f"sentence_{_flores_target}",
train_split="dev",
test_split="devtest"),
infer_cfg=flores_infer_cfg.copy(),
eval_cfg=flores_eval_cfg.copy(),
))
del _flores_lang_map, _flores_subtask, _src, _tgt, _, _flores_source, _src_inst, _flores_target, _tgt_inst
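# Illustration (not part of the config): _flores_subtasks pairs every language
# with English in both directions ('eng-afr', ..., 'afr-eng', ...). For the
# 'eng-deu' subtask, the ice template above renders, before retrieval fills in
# the </E> token, as:
#
#     </E>{sentence_eng_Latn} = {sentence_deu_Latn}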
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import TopkRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import BleuEvaluator
from opencompass.datasets import FloresFirst100Dataset
_flores_lang_map = [
["eng", "eng_Latn", "English", "Indo-European-Germanic"],
["afr", "afr_Latn", "Afrikaans", "Indo-European-Germanic"],
["dan", "dan_Latn", "Danish", "Indo-European-Germanic"],
["deu", "deu_Latn", "German", "Indo-European-Germanic"],
["isl", "isl_Latn", "Icelandic", "Indo-European-Germanic"],
["ltz", "ltz_Latn", "Luxembourgish", "Indo-European-Germanic"],
["nld", "nld_Latn", "Dutch", "Indo-European-Germanic"],
["nob", "nob_Latn", "Norwegian", "Indo-European-Germanic"],
["swe", "swe_Latn", "Swedish", "Indo-European-Germanic"],
["ast", "ast_Latn", "Asturian", "Indo-European-Romance"],
["cat", "cat_Latn", "Catalan", "Indo-European-Romance"],
["fra", "fra_Latn", "French", "Indo-European-Romance"],
["glg", "glg_Latn", "Galician", "Indo-European-Romance"],
["oci", "oci_Latn", "Occitan", "Indo-European-Romance"],
["por", "por_Latn", "Portuguese", "Indo-European-Romance"],
["ron", "ron_Latn", "Romanian", "Indo-European-Romance"],
["spa", "spa_Latn", "Spanish", "Indo-European-Romance"],
["bel", "bel_Cyrl", "Belarusian", "Indo-European-Slavic"],
["bos", "bos_Latn", "Bosnian", "Indo-European-Slavic"],
["bul", "bul_Cyrl", "Bulgarian", "Indo-European-Slavic"],
["ces", "ces_Latn", "Czech", "Indo-European-Slavic"],
["hrv", "hrv_Latn", "Croatian", "Indo-European-Slavic"],
["mkd", "mkd_Cyrl", "Macedonian", "Indo-European-Slavic"],
["pol", "pol_Latn", "Polish", "Indo-European-Slavic"],
["rus", "rus_Cyrl", "Russian", "Indo-European-Slavic"],
["slk", "slk_Latn", "Slovak", "Indo-European-Slavic"],
["slv", "slv_Latn", "Slovenian", "Indo-European-Slavic"],
["srp", "srp_Cyrl", "Serbian", "Indo-European-Slavic"],
["ukr", "ukr_Cyrl", "Ukrainian", "Indo-European-Slavic"],
["asm", "asm_Beng", "Assamese", "Indo-European-Indo-Aryan"],
["ben", "ben_Beng", "Bengali", "Indo-European-Indo-Aryan"],
["guj", "guj_Gujr", "Gujarati", "Indo-European-Indo-Aryan"],
["hin", "hin_Deva", "Hindi", "Indo-European-Indo-Aryan"],
["mar", "mar_Deva", "Marathi", "Indo-European-Indo-Aryan"],
["npi", "npi_Deva", "Nepali", "Indo-European-Indo-Aryan"],
["ory", "ory_Orya", "Oriya", "Indo-European-Indo-Aryan"],
["pan", "pan_Guru", "Punjabi", "Indo-European-Indo-Aryan"],
["snd", "snd_Arab", "Sindhi", "Indo-European-Indo-Aryan"],
["urd", "urd_Arab", "Urdu", "Indo-European-Indo-Aryan"],
["ckb", "ckb_Arab", "Kurdish", "Indo-European-Other"],
["cym", "cym_Latn", "Welsh", "Indo-European-Other"],
["ell", "ell_Grek", "Greek", "Indo-European-Other"],
["fas", "pes_Arab", "Persian", "Indo-European-Other"],
["gle", "gle_Latn", "Irish", "Indo-European-Other"],
["hye", "hye_Armn", "Armenian", "Indo-European-Other"],
["ita", "ita_Latn", "Italian", "Indo-European-Other"],
["lav", "lvs_Latn", "Latvian", "Indo-European-Other"],
["lit", "lit_Latn", "Lithuanian", "Indo-European-Other"],
["pus", "pbt_Arab", "Pashto", "Indo-European-Other"],
["tgk", "tgk_Cyrl", "Tajik", "Indo-European-Other"],
["ceb", "ceb_Latn", "Cebuano", "Austronesian"],
["ind", "ind_Latn", "Indonesian", "Austronesian"],
["jav", "jav_Latn", "Javanese", "Austronesian"],
["mri", "mri_Latn", "Maori", "Austronesian"],
["msa", "zsm_Latn", "Malay", "Austronesian"],
["tgl", "tgl_Latn", "Tagalog", "Austronesian"],
["ibo", "ibo_Latn", "Igbo", "Atlantic-Congo"],
["kam", "kam_Latn", "Kamba", "Atlantic-Congo"],
["kea", "kea_Latn", "Kabuverdianu", "Atlantic-Congo"],
["lin", "lin_Latn", "Lingala", "Atlantic-Congo"],
["lug", "lug_Latn", "Luganda", "Atlantic-Congo"],
["nso", "nso_Latn", "Northern Sotho", "Atlantic-Congo"],
["nya", "nya_Latn", "Nyanja", "Atlantic-Congo"],
["sna", "sna_Latn", "Shona", "Atlantic-Congo"],
["swh", "swh_Latn", "Swahili", "Atlantic-Congo"],
["umb", "umb_Latn", "Umbundu", "Atlantic-Congo"],
["wol", "wol_Latn", "Wolof", "Atlantic-Congo"],
["xho", "xho_Latn", "Xhosa", "Atlantic-Congo"],
["yor", "yor_Latn", "Yoruba", "Atlantic-Congo"],
["zul", "zul_Latn", "Zulu", "Atlantic-Congo"],
["amh", "amh_Ethi", "Amharic", "Afro-Asiatic"],
["ara", "arb_Arab", "Arabic", "Afro-Asiatic"],
["ful", "fuv_Latn", "Fulah", "Afro-Asiatic"],
["mlt", "mlt_Latn", "Maltese", "Afro-Asiatic"],
["orm", "gaz_Latn", "Oromo", "Afro-Asiatic"],
["som", "som_Latn", "Somali", "Afro-Asiatic"],
["azj", "azj_Latn", "Azerbaijani", "Turkic"],
["kaz", "kaz_Cyrl", "Kazakh", "Turkic"],
["kir", "kir_Cyrl", "Kyrgyz", "Turkic"],
["tur", "tur_Latn", "Turkish", "Turkic"],
["uzb", "uzn_Latn", "Uzbek", "Turkic"],
["kan", "kan_Knda", "Kannada", "Dravidian"],
["mal", "mal_Mlym", "Malayalam", "Dravidian"],
["tam", "tam_Taml", "Tamil", "Dravidian"],
["tel", "tel_Telu", "Telugu", "Dravidian"],
["mya", "mya_Mymr", "Burmese", "Sino-Tibetan"],
["zho_simpl", "zho_Hans", "Chinese (Simpl)", "Sino-Tibetan"],
["zho_trad", "zho_Hant", "Chinese (Trad)", "Sino-Tibetan"],
["est", "est_Latn", "Estonian", "Other"],
["fin", "fin_Latn", "Finnish", "Other"],
["hau", "hau_Latn", "Hausa", "Other"],
["heb", "heb_Hebr", "Hebrew", "Other"],
["hun", "hun_Latn", "Hungarian", "Other"],
["jpn", "jpn_Jpan", "Japanese", "Other"],
["kat", "kat_Geor", "Georgian", "Other"],
["khm", "khm_Khmr", "Khmer", "Other"],
["kor", "kor_Hang", "Korean", "Other"],
["lao", "lao_Laoo", "Lao", "Other"],
["luo", "luo_Latn", "Luo", "Other"],
["mon", "khk_Cyrl", "Mongolian", "Other"],
["tha", "tha_Thai", "Thai", "Other"],
["vie", "vie_Latn", "Vietnamese", "Other"],
]
flores_lang_map = {i[0]: i for i in _flores_lang_map}
_flores_subtasks = [f"eng-{i}" for i in flores_lang_map if i != "eng"
] + [f"{i}-eng" for i in flores_lang_map if i != "eng"]
flores_datasets = []
for _flores_subtask in _flores_subtasks:
_src, _tgt = _flores_subtask.split("-")
_, _flores_source, _src_inst, _ = flores_lang_map[_src]
_, _flores_target, _tgt_inst, _ = flores_lang_map[_tgt]
flores_infer_cfg = dict(
ice_template=dict(
type=PromptTemplate,
template=dict(
begin="</E>",
round=[
dict(
role="HUMAN",
prompt=
f"Translate the following {_src_inst} statements to {_tgt_inst}.\n{{sentence_{_flores_source}}}"
),
dict(role="BOT", prompt=f"{{sentence_{_flores_target}}}"),
],
),
ice_token="</E>",
),
retriever=dict(type=TopkRetriever, ice_num=8),
inferencer=dict(type=GenInferencer),
)
flores_eval_cfg = dict(
evaluator=dict(type=BleuEvaluator),
pred_role="BOT",
pred_postprocessor=dict(type="flores"),
dataset_postprocessor=dict(type="flores"),
)
if _tgt == "zho_simpl":
flores_eval_cfg["pred_postprocessor"] = dict(type="flores-chinese")
flores_eval_cfg["dataset_postprocessor"] = dict(type="flores-chinese")
flores_datasets.append(
dict(
type=FloresFirst100Dataset,
abbr=f"flores_100_{_src}-{_tgt}",
name=f"{_flores_source}-{_flores_target}",
reader_cfg=dict(
input_columns=f"sentence_{_flores_source}",
output_column=f"sentence_{_flores_target}",
train_split="dev",
test_split="devtest"),
infer_cfg=flores_infer_cfg.copy(),
eval_cfg=flores_eval_cfg.copy(),
))
del _flores_lang_map, _flores_subtask, _src, _tgt, _, _flores_source, _src_inst, _flores_target, _tgt_inst
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer, GLMChoiceInferencer
from opencompass.datasets import GaokaoBenchDataset
MCQ_TMPL = """\
请你做一道{type}。
请你一步一步思考并将思考过程写在【解析】和<eoe>之间。你将从A,B,C,D中选出正确的答案,并写在【答案】和<eoa>之间。
例如:【答案】: A <eoa>
完整的题目回答的格式如下:
回答:【解析】 ... <eoe>
【答案】 ... <eoa>
请你严格按照上述格式作答。
题目如下:{{question}}
回答:"""
MULTI_MCQ_TMPL = """\
请你做一道{type}。
请你一步一步思考。每一题你将从A,B,C,D中选出正确的答案,并写在【答案】和<eoa>之间。
例如:(1)【答案】 A <eoa>
(2)【答案】 B <eoa>
请你严格按照上述格式作答。
题目如下:{{question}}
回答:"""
CLOZE_TMPL = """\
请你做一道{type}。
请你一步一步思考。将符合题意的五个选项的字母写在【答案】和<eoa>之间。
例如:【答案】 A B C D E <eoa>
请严格按照上述格式作答。
题目如下:{{question}}
回答:"""
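# The three templates are formatted in two stages (illustration, not part of
# the config): str.format fills {type} now, while the doubled {{question}}
# survives as {question} for per-example filling later:
#
#     >>> MCQ_TMPL.format(type='数学选择题').startswith('请你做一道数学选择题。')
#     True
#     >>> '{question}' in MCQ_TMPL.format(type='数学选择题')
#     True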
_MCQ_prompts = [
{
"type": "single_choice",
"keyword": "2010-2022_Math_II_MCQs",
"prefix_prompt": MCQ_TMPL.format(type='数学选择题'),
"comment": ""
},
{
"type": "single_choice",
"keyword": "2010-2022_Math_I_MCQs",
"prefix_prompt": MCQ_TMPL.format(type='数学选择题'),
"comment": ""
},
{
"type": "single_choice",
"keyword": "2010-2022_History_MCQs",
"prefix_prompt": MCQ_TMPL.format(type='历史选择题'),
},
{
"type": "single_choice",
"keyword": "2010-2022_Biology_MCQs",
"prefix_prompt": MCQ_TMPL.format(type='生物选择题'),
},
{
"type": "single_choice",
"keyword": "2010-2022_Political_Science_MCQs",
"prefix_prompt": MCQ_TMPL.format(type='政治选择题'),
},
{
"type": "multi_choice",
"keyword": "2010-2022_Physics_MCQs",
"prefix_prompt": MCQ_TMPL.format(type='物理选择题'),
},
{
"type": "single_choice",
"keyword": "2010-2022_Chemistry_MCQs",
"prefix_prompt": MCQ_TMPL.format(type='化学选择题'),
},
{
"type": "single_choice",
"keyword": "2010-2013_English_MCQs",
"prefix_prompt": MCQ_TMPL.format(type='英语选择题'),
},
{
"type": "multi_question_choice",
"keyword": "2010-2022_Chinese_Modern_Lit",
"prefix_prompt": MULTI_MCQ_TMPL.format(type='语文阅读理解题,其中包含三个小题'),
},
{
"type": "multi_question_choice",
"keyword": "2010-2022_English_Fill_in_Blanks",
"prefix_prompt": MULTI_MCQ_TMPL.format(type='英语完形填空题,其中包含二十个小题'),
},
{
"type": "five_out_of_seven",
"keyword": "2012-2022_English_Cloze_Test",
"prefix_prompt": CLOZE_TMPL.format(type='英语完形填空题'),
},
{
"type": "multi_question_choice",
"keyword": "2010-2022_Geography_MCQs",
"prefix_prompt": MULTI_MCQ_TMPL.format(type='地理选择题'),
},
    {
        "type": "multi_question_choice",
        "keyword": "2010-2022_English_Reading_Comp",
        "prefix_prompt": MULTI_MCQ_TMPL.format(type='英语阅读理解题,其中包含三到五个小题'),
},
{
"type": "multi_question_choice",
"keyword": "2010-2022_Chinese_Lang_and_Usage_MCQs",
"prefix_prompt": MCQ_TMPL.format(type='语文选择题'),
},
]
_FBQ_prompts = [{
"type": "cloze",
"keyword": "2010-2022_Math_I_Fill-in-the-Blank",
"prefix_prompt":
"请解答下面的数学填空题\n仔细阅读题目,解答其中的问题,请你一步步思考并将思考过程写在【解析】和<eoe>之间。请把你的答案写在【答案】和<eoa>之间。\n完整的题目回答格式如下:\n【解析】 ...<eoe>\n【答案】...<eoa>\n请你严格按照上述格式作答。\n题目如下:",
"comment": ""
}, {
"type": "cloze",
"keyword": "2010-2022_Math_II_Fill-in-the-Blank",
"prefix_prompt":
"请解答下面的数学填空题\n仔细阅读题目,解答其中的问题,请你一步步思考并将思考过程写在【解析】和<eoe>之间。请把你的答案写在【答案】和<eoa>之间。\n完整的题目回答格式如下:\n【解析】 ...<eoe>\n【答案】...<eoa>\n请你严格按照上述格式作答。\n题目如下:",
"comment": ""
}, {
"type": "cloze",
"keyword":
"2010-2022_Chinese_Language_Famous_Passages_and_Sentences_Dictation",
"prefix_prompt":
"请回答下面的语文填空题\n请你仔细阅读题目,先找到题目对应的中国名篇,再从名篇中找到合适的句子填写到题目的空白处。请你将思考过程写在【解析】和<eoe>之间,将最终答案写在【答案】和<eoa>之间。\n完整的题目回答格式如下:\n(1)【解析】 ...<eoe>\n【答案】...<eoa>\n(2)【解析】 ...<eoe>\n【答案】...<eoa>\n请严格按照上述格式作答,如果不止一道题,请分别作答。\n题目如下:",
"comment": ""
}, {
"type": "cloze",
"keyword": "2014-2022_English_Language_Cloze_Passage",
"prefix_prompt":
"请回答下面的英语短文填词题\n仔细阅读题目,空白处请填入一个适当单词或者括号内单词的正确形式。请你一步步思考,将思考过程写在【解析】和<eoe>之间,将最终答案写在【答案】和<eoa>之间。\n完整的题目回答格式如下:\n(1)【解析】 ...<eoe>\n【答案】...<eoa>\n(2)【解析】 ...<eoe>\n【答案】...<eoa>\n请严格按照上述格式作答,如果不止一道题,请分别作答。\n题目如下:",
"comment": ""
}]
_OEQ_prompts = [
{
"type": "subjective",
"keyword": "2010-2022_Geography_Open-ended_Questions",
"prefix_prompt":
"请解答下面的地理解答题\n仔细阅读题目并充分结合你已有的知识,解答其中的问题,请你一步步思考并将思考过程写在【解析】和<eoe>之间。你的答案请写在【答案】和<eoa>之间\n完整的题目回答格式如下:\n(1)【解析】 ...<eoe>\n【答案】...<eoa>\n (2)【解析】 ...<eoe>\n【答案】...<eoa>\n请你严格按照上述格式作答,如果不止一道题,请分别作答。\n题目如下:",
"comment": ""
},
{
"type": "subjective",
"keyword": "2010-2022_Chemistry_Open-ended_Questions",
"prefix_prompt":
"请解答下面的化学解答题\n仔细阅读题目并充分结合你已有的知识,解答其中的问题,请你一步步思考并将思考过程写在【解析】和<eoe>之间。请把你的答案写在【答案】和<eoa>之间\n完整的题目回答格式如下:\n(1)【解析】 ...<eoe>\n【答案】...<eoa>\n (2)【解析】 ...<eoe>\n【答案】...<eoa>\n请你严格按照上述格式作答,如果不止一道题,请分别作答。\n题目如下:",
"comment": ""
},
{
"type": "subjective",
"keyword": "2010-2022_Math_I_Open-ended_Questions",
"prefix_prompt":
"请解答下面的数学解答题\n仔细阅读题目并充分结合你已有的知识,解答其中的问题,请你一步步思考并将思考过程写在【解析】和<eoe>之间。请把你的答案写在【答案】和<eoa>之间,答案需要有完整的解题步骤。\n完整的题目回答格式如下:\n(1)【解析】 ...<eoe>\n【答案】...<eoa>\n (2)【解析】 ...<eoe>\n【答案】...<eoa>\n请你严格按照上述格式作答,如果不止一道题,请分别作答。\n题目如下:",
"comment": ""
},
{
"type": "subjective",
"keyword": "2010-2022_History_Open-ended_Questions",
"prefix_prompt":
"请解答下面的历史解答题\n仔细阅读材料和题目,并充分结合你已有的知识,解答其中的问题。请你一步步思考并将思考过程写在【解析】和<eoe>之间。请把你的答案写在【答案】和<eoa>之间\n完整的题目回答格式如下:\n(1)【解析】 ...<eoe>\n【答案】...<eoa>\n (2)【解析】 ...<eoe>\n【答案】...<eoa>\n请你严格按照上述格式作答,如果不止一道题,请分别作答。\n题目如下:",
"comment": ""
},
{
"type": "subjective",
"keyword": "2010-2022_Biology_Open-ended_Questions",
"prefix_prompt":
"请解答下面的生物解答题\n仔细阅读题目并充分结合你已有的知识,解答其中的问题,请你一步步思考并将思考过程写在【解析】和<eoe>之间。请把你的答案写在【答案】和<eoa>之间,同一小题的答案用\t分隔开。\n完整的题目回答格式如下:\n(1)【解析】 ...<eoe>\n【答案】...\t...<eoa>\n (2)【解析】 ...<eoe>\n【答案】...\t...<eoa>\n请你严格按照上述格式作答,如果不止一道题,请分别作答。\n题目如下:",
"comment": ""
},
{
"type": "subjective",
"keyword": "2010-2022_Math_II_Open-ended_Questions",
"prefix_prompt":
"请解答下面的数学解答题\n仔细阅读题目并充分结合你已有的知识,解答其中的问题,请你一步步思考并将思考过程写在【解析】和<eoe>之间。请把你的答案写在【答案】和<eoa>之间,答案需要有完整的解题步骤。\n完整的题目回答格式如下:\n(1)【解析】 ...<eoe>\n【答案】...<eoa>\n (2)【解析】 ...<eoe>\n【答案】...<eoa>\n请你严格按照上述格式作答,如果不止一道题,请分别作答。\n题目如下:",
"comment": ""
},
{
"type": "subjective",
"keyword": "2010-2022_Physics_Open-ended_Questions",
"prefix_prompt":
"请解答下面的物理解答题,仔细阅读题目,注意其中可能含有单选题和多选题。请你一步步思考并将思考过程写在【解析】和<eoe>之间。请把你的最终答案写在【答案】和<eoa>之间。选择题你要从选项中选出符合题意的答案,例如“【答案】A <eoa>”。\n完整的题目回答格式如下:(1)【解析】 ...<eoe>\n【答案】 ...<eoa>\n (2)【解析】 ...<eoe>\n【答案】...<eoa>\n请你严格按照上述格式作答。如果不止一道题,请分别作答。\n题目如下:",
"comment": ""
},
{
"type": "subjective",
"keyword": "2010-2022_Political_Science_Open-ended_Questions",
"prefix_prompt":
"请解答下面的政治解答题\n仔细阅读材料和题目,并充分结合你已有的知识,解答其中的问题,请你一步步思考并将思考过程写在【解析】和<eoe>之间。请把你的答案写在【答案】和<eoa>之间\n完整的题目回答格式如下:\n(1)【解析】 ...<eoe>\n【答案】...<eoa>\n (2)【解析】 ...<eoe>\n【答案】...<eoa>\n请你严格按照上述格式作答,如果不止一道题,请分别作答。\n题目如下:",
"comment": ""
},
    {
        "type": "correction",
        "keyword": "2012-2022_English_Language_Error_Correction",
        "prefix_prompt":
        "请解答下面的英语短文改错题,仔细阅读题目并充分结合你已有的知识,找出其中10处需要改动的地方。请你一步步思考,把修改后的短文写在【答案】和<eoa>之间。\n完整的题目回答格式如下:【答案】 ...<eoa>\n 请你严格按照上述格式作答。\n题目如下:",
        # "prefix_prompt": [
        #     "请解答下面的英语短文改错题,仔细阅读题目并充分结合你已有的知识,找出其中10处需要改动的地方。请你一步步思考,把修改后的短文写在【答案】和<eoa>之间。\n完整的题目回答格式如下:【答案】 ...<eoa>\n 请你严格按照上述格式作答。\n题目如下:",
# "请比较下面两篇短文,找到第二篇和第一篇的10处不同,每处不同只涉及一个单词,请将结果写在【答案】和<eoa>之间。例如:【答案】1. 将play改为plays\n 2.增加了the\n ... <eoa>\n 完整的题目回答格式如下:【答案】(1) ... \n (2) ...\n ...(10) ...\n<eoa>\n请你严格按照上述格式作答。\n短文如下:"
# ],
"comment": ""
},
{
"type": "subjective",
"keyword": "2010-2022_Chinese_Language_Ancient_Poetry_Reading",
"prefix_prompt":
"请解答下面的语文古代诗歌阅读题,仔细阅读题目,注意其中可能含有单选题和多选题。请你一步步思考并将最终答案写在【答案】和<eoa>之间。选择题你要从选项中选出符合题意的答案,例如“【答案】A <eoa>”。\n完整的题目回答格式如下:(1)【答案】 ...<eoa>\n (2)【答案】...<eoa>\n请你严格按照上述格式作答,如果不止一道题,请分别作答。\n题目如下:",
"comment": ""
},
    {
        "type": "subjective",
        "keyword": "2010-2022_Chinese_Language_Practical_Text_Reading",
        "prefix_prompt":
        "请解答下面的语文实用类文本阅读,仔细阅读题目,注意其中可能含有单选题和多选题。请你一步步思考并将最终答案写在【答案】和<eoa>之间。选择题你要从选项中选出符合题意的答案,例如“【答案】A <eoa>”。\n完整的题目回答格式如下:(1)【答案】 ...<eoa>\n (2)【答案】...<eoa>\n请你严格按照上述格式作答,如果不止一道题,请分别作答。\n题目如下:",
"comment": ""
},
    {
        "type": "subjective",
        "keyword": "2010-2022_Chinese_Language_Literary_Text_Reading",
        "prefix_prompt":
        "请解答下面的语文文学类文本阅读,仔细阅读题目,注意其中可能含有单选题和多选题。请你一步步思考并将最终答案写在【答案】和<eoa>之间。选择题你要从选项中选出符合题意的答案,例如“【答案】A <eoa>”。\n完整的题目回答格式如下:(1)【答案】 ...<eoa>\n (2)【答案】...<eoa>\n请你严格按照上述格式作答,如果不止一道题,请分别作答。\n题目如下:",
"comment": ""
},
    {
        "type": "subjective",
        "keyword": "2010-2022_Chinese_Language_Classical_Chinese_Reading",
        "prefix_prompt":
        "请解答下面的语文文言文阅读,仔细阅读题目,前三题是单选题,最后一题要将文言文翻译为现代汉语。请你一步步思考并把最终答案写在【答案】和<eoa>之间。选择题你要从选项中选出符合题意的答案,例如“【答案】A <eoa>”。翻译题把翻译后的现代汉语句子写在【答案】后面,例如“【答案】今天天气很好 <eoa>”\n完整的题目回答格式如下:(1)【答案】 ...<eoa>\n (2)【答案】...<eoa>\n请你严格按照上述格式作答,如果不止一道题,请分别作答。\n题目如下:",
"comment": ""
},
{
"type": "subjective",
"keyword":
"2010-2022_Chinese_Language_Language_and_Writing_Skills_Open-ended_Questions",
"prefix_prompt":
"请解答下面的语文解答题,仔细阅读题目,注意其中可能含有选择题。请你一步步思考并将思考过程写在【解析】和<eoe>之间。请把你的最终答案写在【答案】和<eoa>之间。选择题你要从选项中选出符合题意的答案,例如“【答案】A <eoa>”。\n完整的题目回答格式如下:(1)【解析】 ...<eoe>\n【答案】 ...<eoa>\n (2)【解析】 ...<eoe>\n【答案】...<eoa>\n请你严格按照上述格式作答。如果不止一道题,请分别作答。\n题目如下:",
"comment": ""
}
]
gaokao_bench_datasets = []
for _folder, _prompts in [
("Multiple-choice_Questions", _MCQ_prompts),
("Fill-in-the-blank_Questions", _FBQ_prompts),
("Open-ended_Questions", _OEQ_prompts),
]:
for _p in _prompts:
if _p['type'] == "single_choice":
continue
_reader_cfg = {
"input_columns": ['question'],
"output_column": 'answer',
}
    _infer_cfg = {
        "ice_template": {
            "type": PromptTemplate,
            "template": {
                "round": [{
                    "role": "HUMAN",
                    # MCQ prompts keep a {question} slot after the first
                    # .format(type=...) pass; the fill-in-the-blank and
                    # open-ended prompts end with "题目如下:", so the token
                    # is appended there instead
                    "prompt": (_p['prefix_prompt'].format(question='</question>')
                               if '{question}' in _p['prefix_prompt'] else
                               _p['prefix_prompt'] + '</question>')
                }]
},
"column_token_map": {
"question": "</question>"
},
"ice_token": "</E>"
},
"retriever": {
"type": ZeroRetriever
},
"inferencer": {
"type": GenInferencer
}
}
_eval_cfg = {
"evaluator": {
"type": "GaokaoBenchEvaluator" + "_" + _p['type'],
},
"pred_role": "BOT",
}
_base_path = './data/GAOKAO-BENCH/data'
_dataset = {
"type": GaokaoBenchDataset,
"abbr": "GaokaoBench_" + _p['keyword'],
"path": _base_path + '/' + _folder + '/' + _p['keyword'] + ".json",
"reader_cfg": _reader_cfg,
"infer_cfg": _infer_cfg,
"eval_cfg": _eval_cfg,
}
gaokao_bench_datasets.append(_dataset)
# Single choice dataset
_folder = "Multiple-choice_Questions"
for _p in _MCQ_prompts:
if _p['type'] != "single_choice":
continue
_reader_cfg = {
"input_columns": ['question'],
"output_column": 'answer',
}
_infer_cfg = {
"ice_template": {
"type": PromptTemplate,
"template": {
            answer: {
                "round": [{
                    "role": "HUMAN",
                    # fill the {question} slot left by MCQ_TMPL.format(type=...)
                    "prompt": _p['prefix_prompt'].format(question='</question>')
}, {
"role": "BOT",
"prompt": f"【答案】{answer} <eoa>"
}]
}
for answer in ['A', 'B', 'C', 'D']
},
"column_token_map": {
"question": "</question>"
},
"ice_token": "</E>"
        },
        "prompt_template": {
            "type": PromptTemplate,
            "template": {
                "round": [{
                    "role": "HUMAN",
                    "prompt": _p['prefix_prompt'].format(question='</question>')
}, {
"role": "BOT",
"prompt": "【答案】("
}]
},
"column_token_map": {
"question": "</question>"
},
},
"retriever": {
"type": ZeroRetriever
},
"inferencer": {
"type": GLMChoiceInferencer,
"choices": ['A', 'B', 'C', 'D'],
}
}
_eval_cfg = {
"evaluator": {
"type": "GaokaoBenchEvaluator" + "_" + _p['type'],
},
"pred_role": "BOT",
}
_base_path = './data/GAOKAO-BENCH/data'
_dataset = {
"type": GaokaoBenchDataset,
"abbr": "GaokaoBench_" + _p['keyword'],
"path": _base_path + '/' + _folder + '/' + _p['keyword'] + ".json",
"reader_cfg": _reader_cfg,
"infer_cfg": _infer_cfg,
"eval_cfg": _eval_cfg,
}
gaokao_bench_datasets.append(_dataset)
_temporary_variables = [
    k for k in globals() if k.startswith('_') and not k.startswith('__')
]
for _t in _temporary_variables:
del globals()[_t]
del _temporary_variables, _t
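# Note: config files delete their loop temporaries so that only the intended
# keys (here, gaokao_bench_datasets) survive into the merged config.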
_base_ = [
'bustm.py',
'afqmc.py',
'eprstmt.py',
'ocnli_fc.py',
'ocnli.py',
'cmnli.py',
'csl.py',
'chid.py',
'cluewsc.py',
'tnews.py',
'C3.py',
'CMRC.py',
'DRCD.py',
'lcsts.py',
'piqa.py',
'commonsenseqa.py',
'gsm8k.py',
'flores.py',
'humaneval.py',
'mbpp.py',
'triviaqa.py',
'nq.py',
'agieval.py',
'mmlu.py',
'ceval.py',
]
datasets = []
# gather every '*_datasets' list contributed by the _base_ configs above into
# the single list consumed by the runner (snapshot locals() so the dict does
# not change size during iteration)
for k, v in list(locals().items()):
    if k.endswith("_datasets"):
        datasets += v
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import PPLInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import HFDataset
afqmc_reader_cfg = dict(
input_columns=['sentence1', 'sentence2'],
output_column='label',
test_split='train')
afqmc_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template={
# 0: "{sentence1},{sentence2}不同。",
# 1: "{sentence1},{sentence2}相似。"
0:
"I received the questions \"{sentence1}\" and \"{sentence2}\". Are they duplicates?[MASK]no",
1:
"I received the questions \"{sentence1}\" and \"{sentence2}\". Are they duplicates?[MASK]yes",
}),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=PPLInferencer))
afqmc_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
afqmc_datasets = [
dict(
type=HFDataset,
abbr='afqmc',
path='json',
data_files='./data/CLUE/AFQMC/test_public.json',
split='train',
reader_cfg=afqmc_reader_cfg,
infer_cfg=afqmc_infer_cfg,
eval_cfg=afqmc_eval_cfg)
]
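# Illustration (not part of the config): with a [MASK]-style template pair,
# PPLInferencer fills both label templates with the example and predicts the
# label whose completed text scores the lower perplexity. Conceptually:
#
#     # prediction = min((0, 1), key=lambda label: ppl(template[label].format(**example)))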
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer, GLMChoiceInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import AGIEvalDataset
agieval_reader_cfg = dict(
input_columns=['problem_input'], output_column='label')
agieval_single_choice_sets = [
'gaokao-chinese',
'gaokao-english',
'gaokao-geography',
'gaokao-history',
'gaokao-biology',
'gaokao-chemistry',
'gaokao-physics',
'gaokao-mathqa',
'logiqa-zh',
'lsat-ar',
'lsat-lr',
'lsat-rc',
'logiqa-en',
'sat-math',
'sat-en',
'sat-en-without-passage',
'aqua-rat',
]
agieval_multiple_choices_sets = [
    'jec-qa-kd',  # the data needs extra preprocessing
    'jec-qa-ca',  # the data needs extra preprocessing
]
agieval_cloze_sets = ['gaokao-mathcloze', 'math']
agieval_datasets = []
for name in agieval_single_choice_sets:
agieval_infer_cfg = dict(
ice_template=dict(
type=PromptTemplate,
template={
label: f'{{problem_input}} {label}'
for label in ['A', 'B', 'C', 'D']
}),
        # the retriever has no effect here; zero-shot / few-shot follows setting_name
        retriever=dict(type=ZeroRetriever),
inferencer=dict(
type=GLMChoiceInferencer, choices=['A', 'B', 'C', 'D']))
agieval_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
agieval_datasets.append(
dict(
type=AGIEvalDataset,
path='./data/AGIEval/data/v1/',
name=name,
abbr='agieval-' + name,
setting_name='zero-shot',
reader_cfg=agieval_reader_cfg,
infer_cfg=agieval_infer_cfg.copy(),
eval_cfg=agieval_eval_cfg.copy()))
for name in agieval_multiple_choices_sets:
_hint = '答案是: '
agieval_infer_cfg = dict(
ice_template=dict(
type=PromptTemplate,
template=dict(round=[
dict(role='HUMAN', prompt=f'{{question}}\n{{options}}\n{_hint}')
]),
ice_token='</E>'),
retriever=dict(type=ZeroRetriever),
        inferencer=dict(type=GenInferencer))
agieval_eval_cfg = dict(
evaluator=dict(type=AccEvaluator),
pred_postprocessor=dict(type='first-capital-multi'))
agieval_datasets.append(
dict(
type='AGIEvalDataset_v2',
path='./data/AGIEval/data/v1/',
name=name,
abbr='agieval-' + name,
setting_name='zero-shot',
reader_cfg=agieval_reader_cfg,
infer_cfg=agieval_infer_cfg.copy(),
eval_cfg=agieval_eval_cfg.copy()))
for name in agieval_cloze_sets:
agieval_infer_cfg = dict(
ice_template=dict(
type=PromptTemplate,
template='</E>{problem_input}',
ice_token='</E>'),
        # the retriever has no effect here; zero-shot / few-shot follows setting_name
        retriever=dict(type=ZeroRetriever),
        inferencer=dict(type=GenInferencer))
agieval_eval_cfg = dict(evaluator=dict(type='AGIEvalEvaluator'))
agieval_datasets.append(
dict(
type=AGIEvalDataset,
path='./data/AGIEval/data/v1/',
name=name,
abbr='agieval-' + name,
setting_name='zero-shot',
reader_cfg=agieval_reader_cfg,
infer_cfg=agieval_infer_cfg.copy(),
eval_cfg=agieval_eval_cfg.copy()))
del name, agieval_infer_cfg, agieval_eval_cfg
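# Sketch (an assumption, not the exact OpenCompass implementation) of what a
# 'first-capital-multi' pred_postprocessor does for the multi-answer sets:
# keep the first run of option letters so a generation like 'ABD,因为...'
# reduces to 'ABD' before accuracy matching.
#
#     import re
#     def first_capital_multi(text: str) -> str:
#         match = re.search(r'[A-E]+', text)
#         return match.group(0) if match else ''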
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import PPLInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import HFDataset
cmnli_reader_cfg = dict(
input_columns=['sentence1', 'sentence2'],
output_column='label',
test_split='train')
cmnli_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template={
'contradiction':
'{sentence1}\nKeeping in mind the above text, consider: {sentence2}?\nIs this "always", "sometimes", or "never" correct? [MASK]never',
'entailment':
'{sentence1}\nKeeping in mind the above text, consider: {sentence2}?\nIs this "always", "sometimes", or "never" correct? [MASK]always',
'neutral':
'{sentence1}\nKeeping in mind the above text, consider: {sentence2}?\nIs this "always", "sometimes", or "never" correct? [MASK]sometimes'
}),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=PPLInferencer))
cmnli_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
cmnli_datasets = [
dict(
type=HFDataset,
abbr='cmnli',
path='json',
split='train',
data_files='./data/CLUE/cmnli/cmnli_public/dev.json',
reader_cfg=cmnli_reader_cfg,
infer_cfg=cmnli_infer_cfg,
eval_cfg=cmnli_eval_cfg)
]
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GLMChoiceInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import CslDataset
csl_reader_cfg = dict(
input_columns=["abst", "keywords"], output_column='label')
csl_infer_cfg = dict(
ice_template=dict(
type=PromptTemplate,
template={
0: "</E>摘要:</A>",
1: "</E>摘要:</A>关键词:</K>"
},
column_token_map={
"abst": '</A>',
'keywords': '</K>'
},
ice_token='</E>'),
prompt_template=dict(
type=PromptTemplate,
        template=
        '</E>Abstract: </A>\nKeyword: </K>\nDo all keywords come from the given abstract? (Yes or No)',
column_token_map={
"abst": '</A>',
'keywords': '</K>'
},
ice_token='</E>'),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GLMChoiceInferencer, choices=['No', 'Yes']))
csl_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
csl_datasets = [
dict(
type=CslDataset,
path='json',
abbr='csl',
data_files='./data/FewCLUE/csl/test_public.json',
split='train',
reader_cfg=csl_reader_cfg,
infer_cfg=csl_infer_cfg,
eval_cfg=csl_eval_cfg)
]
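# Note on the inferencer above (a rough description): GLMChoiceInferencer
# scores each candidate continuation in `choices` after the prompt and returns
# the better-scoring string, so 'No'/'Yes' line up with the 0/1 keys of the
# ice template.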
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.datasets import HFDataset, HumanEvaluator
humaneval_reader_cfg = dict(
input_columns=['prompt'], output_column='task_id', train_split='test')
# TODO: allow empty output-column
humaneval_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template='{prompt}'),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GenInferencer))
humaneval_eval_cfg = dict(
evaluator=dict(type=HumanEvaluator),
    k=[1, 10, 100],  # pass@k values; this parameter is specific to humaneval
pred_postprocessor=dict(type='humaneval'),
)
humaneval_datasets = [
dict(
type=HFDataset,
path='openai_humaneval',
reader_cfg=humaneval_reader_cfg,
infer_cfg=humaneval_infer_cfg,
eval_cfg=humaneval_eval_cfg)
]
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import PPLInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import HFDataset
ocnli_reader_cfg = dict(
input_columns=['sentence1', 'sentence2'], output_column='label')
# TODO: two prompt templates for ocnli
ocnli_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template={
'contradiction':
'{sentence1}\nKeeping in mind the above text, consider: {sentence2}?\nIs this "always", "sometimes", or "never" correct? [MASK]never',
'entailment':
'{sentence1}\nKeeping in mind the above text, consider: {sentence2}?\nIs this "always", "sometimes", or "never" correct? [MASK]always',
'neutral':
'{sentence1}\nKeeping in mind the above text, consider: {sentence2}?\nIs this "always", "sometimes", or "never" correct? [MASK]sometimes'
}),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=PPLInferencer))
ocnli_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
ocnli_datasets = [
dict(
type=HFDataset,
abbr='ocnli',
path='json',
split='train',
data_files='./data/CLUE/OCNLI/dev.json',
reader_cfg=ocnli_reader_cfg,
infer_cfg=ocnli_infer_cfg,
eval_cfg=ocnli_eval_cfg)
]
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import HFDataset
gsm8k_reader_cfg = dict(input_columns=['question'], output_column='answer')
gsm8k_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template=dict(
round=[
dict(role='HUMAN', prompt='Q: There are 15 trees in the grove. Grove workers will plant trees in the grove today. After they are done, there will be 21 trees. How many trees did the grove workers plant today?'),
dict(role='BOT', prompt='A: We start with 15 trees. Later we have 21 trees. The difference must be the number of trees they planted. So, they must have planted 21 - 15 = 6 trees. The answer is 6.\n'),
dict(role='HUMAN', prompt='Q: If there are 3 cars in the parking lot and 2 more cars arrive, how many cars are in the parking lot?'),
dict(role='BOT', prompt='A: There are 3 cars in the parking lot already. 2 more arrive. Now there are 3 + 2 = 5 cars. The answer is 5.\n'),
dict(role='HUMAN', prompt='Q: Leah had 32 chocolates and her sister had 42. If they ate 35, how many pieces do they have left in total?'),
dict(role='BOT', prompt="A: Leah had 32 chocolates and Leah's sister had 42. That means there were originally 32 + 42 = 74 chocolates. 35 have been eaten. So in total they still have 74 - 35 = 39 chocolates. The answer is 39.\n"),
dict(role='HUMAN', prompt='Q: Jason had 20 lollipops. He gave Denny some lollipops. Now Jason has 12 lollipops. How many lollipops did Jason give to Denny?'),
dict(role='BOT', prompt='A: Jason had 20 lollipops. Since he only has 12 now, he must have given the rest to Denny. The number of lollipops he has given to Denny must have been 20 - 12 = 8 lollipops. The answer is 8.\n'),
dict(role='HUMAN', prompt='Q: Shawn has five toys. For Christmas, he got two toys each from his mom and dad. How many toys does he have now?'),
dict(role='BOT', prompt='A: He has 5 toys. He got 2 from mom, so after that he has 5 + 2 = 7 toys. Then he got 2 more from dad, so in total he has 7 + 2 = 9 toys. The answer is 9.\n'),
dict(role='HUMAN', prompt='Q: There were nine computers in the server room. Five more computers were installed each day, from monday to thursday. How many computers are now in the server room?'),
dict(role='BOT', prompt='A: There are 4 days from monday to thursday. 5 computers were added each day. That means in total 4 * 5 = 20 computers were added. There were 9 computers in the beginning, so now there are 9 + 20 = 29 computers. The answer is 29.\n'),
dict(role='HUMAN', prompt='Q: Michael had 58 golf balls. On tuesday, he lost 23 golf balls. On wednesday, he lost 2 more. How many golf balls did he have at the end of wednesday?'),
dict(role='BOT', prompt='A: Michael initially had 58 balls. He lost 23 on Tuesday, so after that he has 58 - 23 = 35 balls. On Wednesday he lost 2 more so now he has 35 - 2 = 33 balls. The answer is 33.\n'),
dict(role='HUMAN', prompt='Q: Olivia has $23. She bought five bagels for $3 each. How much money does she have left?'),
dict(role='BOT', prompt='A: She bought 5 bagels for $3 each. This means she spent 5 * $3 = $15 on the bagels. She had $23 in beginning, so now she has $23 - $15 = $8. The answer is 8.\n'),
dict(role='HUMAN', prompt='Q: {question}'),
dict(role='BOT', prompt='A: {answer}\n'),
],
)),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GenInferencer))
gsm8k_eval_cfg = dict(evaluator=dict(type=AccEvaluator),
pred_role="BOT",
pred_postprocessor=dict(type='gsm8k'),
dataset_postprocessor=dict(type='gsm8k_dataset'))
gsm8k_datasets = [
dict(
type=HFDataset,
path='gsm8k',
name='main',
reader_cfg=gsm8k_reader_cfg,
infer_cfg=gsm8k_infer_cfg,
eval_cfg=gsm8k_eval_cfg)
]
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import HFDataset
gsm8k_reader_cfg = dict(input_columns=['question'], output_column='answer')
gsm8k_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template=dict(
round=[
dict(role='HUMAN', prompt='Q: There are 15 trees in the grove. Grove workers will plant trees in the grove today. After they are done, there will be 21 trees. How many trees did the grove workers plant today?'),
dict(role='BOT', prompt='A: We start with 15 trees. Later we have 21 trees. The difference must be the number of trees they planted. So, they must have planted 21 - 15 = 6 trees. The answer is 6.\n'),
dict(role='HUMAN', prompt='Q: If there are 3 cars in the parking lot and 2 more cars arrive, how many cars are in the parking lot?'),
dict(role='BOT', prompt='A: There are 3 cars in the parking lot already. 2 more arrive. Now there are 3 + 2 = 5 cars. The answer is 5.\n'),
dict(role='HUMAN', prompt='Q: Leah had 32 chocolates and her sister had 42. If they ate 35, how many pieces do they have left in total?'),
dict(role='BOT', prompt="A: Leah had 32 chocolates and Leah's sister had 42. That means there were originally 32 + 42 = 74 chocolates. 35 have been eaten. So in total they still have 74 - 35 = 39 chocolates. The answer is 39.\n"),
dict(role='HUMAN', prompt='Q: Jason had 20 lollipops. He gave Denny some lollipops. Now Jason has 12 lollipops. How many lollipops did Jason give to Denny?'),
dict(role='BOT', prompt='A: Jason had 20 lollipops. Since he only has 12 now, he must have given the rest to Denny. The number of lollipops he has given to Denny must have been 20 - 12 = 8 lollipops. The answer is 8.\n'),
dict(role='HUMAN', prompt='Q: Shawn has five toys. For Christmas, he got two toys each from his mom and dad. How many toys does he have now?'),
dict(role='BOT', prompt='A: He has 5 toys. He got 2 from mom, so after that he has 5 + 2 = 7 toys. Then he got 2 more from dad, so in total he has 7 + 2 = 9 toys. The answer is 9.\n'),
dict(role='HUMAN', prompt='Q: There were nine computers in the server room. Five more computers were installed each day, from monday to thursday. How many computers are now in the server room?'),
dict(role='BOT', prompt='A: There are 4 days from monday to thursday. 5 computers were added each day. That means in total 4 * 5 = 20 computers were added. There were 9 computers in the beginning, so now there are 9 + 20 = 29 computers. The answer is 29.\n'),
dict(role='HUMAN', prompt='Q: Michael had 58 golf balls. On tuesday, he lost 23 golf balls. On wednesday, he lost 2 more. How many golf balls did he have at the end of wednesday?'),
dict(role='BOT', prompt='A: Michael initially had 58 balls. He lost 23 on Tuesday, so after that he has 58 - 23 = 35 balls. On Wednesday he lost 2 more so now he has 35 - 2 = 33 balls. The answer is 33.\n'),
dict(role='HUMAN', prompt='Q: Olivia has $23. She bought five bagels for $3 each. How much money does she have left?'),
dict(role='BOT', prompt='A: She bought 5 bagels for $3 each. This means she spent 5 * $3 = $15 on the bagels. She had $23 in beginning, so now she has $23 - $15 = $8. The answer is 8.\n'),
dict(role='HUMAN', prompt='Q: {question}'),
dict(role='BOT', prompt='A: {answer}\n'),
],
)),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GenInferencer))
gsm8k_eval_cfg = dict(evaluator=dict(type=AccEvaluator),
pred_role="BOT",
pred_postprocessor=dict(type='gsm8k'),
dataset_postprocessor=dict(type='gsm8k_dataset'))
gsm8k_datasets = [
dict(
abbr='gsm8k',
type=HFDataset,
path='gsm8k',
name='main',
reader_cfg=gsm8k_reader_cfg,
infer_cfg=gsm8k_infer_cfg,
eval_cfg=gsm8k_eval_cfg)
]
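# Sketch (an assumption, not the exact OpenCompass implementation): GSM8K gold
# answers end with '#### <number>', so a 'gsm8k_dataset' postprocessor keeps
# only that number, while the 'gsm8k' pred postprocessor pulls the final
# number out of the model's "The answer is N." generation. The gold side:
#
#     def extract_gold(answer: str) -> str:
#         return answer.split('####')[-1].replace(',', '').strip()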
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import PPLInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import hellaswagDataset
hellaswag_reader_cfg = dict(
input_columns=['ctx', 'A', 'B', 'C', 'D'],
output_column='label',
train_split='validation',
test_split='validation')
hellaswag_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template={
i: dict(round=[
dict(role="HUMAN", prompt="{ctx}"),
dict(role="BOT", prompt=f"{{{chr(ord('A') + i)}}}"),
])
for i in range(4)
}),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=PPLInferencer))
hellaswag_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
hellaswag_datasets = [
dict(
type=hellaswagDataset,
path='hellaswag',
reader_cfg=hellaswag_reader_cfg,
infer_cfg=hellaswag_infer_cfg,
eval_cfg=hellaswag_eval_cfg)
]
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import PPLInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import hellaswagDataset
hellaswag_reader_cfg = dict(
input_columns=['ctx', 'A', 'B', 'C', 'D'],
output_column='label',
train_split='validation',
test_split='validation')
hellaswag_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template={
0: "{ctx} {A}",
1: "{ctx} {B}",
2: "{ctx} {C}",
3: "{ctx} {D}",
}),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=PPLInferencer))
hellaswag_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
hellaswag_datasets = [
dict(
type=hellaswagDataset,
path='hellaswag',
reader_cfg=hellaswag_reader_cfg,
infer_cfg=hellaswag_infer_cfg,
eval_cfg=hellaswag_eval_cfg)
]
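# The two hellaswag configs differ only in prompt shape (illustration): the
# first wraps each candidate ending in HUMAN/BOT rounds for chat-style models,
# while this one concatenates '{ctx} {ending}' as plain text. In both cases
# PPLInferencer scores the four completed strings and predicts the index of
# the lowest-perplexity one:
#
#     # prediction = min(range(4), key=lambda i: ppl(filled[i]))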