"torchvision/csrc/ops/ps_roi_pool.cpp" did not exist on "e89c4c0198fd8cbd11344564162c68771524b2d7"
Commit be3dfa50 authored by jerrrrry's avatar jerrrrry
Browse files

Initial commit

parents
Pipeline #2876 failed with stages
in 0 seconds
# USED IN BASE MODEL
from mmengine.config import read_base
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.datasets import DropOpenAIDataset, DropOpenAIEvaluator

with read_base():
    from .drop_examples import drop_examples  # noqa: F401, F403

# DROP reader: the benchmark is scored on its validation split only.
drop_reader_cfg = dict(
    input_columns=['prompt'],
    output_column='answers',
    train_split='validation',
    test_split='validation',
)

# Few-shot prompt; the doubled braces leave a literal {prompt} placeholder
# after f-string expansion, filled in per sample by the prompt template.
template = f'''\
You will be asked to read a passage and answer a question. Think step by step, then write a line of the form "Answer: $ANSWER" at the end of your response. Some examples of passages and Q&A are provided below.
{drop_examples}
# Your Task
---
{{prompt}}'''

drop_infer_cfg = dict(
    prompt_template=dict(
        type=PromptTemplate,
        template=dict(round=[dict(role='HUMAN', prompt=template)]),
    ),
    retriever=dict(type=ZeroRetriever),
    # Cut generation off as soon as the model starts inventing a new
    # passage/question instead of answering the current one.
    inferencer=dict(
        type=GenInferencer,
        stopping_criteria=['---', 'Passage', 'Question', 'You will be asked'],
    ),
)

drop_eval_cfg = dict(evaluator=dict(type=DropOpenAIEvaluator))

drop_datasets = [
    dict(
        abbr='drop',
        type=DropOpenAIDataset,
        path='data/drop_simple_eval/dev.jsonl',
        reader_cfg=drop_reader_cfg,
        infer_cfg=drop_infer_cfg,
        eval_cfg=drop_eval_cfg,
    )
]
from mmengine.config import read_base
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.datasets import DropOpenAIDataset, DropOpenAIEvaluator

with read_base():
    from .drop_examples import drop_examples  # noqa: F401, F403

# DROP reader: the benchmark is scored on its validation split only.
drop_reader_cfg = dict(
    input_columns=['prompt'],
    output_column='answers',
    train_split='validation',
    test_split='validation',
)

# Few-shot prompt; the doubled braces leave a literal {prompt} placeholder
# after f-string expansion.
template = f'You will be asked to read a passage and answer a question. Think step by step, then write a line of the form "Answer: $ANSWER" at the end of your response. Some examples of passages and Q&A are provided below.\n\n{drop_examples}\n\n# Your Task\n\n---\n{{prompt}}'

drop_infer_cfg = dict(
    prompt_template=dict(
        type=PromptTemplate,
        template=dict(round=[dict(role='HUMAN', prompt=template)]),
    ),
    retriever=dict(type=ZeroRetriever),
    inferencer=dict(type=GenInferencer),
)

drop_eval_cfg = dict(evaluator=dict(type=DropOpenAIEvaluator))

drop_datasets = [
    dict(
        abbr='drop',
        type=DropOpenAIDataset,
        path='data/drop_simple_eval/dev.jsonl',
        reader_cfg=drop_reader_cfg,
        infer_cfg=drop_infer_cfg,
        eval_cfg=drop_eval_cfg,
    )
]
from mmengine.config import read_base
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.datasets import DropOpenAIDataset, DropOpenAIEvaluator

with read_base():
    from .drop_examples import drop_examples  # noqa: F401, F403

# DROP reader: the benchmark is scored on its validation split only.
drop_reader_cfg = dict(
    input_columns=['prompt'],
    output_column='answers',
    train_split='validation',
    test_split='validation',
)

# Variant that places the step-by-step instruction AFTER the task rather than
# before it; the doubled braces leave a literal {prompt} placeholder.
template = f'You will be asked to read a passage and answer a question. Some examples of passages and Q&A are provided below.\n\n{drop_examples}\n\n# Your Task\n\n---\n{{prompt}}\n\nThink step by step, then write a line of the form "Answer: $ANSWER" at the end of your response.'

drop_infer_cfg = dict(
    prompt_template=dict(
        type=PromptTemplate,
        template=dict(round=[dict(role='HUMAN', prompt=template)]),
    ),
    retriever=dict(type=ZeroRetriever),
    inferencer=dict(type=GenInferencer),
)

drop_eval_cfg = dict(evaluator=dict(type=DropOpenAIEvaluator))

drop_datasets = [
    dict(
        abbr='drop',
        type=DropOpenAIDataset,
        path='data/drop_simple_eval/dev.jsonl',
        reader_cfg=drop_reader_cfg,
        infer_cfg=drop_infer_cfg,
        eval_cfg=drop_eval_cfg,
    )
]
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.datasets import (DS1000Dataset, ds1000_completion_postprocess,
                                  ds1000_matplotlib_postprocess,
                                  DS1000Evaluator)

# DS-1000 ships a single 'test' split; both reader splits point at it.
ds1000_reader_cfg = dict(
    input_columns=['prompt'],
    output_column='test_column',
    train_split='test',
    test_split='test')

ds1000_infer_cfg = dict(
    prompt_template=dict(
        type=PromptTemplate,
        template=dict(round=[dict(role='HUMAN', prompt='{prompt}')]),
    ),
    retriever=dict(type=ZeroRetriever),
    inferencer=dict(type=GenInferencer),
)

ds1000_eval_cfg = dict(
    evaluator=dict(type=DS1000Evaluator),
    pred_role='BOT',
    pred_postprocessor=dict(type=ds1000_completion_postprocess),
)

# The DS-1000 dataset can be downloaded from
# https://github.com/HKUNLP/DS-1000/blob/main/ds1000_data.zip
ds1000_datasets = [
    dict(
        abbr=f'ds1000_{lib}',
        type=DS1000Dataset,
        path='./data/ds1000_data/',
        libs=f'{lib}',
        mode='Completion',
        reader_cfg=ds1000_reader_cfg,
        infer_cfg=ds1000_infer_cfg,
        eval_cfg=ds1000_eval_cfg,
    ) for lib in [
        'Pandas',
        'Numpy',
        'Tensorflow',
        'Scipy',
        'Sklearn',
        'Pytorch',
    ]
]

# Matplotlib needs a dedicated answer postprocessor, so it is appended with
# its own eval_cfg instead of being part of the comprehension above.
ds1000_datasets.append(
    dict(
        abbr='ds1000_Matplotlib',
        type=DS1000Dataset,
        path='./data/ds1000_data/',
        libs='Matplotlib',
        mode='Completion',
        reader_cfg=ds1000_reader_cfg,
        infer_cfg=ds1000_infer_cfg,
        eval_cfg=dict(
            evaluator=dict(type=DS1000Evaluator),
            pred_role='BOT',
            pred_postprocessor=dict(type=ds1000_matplotlib_postprocess),
        ),
    ))
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.datasets import DS1000Dataset, DS1000ServiceEvaluator

# All seven DS-1000 libraries are evaluated through the remote service.
_ds1000_libs = [
    'Pandas',
    'Numpy',
    'Tensorflow',
    'Scipy',
    'Sklearn',
    'Pytorch',
    'Matplotlib',
]

# DS-1000 ships a single 'test' split; both reader splits point at it.
ds1000_reader_cfg = dict(
    input_columns=['prompt'],
    output_column='test_column',
    train_split='test',
    test_split='test')

ds1000_infer_cfg = dict(
    prompt_template=dict(
        type=PromptTemplate,
        template=dict(round=[dict(role='HUMAN', prompt='{prompt}')]),
    ),
    retriever=dict(type=ZeroRetriever),
    inferencer=dict(type=GenInferencer),
)

# One evaluator per library, all hitting the same evaluation service.
# Replace ip_address/port with your code_eval_server endpoint.
ds1000_eval_cfg_dict = {
    lib: dict(
        evaluator=dict(
            type=DS1000ServiceEvaluator,
            lib=lib,
            ip_address='localhost',
            port=5000,
        ),
        pred_role='BOT')
    for lib in _ds1000_libs
}

# The DS-1000 dataset can be downloaded from
# https://github.com/HKUNLP/DS-1000/blob/main/ds1000_data.zip
ds1000_datasets = [
    dict(
        abbr=f'ds1000_{lib}',
        type=DS1000Dataset,
        path='./data/ds1000_data/',
        libs=f'{lib}',
        mode='Completion',
        reader_cfg=ds1000_reader_cfg,
        infer_cfg=ds1000_infer_cfg,
        eval_cfg=ds1000_eval_cfg_dict[lib],
    ) for lib in _ds1000_libs
]
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import AgentInferencer
from opencompass.datasets import DS1000Dataset_Interperter, DS1000InterpreterEvaluator

# One-shot agent demonstration: shows the expected thought / action /
# action_input / finish protocol for the Python interpreter tool.
ds1000_example = """
In the following task, you should generate code with one assertion to testify the correctness of your code.
Example:
<HUMAN>Problem:
How do I get the dimensions of an array? For instance, this is (2, 2):
a = np.array([[1,2],[3,4]])
<ASSISTANT>{thought} In Python, Numpy provides a method called `shape` which helps to get the dimensions of an array.
{action} PythonInterpreter
{action_input}
```python
import numpy as np
def solution(x):
# Convert to np.ndarray
x = np.array(x)
# Getting the dimensions of the array
dimensions = x.shape
return dimensions
assert solution([[1,2],[3,4]]) == (2, 2)
```
<SYSTEM>{response}True
<ASSISTANT> {thought} By running this code, you can get the dimensions of an array.
{finish}
```python
import numpy as np
def solution(x):
# Convert to np.ndarray
x = np.array(x)
# Getting the dimensions of the array
dimensions = x.shape
return dimensions
```
"""

# DS-1000 ships a single 'test' split; both reader splits point at it.
ds1000_reader_cfg = dict(
    input_columns=['prompt'],
    output_column='test_column',
    train_split='test',
    test_split='test',
)

ds1000_infer_cfg = dict(
    prompt_template=dict(
        type=PromptTemplate,
        template="""{prompt}""",
    ),
    retriever=dict(type=ZeroRetriever),
    inferencer=dict(type=AgentInferencer, example=ds1000_example),
)

ds1000_eval_cfg = dict(
    evaluator=dict(type=DS1000InterpreterEvaluator),
    pred_role='BOT',
)

# The DS-1000 dataset can be downloaded from
# https://github.com/HKUNLP/DS-1000/blob/main/ds1000_data.zip
# Matplotlib cannot fit this setting.
ds1000_datasets = [
    dict(
        abbr=f'ds1000_{lib}',
        type=DS1000Dataset_Interperter,
        path='./data/ds1000_data/',
        libs=f'{lib}',
        reader_cfg=ds1000_reader_cfg,
        infer_cfg=ds1000_infer_cfg,
        eval_cfg=ds1000_eval_cfg,
    )
    for lib in [
        'Pandas',
        'Numpy',
        # 'Tensorflow', # error using tensorflow, skipped temporarily
        'Scipy',
        'Sklearn',
        'Pytorch',
    ]
]
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.datasets import (DS1000Dataset, ds1000_postprocess,
                                  ds1000_matplotlib_postprocess,
                                  DS1000Evaluator)

# DS-1000 ships a single 'test' split; both reader splits point at it.
ds1000_reader_cfg = dict(
    input_columns=['prompt'],
    output_column='test_column',
    train_split='test',
    test_split='test')

ds1000_infer_cfg = dict(
    prompt_template=dict(
        type=PromptTemplate,
        template=dict(round=[dict(role='HUMAN', prompt='{prompt}')]),
    ),
    retriever=dict(type=ZeroRetriever),
    inferencer=dict(type=GenInferencer),
)

ds1000_eval_cfg = dict(
    evaluator=dict(type=DS1000Evaluator),
    pred_role='BOT',
    pred_postprocessor=dict(type=ds1000_postprocess),
)

# The DS-1000 dataset can be downloaded from
# https://github.com/HKUNLP/DS-1000/blob/main/ds1000_data.zip
ds1000_datasets = [
    dict(
        abbr=f'ds1000_{lib}',
        type=DS1000Dataset,
        path='./data/ds1000_data/',
        libs=f'{lib}',
        reader_cfg=ds1000_reader_cfg,
        infer_cfg=ds1000_infer_cfg,
        eval_cfg=ds1000_eval_cfg,
    ) for lib in [
        'Pandas',
        'Numpy',
        'Tensorflow',
        'Scipy',
        'Sklearn',
        'Pytorch',
    ]
]

# Matplotlib needs a dedicated answer postprocessor, so it is appended with
# its own eval_cfg instead of being part of the comprehension above.
ds1000_datasets.append(
    dict(
        abbr='ds1000_Matplotlib',
        type=DS1000Dataset,
        path='./data/ds1000_data/',
        libs='Matplotlib',
        reader_cfg=ds1000_reader_cfg,
        infer_cfg=ds1000_infer_cfg,
        eval_cfg=dict(
            evaluator=dict(type=DS1000Evaluator),
            pred_role='BOT',
            pred_postprocessor=dict(type=ds1000_matplotlib_postprocess),
        ),
    ))
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.datasets import DS1000Dataset, DS1000ServiceEvaluator

# All seven DS-1000 libraries are evaluated through the remote service.
_ds1000_libs = [
    'Pandas',
    'Numpy',
    'Tensorflow',
    'Scipy',
    'Sklearn',
    'Pytorch',
    'Matplotlib',
]

# DS-1000 ships a single 'test' split; both reader splits point at it.
ds1000_reader_cfg = dict(
    input_columns=['prompt'],
    output_column='test_column',
    train_split='test',
    test_split='test')

ds1000_infer_cfg = dict(
    prompt_template=dict(
        type=PromptTemplate,
        template=dict(round=[dict(role='HUMAN', prompt='{prompt}')]),
    ),
    retriever=dict(type=ZeroRetriever),
    inferencer=dict(type=GenInferencer),
)

# One evaluator per library, all hitting the same evaluation service.
# Replace ip_address/port with your code_eval_server endpoint.
ds1000_eval_cfg_dict = {
    lib: dict(
        evaluator=dict(
            type=DS1000ServiceEvaluator,
            lib=lib,
            ip_address='localhost',
            port=5000,
        ),
        pred_role='BOT')
    for lib in _ds1000_libs
}

# The DS-1000 dataset can be downloaded from
# https://github.com/HKUNLP/DS-1000/blob/main/ds1000_data.zip
ds1000_datasets = [
    dict(
        abbr=f'ds1000_{lib}',
        type=DS1000Dataset,
        path='./data/ds1000_data/',
        libs=f'{lib}',
        reader_cfg=ds1000_reader_cfg,
        infer_cfg=ds1000_infer_cfg,
        eval_cfg=ds1000_eval_cfg_dict[lib],
    ) for lib in _ds1000_libs
]
from mmengine.config import read_base

# Thin alias config: re-export the flores datasets from the concrete variant.
with read_base():
    from .flores_gen_806ede import flores_datasets  # noqa: F401, F403
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import TopkRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import BleuEvaluator
from opencompass.datasets import FloresFirst100Dataset
# FLORES-101 language table. Each entry is
# [short code, FLORES-200 code, English name, language family];
# the FLORES-200 code doubles as the dataset column suffix (sentence_<code>).
_flores_lang_map = [
['eng', 'eng_Latn', 'English', 'Indo-European-Germanic'],
['afr', 'afr_Latn', 'Afrikaans', 'Indo-European-Germanic'],
['dan', 'dan_Latn', 'Danish', 'Indo-European-Germanic'],
['deu', 'deu_Latn', 'German', 'Indo-European-Germanic'],
['isl', 'isl_Latn', 'Icelandic', 'Indo-European-Germanic'],
['ltz', 'ltz_Latn', 'Luxembourgish', 'Indo-European-Germanic'],
['nld', 'nld_Latn', 'Dutch', 'Indo-European-Germanic'],
['nob', 'nob_Latn', 'Norwegian', 'Indo-European-Germanic'],
['swe', 'swe_Latn', 'Swedish', 'Indo-European-Germanic'],
['ast', 'ast_Latn', 'Asturian', 'Indo-European-Romance'],
['cat', 'cat_Latn', 'Catalan', 'Indo-European-Romance'],
['fra', 'fra_Latn', 'French', 'Indo-European-Romance'],
['glg', 'glg_Latn', 'Galician', 'Indo-European-Romance'],
['oci', 'oci_Latn', 'Occitan', 'Indo-European-Romance'],
['por', 'por_Latn', 'Portuguese', 'Indo-European-Romance'],
['ron', 'ron_Latn', 'Romanian', 'Indo-European-Romance'],
['spa', 'spa_Latn', 'Spanish', 'Indo-European-Romance'],
['bel', 'bel_Cyrl', 'Belarusian', 'Indo-European-Slavic'],
['bos', 'bos_Latn', 'Bosnian', 'Indo-European-Slavic'],
['bul', 'bul_Cyrl', 'Bulgarian', 'Indo-European-Slavic'],
['ces', 'ces_Latn', 'Czech', 'Indo-European-Slavic'],
['hrv', 'hrv_Latn', 'Croatian', 'Indo-European-Slavic'],
['mkd', 'mkd_Cyrl', 'Macedonian', 'Indo-European-Slavic'],
['pol', 'pol_Latn', 'Polish', 'Indo-European-Slavic'],
['rus', 'rus_Cyrl', 'Russian', 'Indo-European-Slavic'],
['slk', 'slk_Latn', 'Slovak', 'Indo-European-Slavic'],
['slv', 'slv_Latn', 'Slovenian', 'Indo-European-Slavic'],
['srp', 'srp_Cyrl', 'Serbian', 'Indo-European-Slavic'],
['ukr', 'ukr_Cyrl', 'Ukrainian', 'Indo-European-Slavic'],
['asm', 'asm_Beng', 'Assamese', 'Indo-European-Indo-Aryan'],
['ben', 'ben_Beng', 'Bengali', 'Indo-European-Indo-Aryan'],
['guj', 'guj_Gujr', 'Gujarati', 'Indo-European-Indo-Aryan'],
['hin', 'hin_Deva', 'Hindi', 'Indo-European-Indo-Aryan'],
['mar', 'mar_Deva', 'Marathi', 'Indo-European-Indo-Aryan'],
['npi', 'npi_Deva', 'Nepali', 'Indo-European-Indo-Aryan'],
['ory', 'ory_Orya', 'Oriya', 'Indo-European-Indo-Aryan'],
['pan', 'pan_Guru', 'Punjabi', 'Indo-European-Indo-Aryan'],
['snd', 'snd_Arab', 'Sindhi', 'Indo-European-Indo-Aryan'],
['urd', 'urd_Arab', 'Urdu', 'Indo-European-Indo-Aryan'],
['ckb', 'ckb_Arab', 'Kurdish', 'Indo-European-Other'],
['cym', 'cym_Latn', 'Welsh', 'Indo-European-Other'],
['ell', 'ell_Grek', 'Greek', 'Indo-European-Other'],
['fas', 'pes_Arab', 'Persian', 'Indo-European-Other'],
['gle', 'gle_Latn', 'Irish', 'Indo-European-Other'],
['hye', 'hye_Armn', 'Armenian', 'Indo-European-Other'],
['ita', 'ita_Latn', 'Italian', 'Indo-European-Other'],
['lav', 'lvs_Latn', 'Latvian', 'Indo-European-Other'],
['lit', 'lit_Latn', 'Lithuanian', 'Indo-European-Other'],
['pus', 'pbt_Arab', 'Pashto', 'Indo-European-Other'],
['tgk', 'tgk_Cyrl', 'Tajik', 'Indo-European-Other'],
['ceb', 'ceb_Latn', 'Cebuano', 'Austronesian'],
['ind', 'ind_Latn', 'Indonesian', 'Austronesian'],
['jav', 'jav_Latn', 'Javanese', 'Austronesian'],
['mri', 'mri_Latn', 'Maori', 'Austronesian'],
['msa', 'zsm_Latn', 'Malay', 'Austronesian'],
['tgl', 'tgl_Latn', 'Tagalog', 'Austronesian'],
['ibo', 'ibo_Latn', 'Igbo', 'Atlantic-Congo'],
['kam', 'kam_Latn', 'Kamba', 'Atlantic-Congo'],
['kea', 'kea_Latn', 'Kabuverdianu', 'Atlantic-Congo'],
['lin', 'lin_Latn', 'Lingala', 'Atlantic-Congo'],
['lug', 'lug_Latn', 'Luganda', 'Atlantic-Congo'],
['nso', 'nso_Latn', 'Northern Sotho', 'Atlantic-Congo'],
['nya', 'nya_Latn', 'Nyanja', 'Atlantic-Congo'],
['sna', 'sna_Latn', 'Shona', 'Atlantic-Congo'],
['swh', 'swh_Latn', 'Swahili', 'Atlantic-Congo'],
['umb', 'umb_Latn', 'Umbundu', 'Atlantic-Congo'],
['wol', 'wol_Latn', 'Wolof', 'Atlantic-Congo'],
['xho', 'xho_Latn', 'Xhosa', 'Atlantic-Congo'],
['yor', 'yor_Latn', 'Yoruba', 'Atlantic-Congo'],
['zul', 'zul_Latn', 'Zulu', 'Atlantic-Congo'],
['amh', 'amh_Ethi', 'Amharic', 'Afro-Asiatic'],
['ara', 'arb_Arab', 'Arabic', 'Afro-Asiatic'],
['ful', 'fuv_Latn', 'Fulah', 'Afro-Asiatic'],
['mlt', 'mlt_Latn', 'Maltese', 'Afro-Asiatic'],
['orm', 'gaz_Latn', 'Oromo', 'Afro-Asiatic'],
['som', 'som_Latn', 'Somali', 'Afro-Asiatic'],
['azj', 'azj_Latn', 'Azerbaijani', 'Turkic'],
['kaz', 'kaz_Cyrl', 'Kazakh', 'Turkic'],
['kir', 'kir_Cyrl', 'Kyrgyz', 'Turkic'],
['tur', 'tur_Latn', 'Turkish', 'Turkic'],
['uzb', 'uzn_Latn', 'Uzbek', 'Turkic'],
['kan', 'kan_Knda', 'Kannada', 'Dravidian'],
['mal', 'mal_Mlym', 'Malayalam', 'Dravidian'],
['tam', 'tam_Taml', 'Tamil', 'Dravidian'],
['tel', 'tel_Telu', 'Telugu', 'Dravidian'],
['mya', 'mya_Mymr', 'Burmese', 'Sino-Tibetan'],
['zho_simpl', 'zho_Hans', 'Chinese (Simpl)', 'Sino-Tibetan'],
['zho_trad', 'zho_Hant', 'Chinese (Trad)', 'Sino-Tibetan'],
['est', 'est_Latn', 'Estonian', 'Other'],
['fin', 'fin_Latn', 'Finnish', 'Other'],
['hau', 'hau_Latn', 'Hausa', 'Other'],
['heb', 'heb_Hebr', 'Hebrew', 'Other'],
['hun', 'hun_Latn', 'Hungarian', 'Other'],
['jpn', 'jpn_Jpan', 'Japanese', 'Other'],
['kat', 'kat_Geor', 'Georgian', 'Other'],
['khm', 'khm_Khmr', 'Khmer', 'Other'],
['kor', 'kor_Hang', 'Korean', 'Other'],
['lao', 'lao_Laoo', 'Lao', 'Other'],
['luo', 'luo_Latn', 'Luo', 'Other'],
['mon', 'khk_Cyrl', 'Mongolian', 'Other'],
['tha', 'tha_Thai', 'Thai', 'Other'],
['vie', 'vie_Latn', 'Vietnamese', 'Other'],
]
# Index the table by short language code for the lookups below.
flores_lang_map = {i[0]: i for i in _flores_lang_map}
# Evaluate both directions for every non-English language: eng→X and X→eng.
_flores_subtasks = ([f'eng-{code}' for code in flores_lang_map if code != 'eng']
                    + [f'{code}-eng' for code in flores_lang_map if code != 'eng'])

flores_datasets = []
for _flores_subtask in _flores_subtasks:
    _src, _tgt = _flores_subtask.split('-')
    _, _flores_source, _src_inst, _ = flores_lang_map[_src]
    _, _flores_target, _tgt_inst, _ = flores_lang_map[_tgt]

    # Columns are named sentence_<FLORES-200 code> in the dataset.
    flores_reader_cfg = dict(
        input_columns=f'sentence_{_flores_source}',
        output_column=f'sentence_{_flores_target}',
        train_split='dev',
        test_split='devtest',
    )

    # 8-shot in-context translation with Topk-retrieved examples.
    flores_infer_cfg = dict(
        ice_template=dict(
            type=PromptTemplate,
            template=dict(
                begin='</E>',
                round=[
                    dict(
                        role='HUMAN',
                        prompt=f'Translate the following {_src_inst} statements to {_tgt_inst}.\n{{sentence_{_flores_source}}}'),
                    dict(role='BOT', prompt=f'{{sentence_{_flores_target}}}'),
                ],
            ),
            ice_token='</E>',
        ),
        retriever=dict(type=TopkRetriever, ice_num=8),
        inferencer=dict(type=GenInferencer),
    )

    # BLEU scoring; Chinese targets additionally get flores postprocessing.
    flores_eval_cfg = dict(evaluator=dict(type=BleuEvaluator), pred_role='BOT')
    if _tgt == 'zho_simpl':
        flores_eval_cfg.update(
            pred_postprocessor=dict(type='flores'),
            dataset_postprocessor=dict(type='flores'),
        )

    flores_datasets.append(
        dict(
            abbr=f'flores_100_{_src}-{_tgt}',
            type=FloresFirst100Dataset,
            path='opencompass/flores',
            name=f'{_flores_source}-{_flores_target}',
            reader_cfg=flores_reader_cfg.copy(),
            infer_cfg=flores_infer_cfg.copy(),
            eval_cfg=flores_eval_cfg.copy(),
        ))
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import TopkRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import BleuEvaluator
from opencompass.datasets import FloresFirst100Dataset
# FLORES-101 language table. Each entry is
# [short code, FLORES-200 code, English name, language family];
# the FLORES-200 code doubles as the dataset column suffix (sentence_<code>).
_flores_lang_map = [
['eng', 'eng_Latn', 'English', 'Indo-European-Germanic'],
['afr', 'afr_Latn', 'Afrikaans', 'Indo-European-Germanic'],
['dan', 'dan_Latn', 'Danish', 'Indo-European-Germanic'],
['deu', 'deu_Latn', 'German', 'Indo-European-Germanic'],
['isl', 'isl_Latn', 'Icelandic', 'Indo-European-Germanic'],
['ltz', 'ltz_Latn', 'Luxembourgish', 'Indo-European-Germanic'],
['nld', 'nld_Latn', 'Dutch', 'Indo-European-Germanic'],
['nob', 'nob_Latn', 'Norwegian', 'Indo-European-Germanic'],
['swe', 'swe_Latn', 'Swedish', 'Indo-European-Germanic'],
['ast', 'ast_Latn', 'Asturian', 'Indo-European-Romance'],
['cat', 'cat_Latn', 'Catalan', 'Indo-European-Romance'],
['fra', 'fra_Latn', 'French', 'Indo-European-Romance'],
['glg', 'glg_Latn', 'Galician', 'Indo-European-Romance'],
['oci', 'oci_Latn', 'Occitan', 'Indo-European-Romance'],
['por', 'por_Latn', 'Portuguese', 'Indo-European-Romance'],
['ron', 'ron_Latn', 'Romanian', 'Indo-European-Romance'],
['spa', 'spa_Latn', 'Spanish', 'Indo-European-Romance'],
['bel', 'bel_Cyrl', 'Belarusian', 'Indo-European-Slavic'],
['bos', 'bos_Latn', 'Bosnian', 'Indo-European-Slavic'],
['bul', 'bul_Cyrl', 'Bulgarian', 'Indo-European-Slavic'],
['ces', 'ces_Latn', 'Czech', 'Indo-European-Slavic'],
['hrv', 'hrv_Latn', 'Croatian', 'Indo-European-Slavic'],
['mkd', 'mkd_Cyrl', 'Macedonian', 'Indo-European-Slavic'],
['pol', 'pol_Latn', 'Polish', 'Indo-European-Slavic'],
['rus', 'rus_Cyrl', 'Russian', 'Indo-European-Slavic'],
['slk', 'slk_Latn', 'Slovak', 'Indo-European-Slavic'],
['slv', 'slv_Latn', 'Slovenian', 'Indo-European-Slavic'],
['srp', 'srp_Cyrl', 'Serbian', 'Indo-European-Slavic'],
['ukr', 'ukr_Cyrl', 'Ukrainian', 'Indo-European-Slavic'],
['asm', 'asm_Beng', 'Assamese', 'Indo-European-Indo-Aryan'],
['ben', 'ben_Beng', 'Bengali', 'Indo-European-Indo-Aryan'],
['guj', 'guj_Gujr', 'Gujarati', 'Indo-European-Indo-Aryan'],
['hin', 'hin_Deva', 'Hindi', 'Indo-European-Indo-Aryan'],
['mar', 'mar_Deva', 'Marathi', 'Indo-European-Indo-Aryan'],
['npi', 'npi_Deva', 'Nepali', 'Indo-European-Indo-Aryan'],
['ory', 'ory_Orya', 'Oriya', 'Indo-European-Indo-Aryan'],
['pan', 'pan_Guru', 'Punjabi', 'Indo-European-Indo-Aryan'],
['snd', 'snd_Arab', 'Sindhi', 'Indo-European-Indo-Aryan'],
['urd', 'urd_Arab', 'Urdu', 'Indo-European-Indo-Aryan'],
['ckb', 'ckb_Arab', 'Kurdish', 'Indo-European-Other'],
['cym', 'cym_Latn', 'Welsh', 'Indo-European-Other'],
['ell', 'ell_Grek', 'Greek', 'Indo-European-Other'],
['fas', 'pes_Arab', 'Persian', 'Indo-European-Other'],
['gle', 'gle_Latn', 'Irish', 'Indo-European-Other'],
['hye', 'hye_Armn', 'Armenian', 'Indo-European-Other'],
['ita', 'ita_Latn', 'Italian', 'Indo-European-Other'],
['lav', 'lvs_Latn', 'Latvian', 'Indo-European-Other'],
['lit', 'lit_Latn', 'Lithuanian', 'Indo-European-Other'],
['pus', 'pbt_Arab', 'Pashto', 'Indo-European-Other'],
['tgk', 'tgk_Cyrl', 'Tajik', 'Indo-European-Other'],
['ceb', 'ceb_Latn', 'Cebuano', 'Austronesian'],
['ind', 'ind_Latn', 'Indonesian', 'Austronesian'],
['jav', 'jav_Latn', 'Javanese', 'Austronesian'],
['mri', 'mri_Latn', 'Maori', 'Austronesian'],
['msa', 'zsm_Latn', 'Malay', 'Austronesian'],
['tgl', 'tgl_Latn', 'Tagalog', 'Austronesian'],
['ibo', 'ibo_Latn', 'Igbo', 'Atlantic-Congo'],
['kam', 'kam_Latn', 'Kamba', 'Atlantic-Congo'],
['kea', 'kea_Latn', 'Kabuverdianu', 'Atlantic-Congo'],
['lin', 'lin_Latn', 'Lingala', 'Atlantic-Congo'],
['lug', 'lug_Latn', 'Luganda', 'Atlantic-Congo'],
['nso', 'nso_Latn', 'Northern Sotho', 'Atlantic-Congo'],
['nya', 'nya_Latn', 'Nyanja', 'Atlantic-Congo'],
['sna', 'sna_Latn', 'Shona', 'Atlantic-Congo'],
['swh', 'swh_Latn', 'Swahili', 'Atlantic-Congo'],
['umb', 'umb_Latn', 'Umbundu', 'Atlantic-Congo'],
['wol', 'wol_Latn', 'Wolof', 'Atlantic-Congo'],
['xho', 'xho_Latn', 'Xhosa', 'Atlantic-Congo'],
['yor', 'yor_Latn', 'Yoruba', 'Atlantic-Congo'],
['zul', 'zul_Latn', 'Zulu', 'Atlantic-Congo'],
['amh', 'amh_Ethi', 'Amharic', 'Afro-Asiatic'],
['ara', 'arb_Arab', 'Arabic', 'Afro-Asiatic'],
['ful', 'fuv_Latn', 'Fulah', 'Afro-Asiatic'],
['mlt', 'mlt_Latn', 'Maltese', 'Afro-Asiatic'],
['orm', 'gaz_Latn', 'Oromo', 'Afro-Asiatic'],
['som', 'som_Latn', 'Somali', 'Afro-Asiatic'],
['azj', 'azj_Latn', 'Azerbaijani', 'Turkic'],
['kaz', 'kaz_Cyrl', 'Kazakh', 'Turkic'],
['kir', 'kir_Cyrl', 'Kyrgyz', 'Turkic'],
['tur', 'tur_Latn', 'Turkish', 'Turkic'],
['uzb', 'uzn_Latn', 'Uzbek', 'Turkic'],
['kan', 'kan_Knda', 'Kannada', 'Dravidian'],
['mal', 'mal_Mlym', 'Malayalam', 'Dravidian'],
['tam', 'tam_Taml', 'Tamil', 'Dravidian'],
['tel', 'tel_Telu', 'Telugu', 'Dravidian'],
['mya', 'mya_Mymr', 'Burmese', 'Sino-Tibetan'],
['zho_simpl', 'zho_Hans', 'Chinese (Simpl)', 'Sino-Tibetan'],
['zho_trad', 'zho_Hant', 'Chinese (Trad)', 'Sino-Tibetan'],
['est', 'est_Latn', 'Estonian', 'Other'],
['fin', 'fin_Latn', 'Finnish', 'Other'],
['hau', 'hau_Latn', 'Hausa', 'Other'],
['heb', 'heb_Hebr', 'Hebrew', 'Other'],
['hun', 'hun_Latn', 'Hungarian', 'Other'],
['jpn', 'jpn_Jpan', 'Japanese', 'Other'],
['kat', 'kat_Geor', 'Georgian', 'Other'],
['khm', 'khm_Khmr', 'Khmer', 'Other'],
['kor', 'kor_Hang', 'Korean', 'Other'],
['lao', 'lao_Laoo', 'Lao', 'Other'],
['luo', 'luo_Latn', 'Luo', 'Other'],
['mon', 'khk_Cyrl', 'Mongolian', 'Other'],
['tha', 'tha_Thai', 'Thai', 'Other'],
['vie', 'vie_Latn', 'Vietnamese', 'Other'],
]
# Index the table by short language code for the lookups below.
flores_lang_map = {i[0]: i for i in _flores_lang_map}
# Evaluate both directions for every non-English language: eng→X and X→eng.
_flores_subtasks = ([f'eng-{code}' for code in flores_lang_map if code != 'eng']
                    + [f'{code}-eng' for code in flores_lang_map if code != 'eng'])

flores_datasets = []
for _flores_subtask in _flores_subtasks:
    _src, _tgt = _flores_subtask.split('-')
    _, _flores_source, _src_inst, _ = flores_lang_map[_src]
    _, _flores_target, _tgt_inst, _ = flores_lang_map[_tgt]

    # Columns are named sentence_<FLORES-200 code> in the dataset.
    flores_reader_cfg = dict(
        input_columns=f'sentence_{_flores_source}',
        output_column=f'sentence_{_flores_target}',
        train_split='dev',
        test_split='devtest',
    )

    # Chinese→English gets an explicit "Chinese:/English:" template; every
    # other direction uses the compact "source = target" form.
    if _flores_subtask == 'zho_simpl-eng':
        _ice_template = f'</E>Chinese: {{sentence_{_flores_source}}}\nEnglish: {{sentence_{_flores_target}}}'
    else:
        _ice_template = f'</E>{{sentence_{_flores_source}}} = {{sentence_{_flores_target}}}'

    # 8-shot in-context translation with Topk-retrieved examples.
    flores_infer_cfg = dict(
        ice_template=dict(
            type=PromptTemplate,
            template=_ice_template,
            ice_token='</E>',
        ),
        retriever=dict(type=TopkRetriever, ice_num=8),
        inferencer=dict(type=GenInferencer),
    )

    # BLEU scoring; Chinese targets switch to the flores-chinese
    # postprocessors before scoring.
    flores_eval_cfg = dict(
        evaluator=dict(type=BleuEvaluator),
        pred_role='BOT',
        pred_postprocessor=dict(type='flores'),
        dataset_postprocessor=dict(type='flores'),
    )
    if _tgt == 'zho_simpl':
        flores_eval_cfg.update(
            pred_postprocessor=dict(type='flores-chinese'),
            dataset_postprocessor=dict(type='flores-chinese'),
        )

    flores_datasets.append(
        dict(
            abbr=f'flores_100_{_src}-{_tgt}',
            type=FloresFirst100Dataset,
            path='opencompass/flores',
            name=f'{_flores_source}-{_flores_target}',
            reader_cfg=flores_reader_cfg.copy(),
            infer_cfg=flores_infer_cfg.copy(),
            eval_cfg=flores_eval_cfg.copy(),
        ))
from mmengine.config import read_base

# Thin alias config: re-export the game24 datasets from the concrete variant.
with read_base():
    from .game24_gen_52a460 import game24_datasets  # noqa: F401, F403
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import ToTInferencer
from opencompass.datasets import (Game24Dataset, game24_postprocess,
                                  Game24Evaluator, Game24PromptWrapper)

# Generation settings forwarded to the model by the ToT inferencer.
generation_kwargs = dict(do_sample=False, temperature=0.7)

game24_reader_cfg = dict(
    input_columns=['input'],
    output_column='output',
)

# Tree-of-Thought search: propose candidate steps, value them, then greedily
# keep the top candidates at each level.
game24_infer_cfg = dict(
    prompt_template=dict(type=PromptTemplate, template='{input}'),
    retriever=dict(type=ZeroRetriever),
    inferencer=dict(
        type=ToTInferencer,
        generation_kwargs=generation_kwargs,
        method_generate='propose',
        method_evaluate='value',
        method_select='greedy',
        n_evaluate_sample=3,
        n_select_sample=5,
        prompt_wrapper=dict(type=Game24PromptWrapper),
    ),
)

game24_eval_cfg = dict(
    evaluator=dict(type=Game24Evaluator),
    pred_postprocessor=dict(type=game24_postprocess),
)

game24_datasets = [
    dict(
        abbr='game24',
        type=Game24Dataset,
        path='./data/game24/game24.csv',
        reader_cfg=game24_reader_cfg,
        infer_cfg=game24_infer_cfg,
        eval_cfg=game24_eval_cfg,
    )
]
# GaoKao MATH Answer Evaluation Dataset
A dataset for testing a model's performance on the GaoKao MATH answer extraction task.
It currently supports the following formats of GAOKAO math questions:
1. '单选题':Single choice question
2. '多选题':Multiple choice question
3. '填空题':Fill in the blank question, can be multiple blanks
4. '解答题':Answer question, can be multiple answers
Sample data:
```json
[
{
"id": "3b270bc4-570a-4d77-b122-a2fc372f7d6a",
"question": "过椭圆${x^2\\over {16}} +{ y^2 \\over {4}}=1$ %内一点$M(2,1)$ %引一条弦,使该弦被点$M$ %平分,则这条弦所在直线的方程为( ).\nA. $x+2y-4=0$ %\nB. $x-2y-4=0$ %\nC. $x+2y+4=0$ %\nD. $x-2y+4=0$ %\n\n",
"response": "本题主要考查直线与圆锥曲线.设所求直线与椭圆的一个交点为$A(x,y)$ %,由于中点$M(2,1)$ %,所以另一个交点$B$ %为$(4-x,2-y)$ %.因为$A$ %,$B$ %两点都在椭圆上,所以$x^2+4y^2=16$ %,$(4-x)^2+4(2-y)^2=16$ %,两式相减,整理可得$x+2y-4=0$ %.由于过$A$ %,$B$ %两点的直线只有一条,所以这条弦所在直线的方程为$x+2y-4=0$ %.故本题正确答案为A.\n答案是:A",
"extract_answer": "A",
"question_type": "单选题"
},
{
"id": "d60e42d7-30ee-44f9-a94d-aff6a8127750",
"question": "若函数$f(x)$ 具有下列性质:1.定义域为$(-1,1)$ ;2.对于任意的$x,y\\in(-1,1)$ ,都有$f(x)+f(y)=f\\left({\\dfrac{x+y}{1+xy}}\\right)$ ;3.当$-1< x< 0$ 时,$f(x)>0$ ,则称函数$f(x)$ 为$δ$ 的函数$.$ 若函数$f(x)$ 为$δ$ 的函数,则以下结论正确的是$(\\quad)$\nA. $\nB. x)$ 为奇函数\nC. $\nD. x)$ 为偶函数\nE. $\nF. x)$ 为单调递减函数\nG. $\nH. x)$ 为单调递增函数\n\n",
"response": "函数$f(x)$ 为$δ$ 的函数,令$x=y=0$ ,则$f(0)+f(0)=f(0)$ ,即$f(0)=0$ ,令$y=-x$ ,则$f(x)+f(-x)=f\\left(\\dfrac{x-x}{1-{x}^{2}}\\right)=f(0)=0$ ,则$f(-x)=-f(x)$ ,即函数$f(x)$ 是奇函数,设$-1< x< y< 1$ ,则$f(x)-f(y)=f(x)+f(-y)=f\\left(\\dfrac{x-y}{1-xy}\\right)$ ,$∵-1< x< y< 1$ ,$∴-1< \\dfrac{x-y}{1-xy}< 0$ ,则$f\\left(\\dfrac{x-y}{1-xy}\\right)>0$ ,即$f(x)-f(y)>0$ ,则$f(x)>f(y)$ ,即$f(x)$ 在$(-1,1)$ 上是减函数.故选$AC.$ 本题考查函数的奇偶性和单调性的判断,注意运用定义法,考查运算能力和推理能力,属于中档题.可令$x=y=0$ ,求得$f(0)=0$ ,再令$y=-x$ 可得$f(-x)=-f(x)$ ,可得$f(x)$ 的奇偶性;再令$-1< x< y< 1$ ,运用单调性的定义,结合其偶性的定义可得其单调性.\n答案是:A; C",
"extract_answer": "A, C",
"question_type": "多选题"
},
{
"id": "31b3f702-e60c-4a20-9a40-73bd72b92d1e",
"question": "请完成以下题目(1)曲线$$y=-5\\text{e}^{x}+3$$在点$$(0,-2)$$处的切线方程为___.(2)若曲线$$f(x)=x \\sin x+1$$在$$x=\\dfrac{ \\pi }{2}$$处的切线与直线$$ax+2y+1=0$$相互垂直,则实数$$a=$$___.\n\n",
"response": "(1)由$$y=-5\\text{e}^{x}+3$$,得$$y'=-5\\text{e}^{x}$$,所以切线的斜率$$k=y'|_{x=0}=-5$$,所以切线方程为$$y+2=-5(x-0)$$,即$$5x+y+2=0$$.(2)因为$$f'(x)= \\sin x+x \\cos x$$,所以$$f'\\left(\\dfrac{ \\pi }{2}\\right)= \\sin \\dfrac{ \\pi }{2}+\\dfrac{ \\pi }{2}\\cdot \\cos \\dfrac{ \\pi }{2}=1$$.又直线$$ax+2y+1=0$$的斜率为$$-\\dfrac{a}{2}$$,所以根据题意得$$1\\times \\left(-\\dfrac{a}{2}\\right)=-1$$,解得$$a=2$$.\n答案是:(1)$$5x+y+2=0$$ (2)$$2$$",
"extract_answer": "['(1)$$5x+y+2=0$$ (2)$$2$$']",
"question_type": "填空题"
},
{
"id": "16878941-1772-4290-bc61-00b193d5cf70",
"question": "已知函数$f\\left( x \\right)=\\left| 2x-1 \\right|$.(1)若不等式$f\\left( x+\\frac{1}{2} \\right)\\ge 2m+1\\left( m > 0 \\right)$的解集为$\\left( -\\infty ,-2 \\right]\\bigcup \\left[ 2,+\\infty \\right)$,求实数$m$的值;(2)若不等式$f\\left( x \\right)\\le {{2}^{y}}+\\frac{a}{{{2}^{y}}}+\\left| 2x+3 \\right|$对任意的实数$x,y\\in R$恒成立,求实数$a$的最小值.\n\n",
"response": "(1)直接写出不等式,解含有绝对值的函数不等式即可;(2)这是恒成立求参的问题,根据绝对值三角不等式得到左侧函数的最值,再结合均值不等式得最值.(1)由条件得$\\left| 2x \\right|\\le 2m+1$得$-m-\\frac{1}{2}\\le x\\le m+\\frac{1}{2}$,所以$m=\\frac{3}{2}$.(2)原不等式等价于$\\left| 2x-1 \\right|-\\left| 2x+3 \\right|\\le {{2}^{y}}+\\frac{a}{{{2}^{y}}}$,而$\\left| 2x-1 \\right|-\\left| 2x+3 \\right|\\le \\left| \\left( 2x-1 \\right)-\\left( 2x+3 \\right) \\right|=4$,所以${{2}^{y}}+\\frac{a}{{{2}^{y}}}\\ge 4$,则$a\\ge {{\\left[ {{2}^{y}}\\left( 4-{{2}^{y}} \\right) \\right]}_{\\text{max}}}=4$,当且仅当$y=1$时取得.\n答案是:(1) $m=\\frac{3}{2}$;(2) 最小值为$a=4$.",
"extract_answer": [
"(1) $m=\\frac{3}{2}$;(2) 最小值为$a=4$."
],
"question_type": "解答题"
}
]
```
## How to use
### 1. Prepare the dataset
```bash
cd opencompass
cp -rf /cpfs01/shared/public/liuhongwei/data/gaokao_math_dataset/gaokao_math ./data
```
📢:If you want to evaluate your own gaokao math data, replace the `test_v2.jsonl` with your own data, but follow the format above.
### 2. Set the evaluation model
Open `opencompass.datasets.gaokao_math.gaokao_math_gen_9b076f` and set the model name and API URL for evaluation; multiple URLs are supported for acceleration.
```python
...
gaokao_math_eval_cfg = dict(
evaluator=dict(type=GaoKaoMATHEvaluator, model_name='EVALUATE_MODEL_NAME', url=['http://0.0.0.0:23333/v1', 'http://...']))
...
```
We recommend the `Qwen2.5-72B-Instruct` model for evaluation.
### 3. Set Extractor model and run the evaluation
```python
from mmengine.config import read_base
from opencompass.models import HuggingFacewithChatTemplate
with read_base():
from opencompass.datasets.gaokao_math.gaokao_math_gen_9b076f import gaokao_math_datasets
trained_qwen2_1_5b_model = [ # trained extractor model
dict(
type=HuggingFacewithChatTemplate,
abbr='gaokao_math_extractor_1_5b_v02',
path='/cpfs01/shared/public/liuhongwei/models/gaokao_math_trained/gaokao_math_extractor_1_5b_v02',
max_out_len=1024,
batch_size=8,
run_cfg=dict(num_gpus=1),
)
]
datasets = sum([v for k, v in locals().items() if k.endswith("_datasets")], [])
models = sum([v for k, v in locals().items() if k.endswith("_model")], [])
...
```
### 4. Run the evaluation
```bash
python run.py eval.py --dump-eval-details # eval and dump the evaluation details to `results` folder
```
### 5. Evaluation results
| Evaluator / Extractor | Qwen2.5-72B-Instruct | gaokao_math_extractor_1.5b_v0.2 |
|-----------------------|-----------------------|----------------------------------|
| Qwen2.5-72B-Instruct (ACC) | 95.85 | 95.2 |
# OpenCompass config: LLM-based answer extraction for the GaoKao MATH set.
# An extractor model pulls the key answer out of a free-form response; a
# judge model (GaoKaoMATHEvaluator) then scores the extraction.
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.datasets import GaoKaoMATHDataset, GaoKaoMATHEvaluator

# Runtime extraction prompt (kept verbatim — {question_type}/{question}/
# {response} are filled per sample; doubled braces render as literal braces).
MATH_CN_PROMPT = """
你是一个数学阅卷专家,任务是从给定的回答句子中提取精确的关键答案。你必须只提供提取的关键答案,不包括任何额外的文字。
我将为你提供一个问题、回答句子和问题类型。回答句子是对所提供问题的回应。利用提供的信息,你必须准确而精确地确定并从回答句子中提取预期的关键答案。请不要对问题发表主观看法。
对于单选题,答案应该是选项字母,例如 "A";
对于多选题,答案应该是一个选项字母的列表,例如 ["A"] 或 ["A", "B", "C"];
对于填空题,答案应该是一个填入空白处的答案列表,列表的数量应该与问题中的空白数量相同,例如 ["$$\\frac{{1}}{{2}}$$"] 或 ["$$\\frac{{1}}{{2}}$$", "2"]。
对于问答题,类似填空题,为每个小问抽出相应答案,例如 ["$$\\frac{{1}}{{2}}$$"] 或 ["$$\\frac{{1}}{{2}}$$", "2"]。
如果回答句子提供了多个不同的答案,请仔细判断后面提供的答案是否是对前面答案的修正或修改。如果是这样,提取这个修正或修改后的答案作为最终答案。相反,如果回答句子在多个答案之间波动而没有明确的最终答案,你应该输出 [No valid answer]。
问题类型: {question_type}
原始问题: {question}
回答: {response}
提取的关键答案:
"""

gaokao_math_reader_cfg = dict(
    input_columns=['question', 'response', 'question_type'],
    output_column='extract_answer',
)

# Single-turn conversation: one HUMAN message carrying the extraction prompt.
_extract_round = [dict(role='HUMAN', prompt=MATH_CN_PROMPT)]

gaokao_math_infer_cfg = dict(
    prompt_template=dict(type=PromptTemplate, template=dict(round=_extract_round)),
    retriever=dict(type=ZeroRetriever),
    inferencer=dict(type=GenInferencer, max_out_len=512),
)

# Multiple judge endpoints are listed so requests can be spread for speed.
_judge_urls = [
    'http://22.8.73.119:23333/v1',
    'http://22.8.4.97:23333/v1',
    'http://22.8.22.254:23333/v1',
    'http://22.8.17.14:23333/v1',
]

gaokao_math_eval_cfg = dict(
    evaluator=dict(
        type=GaoKaoMATHEvaluator,
        model_name='Qwen/Qwen2.5-72B-Instruct',
        url=_judge_urls,
    ))

gaokao_math_datasets = [
    dict(
        type=GaoKaoMATHDataset,
        abbr='GaoKaoMATH',
        path='./data/gaokao_math/test_2k.json',
        reader_cfg=gaokao_math_reader_cfg,
        infer_cfg=gaokao_math_infer_cfg,
        eval_cfg=gaokao_math_eval_cfg,
    )
]
# Aggregator config: re-export the GovRepcrs generation dataset definitions
# from the versioned config below via mmengine's read_base() composition.
from mmengine.config import read_base
with read_base():
    from .govrepcrs_gen_db7930 import govrepcrs_datasets  # noqa: F401, F403
# OpenCompass config: GovReport-CRS summarization, scored with BLEU.
# Uses a plain-string prompt template (base-model style).
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import BleuEvaluator
from opencompass.datasets import GovRepcrsDataset
from opencompass.utils.text_postprocessors import general_cn_postprocess

# Evaluation-only here: both splits resolve to 'test'.
govrepcrs_reader_cfg = dict(
    input_columns='content',
    output_column='summary',
    train_split='test',
    test_split='test',
)

# Runtime prompt (kept verbatim); {content}/{summary} are filled per sample.
_prompt = 'Please summarize the following English report in English:{content}\n{summary}.'

govrepcrs_infer_cfg = dict(
    prompt_template=dict(type=PromptTemplate, template=_prompt),
    retriever=dict(type=ZeroRetriever),
    inferencer=dict(
        type=GenInferencer,
        batch_size=4,
        max_out_len=500,
        max_seq_len=8192,
    ),
)

govrepcrs_eval_cfg = dict(
    evaluator=dict(type=BleuEvaluator),
    pred_postprocessor=dict(type=general_cn_postprocess),
    dataset_postprocessor=dict(type=general_cn_postprocess),
)

govrepcrs_datasets = [
    dict(
        type=GovRepcrsDataset,
        path='./data/govrep/',
        abbr='GovRepcrs',
        reader_cfg=govrepcrs_reader_cfg,
        infer_cfg=govrepcrs_infer_cfg,
        eval_cfg=govrepcrs_eval_cfg,
    )
]
# OpenCompass config: GovReport-CRS summarization, scored with BLEU.
# Uses a chat-style template with a SYSTEM instruction (dialogue-model style).
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import BleuEvaluator
from opencompass.datasets import GovRepcrsDataset
from opencompass.utils.text_postprocessors import general_cn_postprocess

# Evaluation-only here: both splits resolve to 'test'.
govrepcrs_reader_cfg = dict(
    input_columns='content',
    output_column='summary',
    train_split='test',
    test_split='test',
)

# The SYSTEM turn carries the instruction (fallback_role='HUMAN' for models
# with no system slot); the BOT turn holds the {summary} placeholder.
_chat_template = dict(
    begin=[
        dict(
            role='SYSTEM',
            fallback_role='HUMAN',
            prompt='Please summarize the following English report in English:',
        ),
    ],
    round=[
        dict(role='HUMAN', prompt='{content}'),
        dict(role='BOT', prompt='{summary}'),
    ],
)

govrepcrs_infer_cfg = dict(
    prompt_template=dict(type=PromptTemplate, template=_chat_template),
    retriever=dict(type=ZeroRetriever),
    inferencer=dict(
        type=GenInferencer,
        batch_size=4,
        max_out_len=500,
        max_seq_len=8192,
    ),
)

govrepcrs_eval_cfg = dict(
    evaluator=dict(type=BleuEvaluator),
    pred_role='BOT',
    pred_postprocessor=dict(type=general_cn_postprocess),
    dataset_postprocessor=dict(type=general_cn_postprocess),
)

govrepcrs_datasets = [
    dict(
        type=GovRepcrsDataset,
        path='./data/govrep/',
        abbr='GovRepcrs',
        reader_cfg=govrepcrs_reader_cfg,
        infer_cfg=govrepcrs_infer_cfg,
        eval_cfg=govrepcrs_eval_cfg,
    )
]
# GPQA
```bash
python3 run.py --models hf_internlm2_7b --datasets gpqa_ppl_6bf57a --debug
python3 run.py --models hf_internlm2_chat_7b --datasets gpqa_gen_4baadb --debug
```
## Base Models
| model | GPQA_diamond |
|:------------------------:|---------------:|
| llama-7b-turbomind | 24.24 |
| llama-13b-turbomind | 25.25 |
| llama-30b-turbomind | 22.73 |
| llama-65b-turbomind | 21.72 |
| llama-2-7b-turbomind | 25.25 |
| llama-2-13b-turbomind | 23.74 |
| llama-2-70b-turbomind | 28.28 |
| llama-3-8b-turbomind | 31.82 |
| llama-3-70b-turbomind | 40.91 |
| internlm2-1.8b-turbomind | 24.24 |
| internlm2-7b-turbomind | 28.28 |
| internlm2-20b-turbomind | 31.31 |
| qwen-1.8b-turbomind | 28.79 |
| qwen-7b-turbomind | 24.75 |
| qwen-14b-turbomind | 27.78 |
| qwen-72b-turbomind | 31.31 |
| qwen1.5-0.5b-hf | 23.74 |
| qwen1.5-1.8b-hf | 28.79 |
| qwen1.5-4b-hf | 23.23 |
| qwen1.5-7b-hf | 20.71 |
| qwen1.5-14b-hf | 32.32 |
| qwen1.5-32b-hf | 30.81 |
| qwen1.5-72b-hf | 31.82 |
| qwen1.5-moe-a2-7b-hf | 28.79 |
| mistral-7b-v0.1-hf | 24.75 |
| mistral-7b-v0.2-hf | 23.74 |
| mixtral-8x7b-v0.1-hf | 28.79 |
| mixtral-8x22b-v0.1-hf | 36.36 |
| yi-6b-hf | 28.28 |
| yi-34b-hf | 35.86 |
| deepseek-7b-base-hf | 20.71 |
| deepseek-67b-base-hf | 25.25 |
## Chat Models
| model | GPQA_diamond |
|:-----------------------------:|---------------:|
| qwen1.5-0.5b-chat-hf | 19.70 |
| qwen1.5-1.8b-chat-hf | 29.80 |
| qwen1.5-4b-chat-hf | 25.25 |
| qwen1.5-7b-chat-hf | 31.82 |
| qwen1.5-14b-chat-hf | 30.30 |
| qwen1.5-32b-chat-hf | 31.31 |
| qwen1.5-72b-chat-hf | 32.83 |
| qwen1.5-110b-chat-hf | 35.86 |
| internlm2-chat-1.8b-hf | 25.76 |
| internlm2-chat-1.8b-sft-hf | 26.26 |
| internlm2-chat-7b-hf | 28.28 |
| internlm2-chat-7b-sft-hf | 27.27 |
| internlm2-chat-20b-hf | 30.30 |
| internlm2-chat-20b-sft-hf | 29.29 |
| llama-3-8b-instruct-hf | 25.76 |
| llama-3-70b-instruct-hf | 37.88 |
| llama-3-8b-instruct-lmdeploy | 25.76 |
| llama-3-70b-instruct-lmdeploy | 37.88 |
| mistral-7b-instruct-v0.1-hf | 30.30 |
| mistral-7b-instruct-v0.2-hf | 25.25 |
| mixtral-8x7b-instruct-v0.1-hf | 30.30 |
# OpenCompass config: GPQA multiple-choice evaluation using the
# openai_simple_eval prompt; answers are extracted from the trailing
# "ANSWER: $LETTER" line by GPQA_Simple_Eval_postprocess.
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.datasets import GPQADataset, GPQA_Simple_Eval_postprocess, GPQAEvaluator

# openai_simple_eval prompt (runtime string, kept verbatim).
align_prompt = """
Answer the following multiple choice question. The last line of your response should be of the following format: 'ANSWER: $LETTER' (without quotes) where LETTER is one of ABCD.
{question}
A) {A}
B) {B}
C) {C}
D) {D}
""".strip()

gpqa_reader_cfg = dict(
    input_columns=['question', 'A', 'B', 'C', 'D'],
    output_column='answer')

gpqa_infer_cfg = dict(
    prompt_template=dict(
        type=PromptTemplate,
        template=dict(
            round=[
                dict(role='HUMAN', prompt=align_prompt),
            ], )),
    retriever=dict(type=ZeroRetriever),
    inferencer=dict(type=GenInferencer))

gpqa_eval_cfg = dict(evaluator=dict(type=GPQAEvaluator),
                     pred_postprocessor=dict(type=GPQA_Simple_Eval_postprocess))

gpqa_datasets = []
# Only the 'diamond' subset is enabled; uncomment entries to add others.
gpqa_subsets = {
    # 'extended': 'gpqa_extended.csv',
    # 'main': 'gpqa_main.csv',
    'diamond': 'gpqa_diamond.csv'
}

# Iterate items() directly instead of list(dict.keys()) plus a per-key lookup.
for split, filename in gpqa_subsets.items():
    gpqa_datasets.append(
        dict(
            abbr='GPQA_' + split,
            type=GPQADataset,
            path='./data/gpqa/',
            name=filename,
            reader_cfg=gpqa_reader_cfg,
            infer_cfg=gpqa_infer_cfg,
            eval_cfg=gpqa_eval_cfg)
    )
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment