[Sync] deprecate old mbpps (#1064)

8c85edd1 · Fengzhe Zhou · GitHub · c1724013 · 8c85edd1 · 8c85edd1
Unverified commit 8c85edd1, authored Apr 19, 2024 by Fengzhe Zhou; committed by GitHub on Apr 19, 2024.
20 changed files
--- a/configs/datasets/collections/base_core.py
+++ b/configs/datasets/collections/base_core.py
@@ -15,6 +15,6 @@ with read_base():
    from ..math.math_evaluatorv2_gen_9d2049 import math_datasets
    from ..TheoremQA.TheoremQA_post_v2_gen_2c2583 import TheoremQA_datasets
    from ..humaneval.humaneval_gen_d2537e import humaneval_datasets
-    from ..mbpp.sanitized_mbpp_gen_cb43ef import sanitized_mbpp_datasets
+    from ..mbpp.deprecated_sanitized_mbpp_gen_cb43ef import sanitized_mbpp_datasets

 datasets = sum((v for k, v in locals().items() if k.endswith("_datasets")), [])
--- a/configs/datasets/collections/base_medium.py
+++ b/configs/datasets/collections/base_medium.py
@@ -7,7 +7,7 @@ with read_base():
    from ..GaokaoBench.GaokaoBench_mixed_f2038e import GaokaoBench_datasets
    from ..bbh.bbh_gen_5b92b0 import bbh_datasets
    from ..humaneval.humaneval_gen_8e312c import humaneval_datasets
-    from ..mbpp.mbpp_gen_1e1056 import mbpp_datasets
+    from ..mbpp.deprecated_mbpp_gen_1e1056 import mbpp_datasets
    from ..CLUE_C3.CLUE_C3_ppl_e24a31 import C3_datasets
    from ..CLUE_CMRC.CLUE_CMRC_gen_1bd3c8 import CMRC_datasets
    from ..CLUE_DRCD.CLUE_DRCD_gen_1bd3c8 import DRCD_datasets

--- a/configs/datasets/collections/base_medium_llama.py
+++ b/configs/datasets/collections/base_medium_llama.py
@@ -7,7 +7,7 @@ with read_base():
    from ..GaokaoBench.GaokaoBench_mixed_f2038e import GaokaoBench_datasets
    from ..bbh.bbh_gen_5b92b0 import bbh_datasets
    from ..humaneval.humaneval_gen_a82cae import humaneval_datasets
-    from ..mbpp.mbpp_gen_1e1056 import mbpp_datasets
+    from ..mbpp.deprecated_mbpp_gen_1e1056 import mbpp_datasets
    from ..CLUE_C3.CLUE_C3_ppl_e24a31 import C3_datasets
    from ..CLUE_CMRC.CLUE_CMRC_gen_1bd3c8 import CMRC_datasets
    from ..CLUE_DRCD.CLUE_DRCD_gen_1bd3c8 import DRCD_datasets

--- a/configs/datasets/collections/base_small.py
+++ b/configs/datasets/collections/base_small.py
@@ -11,7 +11,7 @@ with read_base():
    from ..FewCLUE_cluewsc.FewCLUE_cluewsc_ppl_868415 import cluewsc_datasets
    from ..FewCLUE_eprstmt.FewCLUE_eprstmt_ppl_f1e631 import eprstmt_datasets
    from ..humaneval.humaneval_gen_8e312c import humaneval_datasets
-    from ..mbpp.mbpp_gen_1e1056 import mbpp_datasets
+    from ..mbpp.deprecated_mbpp_gen_1e1056 import mbpp_datasets
    from ..lambada.lambada_gen_217e11 import lambada_datasets
    from ..storycloze.storycloze_ppl_496661 import storycloze_datasets
    from ..SuperGLUE_AX_b.SuperGLUE_AX_b_ppl_6db806 import AX_b_datasets

--- a/configs/datasets/collections/chat_core.py
+++ b/configs/datasets/collections/chat_core.py
@@ -15,6 +15,6 @@ with read_base():
    from ..math.math_evaluatorv2_gen_cecb31 import math_datasets
    from ..TheoremQA.TheoremQA_post_v2_gen_ef26ca import TheoremQA_datasets
    from ..humaneval.humaneval_gen_8e312c import humaneval_datasets
-    from ..mbpp.sanitized_mbpp_gen_1e1056 import sanitized_mbpp_datasets
+    from ..mbpp.deprecated_sanitized_mbpp_gen_1e1056 import sanitized_mbpp_datasets

 datasets = sum((v for k, v in locals().items() if k.endswith("_datasets")), [])
--- a/configs/datasets/collections/chat_medium.py
+++ b/configs/datasets/collections/chat_medium.py
@@ -7,7 +7,7 @@ with read_base():
    from ..GaokaoBench.GaokaoBench_gen_5cfe9e import GaokaoBench_datasets
    from ..bbh.bbh_gen_5b92b0 import bbh_datasets
    from ..humaneval.humaneval_gen_8e312c import humaneval_datasets
-    from ..mbpp.mbpp_gen_1e1056 import mbpp_datasets
+    from ..mbpp.deprecated_mbpp_gen_1e1056 import mbpp_datasets
    from ..CLUE_C3.CLUE_C3_gen_8c358f import C3_datasets
    from ..CLUE_CMRC.CLUE_CMRC_gen_1bd3c8 import CMRC_datasets
    from ..CLUE_DRCD.CLUE_DRCD_gen_1bd3c8 import DRCD_datasets

--- a/configs/datasets/collections/chat_small.py
+++ b/configs/datasets/collections/chat_small.py
@@ -12,7 +12,7 @@ with read_base():
    from ..FewCLUE_cluewsc.FewCLUE_cluewsc_gen_c68933 import cluewsc_datasets
    from ..FewCLUE_eprstmt.FewCLUE_eprstmt_gen_740ea0 import eprstmt_datasets
    from ..humaneval.humaneval_gen_8e312c import humaneval_datasets
-    from ..mbpp.mbpp_gen_1e1056 import mbpp_datasets
+    from ..mbpp.deprecated_mbpp_gen_1e1056 import mbpp_datasets
    from ..lambada.lambada_gen_217e11 import lambada_datasets
    from ..storycloze.storycloze_gen_7f656a import storycloze_datasets
    from ..SuperGLUE_AX_b.SuperGLUE_AX_b_gen_4dfefa import AX_b_datasets

--- a/configs/datasets/collections/leaderboard/qwen.py
+++ b/configs/datasets/collections/leaderboard/qwen.py
@@ -44,7 +44,7 @@ with read_base():
    from ...gsm8k.gsm8k_gen_1d7fe4 import gsm8k_datasets
    from ...drop.drop_gen_8a9ed9 import drop_datasets
    from ...humaneval.humaneval_gen_a82cae import humaneval_datasets
-    from ...mbpp.mbpp_gen_1e1056 import mbpp_datasets
+    from ...mbpp.deprecated_mbpp_gen_1e1056 import mbpp_datasets
    from ...bbh.bbh_gen_5bf00b import bbh_datasets



--- a/configs/datasets/collections/leaderboard/qwen_chat.py
+++ b/configs/datasets/collections/leaderboard/qwen_chat.py
@@ -44,7 +44,7 @@ with read_base():
    from ...gsm8k.gsm8k_gen_1d7fe4 import gsm8k_datasets
    from ...drop.drop_gen_8a9ed9 import drop_datasets
    from ...humaneval.humaneval_gen_a82cae import humaneval_datasets
-    from ...mbpp.mbpp_gen_1e1056 import mbpp_datasets
+    from ...mbpp.deprecated_mbpp_gen_1e1056 import mbpp_datasets
    from ...bbh.bbh_gen_5b92b0 import bbh_datasets



--- a/configs/datasets/gpqa/gpqa_gen_015262.py
+++ b/configs/datasets/gpqa/gpqa_gen_015262.py
+from opencompass.openicl.icl_prompt_template import PromptTemplate
+from opencompass.openicl.icl_retriever import ZeroRetriever
+from opencompass.openicl.icl_inferencer import GenInferencer
+from opencompass.datasets import GPQADataset, GPQAEvaluator
+from opencompass.utils import first_option_postprocess
+
+# Reader config: the five columns substituted into the prompt template
+# ('question' plus the four options) and the column holding the reference
+# answer.
+gpqa_reader_cfg = dict(
+    input_columns=['question', 'A', 'B', 'C', 'D'],
+    output_column='answer')
+
+# Inference config: one HUMAN turn built with PromptTemplate, paired with
+# ZeroRetriever and GenInferencer.
+gpqa_infer_cfg = dict(
+    prompt_template=dict(
+        type=PromptTemplate,
+        template=dict(
+            round=[
+                # The adjacent string literals below are concatenated into a
+                # single prompt: the question, the four lettered choices, and
+                # an instruction asking for the reply in the form
+                # "The correct answer is (...)".
+                dict(role='HUMAN', prompt='What is the correct answer to this question: {question}\nChoices:\n'
+                                          '(A){A}\n'
+                                          '(B){B}\n'
+                                          '(C){C}\n'
+                                          '(D){D}\n'
+                                          'Format your response as follows: "The correct answer is (insert answer here)"'),
+            ], )),
+    retriever=dict(type=ZeroRetriever),
+    inferencer=dict(type=GenInferencer))
+
+# Evaluation config: predictions go through first_option_postprocess,
+# configured with the option letters 'ABCD', and are scored by GPQAEvaluator.
+gpqa_eval_cfg = dict(evaluator=dict(type=GPQAEvaluator),
+                     pred_postprocessor=dict(type=first_option_postprocess, options='ABCD'))
+
+# Populated by the loop below with one dataset entry per subset.
+gpqa_datasets = []
+# Maps each subset name to the CSV file name passed as the dataset `name`.
+gpqa_subsets = {
+    'extended': 'gpqa_extended.csv',
+    'main': 'gpqa_main.csv',
+    'diamond': 'gpqa_diamond.csv'
+}
+
+# Build one entry per subset. Every entry shares the reader/infer/eval
+# configs defined above and differs only in `abbr` ('GPQA_' + subset name)
+# and `name` (the subset's CSV file name); all use the path './data/gpqa/'.
+for split in list(gpqa_subsets.keys()):
+    gpqa_datasets.append(
+        dict(
+            abbr='GPQA_' + split,
+            type=GPQADataset,
+            path='./data/gpqa/',
+            name=gpqa_subsets[split],
+            reader_cfg=gpqa_reader_cfg,
+            infer_cfg=gpqa_infer_cfg,
+            eval_cfg=gpqa_eval_cfg)
+    )
--- a/configs/datasets/mbpp/mbpp_gen_1e1056.py
+++ b/configs/datasets/mbpp/mbpp_gen_1e1056.py
--- a/configs/datasets/mbpp/mbpp_gen_6590b0.py
+++ b/configs/datasets/mbpp/mbpp_gen_6590b0.py
--- a/configs/datasets/mbpp/mbpp_gen_caa7ab.py
+++ b/configs/datasets/mbpp/mbpp_gen_caa7ab.py
--- a/configs/datasets/mbpp/mbpp_passk_gen_1e1056.py
+++ b/configs/datasets/mbpp/mbpp_passk_gen_1e1056.py
--- a/configs/datasets/mbpp/mbpp_repeat10_gen_1e1056.py
+++ b/configs/datasets/mbpp/mbpp_repeat10_gen_1e1056.py
--- a/configs/datasets/mbpp/sanitized_mbpp_gen_1e1056.py
+++ b/configs/datasets/mbpp/sanitized_mbpp_gen_1e1056.py
--- a/configs/datasets/mbpp/sanitized_mbpp_gen_cb43ef.py
+++ b/configs/datasets/mbpp/sanitized_mbpp_gen_cb43ef.py
--- a/configs/datasets/mbpp/sanitized_mbpp_passk_gen_1e1056.py
+++ b/configs/datasets/mbpp/sanitized_mbpp_passk_gen_1e1056.py
--- a/configs/datasets/mbpp/sanitized_mbpp_repeat10_gen_1e1056.py
+++ b/configs/datasets/mbpp/sanitized_mbpp_repeat10_gen_1e1056.py
--- a/configs/datasets/mbpp/mbpp_gen.py
+++ b/configs/datasets/mbpp/mbpp_gen.py
 from mmengine.config import read_base

 with read_base():
-    from .mbpp_gen_1e1056 import mbpp_datasets  # noqa: F401, F403
+    from .mbpp_gen_830460 import mbpp_datasets  # noqa: F401, F403