Commit fb111087 authored by yingfhu

[Feat] support opencompass

parent 7d346000
from mmengine.config import read_base
with read_base():
    from .SuperGLUE_COPA_ppl_ddb78c import COPA_datasets  # noqa: F401, F403
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import PPLInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import HFDataset
COPA_reader_cfg = dict(
    input_columns=['question', 'premise', 'choice1', 'choice2'],
    output_column='label',
    test_split='train')
COPA_infer_cfg = dict(
    prompt_template=dict(
        type=PromptTemplate,
        # Each key corresponds to a label value; PPLInferencer scores both
        # filled-in candidates and predicts the lower-perplexity one.
        template={
            0: "Premise: {premise}\nQuestion: {question}\nAnswer: {choice1}.",
            1: "Premise: {premise}\nQuestion: {question}\nAnswer: {choice2}.",
        }),
    retriever=dict(type=ZeroRetriever),
    inferencer=dict(type=PPLInferencer))
COPA_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
COPA_datasets = [
    dict(
        type=HFDataset,
        abbr='COPA',
        path='json',
        data_files='./data/SuperGLUE/COPA/val.jsonl',
        split='train',
        reader_cfg=COPA_reader_cfg,
        infer_cfg=COPA_infer_cfg,
        eval_cfg=COPA_eval_cfg)
]
from mmengine.config import read_base
with read_base():
    from .SuperGLUE_MultiRC_gen_26c9dc import MultiRC_datasets  # noqa: F401, F403
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import PPLInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import MultiRCDataset
MultiRC_reader_cfg = dict(
    input_columns=['question', 'text', 'answer'], output_column='label')
MultiRC_infer_cfg = dict(
    prompt_template=dict(
        type=PromptTemplate,
        template={
            0: "Passage: {text}\nQuestion: {question}\nAnswer: {answer}. It is false.",
            1: "Passage: {text}\nQuestion: {question}\nAnswer: {answer}. It is true.",
        }),
    retriever=dict(type=ZeroRetriever),
    inferencer=dict(type=PPLInferencer))
MultiRC_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
MultiRC_datasets = [
    dict(
        type=MultiRCDataset,
        abbr='MultiRC',
        path='./data/SuperGLUE/MultiRC/val.jsonl',
        reader_cfg=MultiRC_reader_cfg,
        infer_cfg=MultiRC_infer_cfg,
        eval_cfg=MultiRC_eval_cfg)
]
from mmengine.config import read_base
with read_base():
    from .SuperGLUE_RTE_ppl_29a22c import RTE_datasets  # noqa: F401, F403
from mmengine.config import read_base
with read_base():
    from .SuperGLUE_ReCoRD_gen_d8f19c import ReCoRD_datasets  # noqa: F401, F403
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import EMEvaluator
from opencompass.datasets import ReCoRDDataset
ReCoRD_reader_cfg = dict(
    input_columns=["question", "text"],
    output_column="answers",
)
ReCoRD_infer_cfg = dict(
    prompt_template=dict(
        type=PromptTemplate,
        template=dict(round=[
            dict(
                role="HUMAN",
                prompt=
                "Passage: {text}\nResult: {question}\nQuestion: What entity does ____ refer to in the result? Give me the entity name:"
            ),
        ]),
    ),
    retriever=dict(type=ZeroRetriever),
    inferencer=dict(type=GenInferencer),
)
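# In the evaluation config below, `pred_role='BOT'` tells the evaluator to
# take the model's reply (the BOT turn) as the prediction, and the "ReCoRD"
# post-processor cleans the generated text before EMEvaluator performs
# exact-match scoring against the reference answers.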
ReCoRD_eval_cfg = dict(
    evaluator=dict(type=EMEvaluator),
    pred_role='BOT',
    pred_postprocessor=dict(type="ReCoRD"),
)
ReCoRD_datasets = [
    dict(
        type=ReCoRDDataset,
        abbr="ReCoRD",
        path="./data/SuperGLUE/ReCoRD/val.jsonl",
        reader_cfg=ReCoRD_reader_cfg,
        infer_cfg=ReCoRD_infer_cfg,
        eval_cfg=ReCoRD_eval_cfg,
    )
]
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import PPLInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import WSCDataset_V2
WSC_reader_cfg = dict(
    input_columns=["span1", "span2", "text"],
    output_column="label",
)
WSC_infer_cfg = dict(
    prompt_template=dict(
        type=PromptTemplate,
        template={
            'A':
            dict(round=[
                dict(
                    role="HUMAN",
                    prompt=
                    "{text}\nDo '{span1}' and '{span2}' refer to the same entity in the above sentence?"
                ),
                dict(role='BOT', prompt='Yes'),
            ]),
            'B':
            dict(round=[
                dict(
                    role="HUMAN",
                    prompt=
                    "{text}\nDo '{span1}' and '{span2}' refer to the same entity in the above sentence?"
                ),
                dict(role='BOT', prompt='No'),
            ]),
        },
    ),
    retriever=dict(type=ZeroRetriever),
    inferencer=dict(type=PPLInferencer),
)
WSC_eval_cfg = dict(evaluator=dict(type=AccEvaluator), )
WSC_datasets = [
    dict(
        abbr="WSC",
        type=WSCDataset_V2,
        path="./data/SuperGLUE/WSC/val.jsonl",
        reader_cfg=WSC_reader_cfg,
        infer_cfg=WSC_infer_cfg,
        eval_cfg=WSC_eval_cfg,
    )
]
from mmengine.config import read_base
with read_base():
    from .XLSum_gen_1cc5f6 import XLSum_datasets  # noqa: F401, F403
from mmengine.config import read_base
with read_base():
    from .bbh_gen_58abc3 import bbh_datasets  # noqa: F401, F403
from mmengine.config import read_base
with read_base():
    from ..ceval.ceval_ppl_275812 import ceval_datasets
    from ..bbh.bbh_gen_58abc3 import bbh_datasets
    from ..CLUE_CMRC.CLUE_CMRC_gen_72a8d5 import CMRC_datasets
    from ..CLUE_DRCD.CLUE_DRCD_gen_03b96b import DRCD_datasets
    from ..CLUE_afqmc.CLUE_afqmc_ppl_c83c36 import afqmc_datasets
    from ..FewCLUE_bustm.FewCLUE_bustm_ppl_47f2ab import bustm_datasets
    from ..FewCLUE_chid.FewCLUE_chid_ppl_b6cd88 import chid_datasets
    from ..FewCLUE_cluewsc.FewCLUE_cluewsc_ppl_2a9e61 import cluewsc_datasets
    from ..FewCLUE_eprstmt.FewCLUE_eprstmt_ppl_d3c387 import eprstmt_datasets
    from ..humaneval.humaneval_gen_d428f1 import humaneval_datasets
    from ..mbpp.mbpp_gen_4104e4 import mbpp_datasets
    from ..lambada.lambada_gen_7ffe3d import lambada_datasets
    from ..storycloze.storycloze_ppl_c1912d import storycloze_datasets
    from ..SuperGLUE_AX_b.SuperGLUE_AX_b_ppl_4bd960 import AX_b_datasets
    from ..SuperGLUE_AX_g.SuperGLUE_AX_g_ppl_8d9bf9 import AX_g_datasets
    from ..SuperGLUE_BoolQ.SuperGLUE_BoolQ_ppl_f80fb0 import BoolQ_datasets
    from ..SuperGLUE_CB.SuperGLUE_CB_ppl_32adbb import CB_datasets
    from ..SuperGLUE_COPA.SuperGLUE_COPA_ppl_ddb78c import COPA_datasets
    from ..SuperGLUE_MultiRC.SuperGLUE_MultiRC_ppl_83a304 import MultiRC_datasets
    from ..SuperGLUE_RTE.SuperGLUE_RTE_ppl_29a22c import RTE_datasets
    from ..SuperGLUE_ReCoRD.SuperGLUE_ReCoRD_gen_d8f19c import ReCoRD_datasets
    from ..SuperGLUE_WiC.SuperGLUE_WiC_ppl_4118db import WiC_datasets
    from ..SuperGLUE_WSC.SuperGLUE_WSC_ppl_85f45f import WSC_datasets
    from ..race.race_ppl_04e06a import race_datasets
    from ..math.math_gen_78bcba import math_datasets
    from ..gsm8k.gsm8k_gen_2dd372 import gsm8k_datasets
    from ..summedits.summedits_ppl_163352 import summedits_datasets
    from ..hellaswag.hellaswag_ppl_8e07d6 import hellaswag_datasets
    from ..piqa.piqa_ppl_788dbe import piqa_datasets
    from ..winogrande.winogrande_ppl_00f8ad import winogrande_datasets
    from ..obqa.obqa_ppl_2b5b12 import obqa_datasets
    from ..nq.nq_gen_c00b89 import nq_datasets
    from ..triviaqa.triviaqa_gen_cc3cbf import triviaqa_datasets
    from ..crowspairs.crowspairs_ppl_f60797 import crowspairs_datasets
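# The expression below concatenates every `*_datasets` list imported above
# (sum() with an empty-list start value), producing the single `datasets`
# list that the OpenCompass entry point reads as the full evaluation suite.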
datasets = sum((v for k, v in locals().items() if k.endswith('_datasets')), [])
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.datasets import NaturalQuestionDataset, NQEvaluator
nq_reader_cfg = dict(
    input_columns=['question'], output_column='answer', train_split='test')
nq_infer_cfg = dict(
    ice_template=dict(
        type=PromptTemplate,
        template="Q: </Q>?\nA: </A>",
        column_token_map={
            'question': '</Q>',
            'answer': '</A>'
        }),
    prompt_template=dict(
        type=PromptTemplate,
        template="</E>Question: </Q>? Answer: ",
        column_token_map={
            'question': '</Q>',
            'answer': '</A>'
        },
        ice_token='</E>'),
    retriever=dict(type=ZeroRetriever),
    inferencer=dict(type=GenInferencer))
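# In the templates above, `column_token_map` substitutes dataset columns for
# the `</Q>`/`</A>` tokens, and `ice_token` marks where in-context examples
# rendered from `ice_template` are spliced into the prompt; with ZeroRetriever
# no examples are retrieved, so `</E>` resolves to an empty string.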
nq_eval_cfg = dict(evaluator=dict(type=NQEvaluator))
nq_datasets = [
    dict(
        type=NaturalQuestionDataset,
        abbr='nq',
        path='/mnt/petrelfs/wuzhiyong/datasets/nq/',
        reader_cfg=nq_reader_cfg,
        infer_cfg=nq_infer_cfg,
        eval_cfg=nq_eval_cfg)
]
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.datasets import TriviaQADataset, TriviaQAEvaluator
triviaqa_reader_cfg = dict(
    input_columns=['question'],
    output_column='answer',
    train_split='dev',
    test_split='dev')
triviaqa_infer_cfg = dict(
    ice_template=dict(
        type=PromptTemplate,
        template='Q: </Q>\nA: </A>',
        column_token_map={
            'question': '</Q>',
            'answer': '</A>'
        }),
    prompt_template=dict(
        type=PromptTemplate,
        template='</E>Question: </Q> Answer:',
        column_token_map={
            'question': '</Q>',
            'answer': '</A>'
        },
        ice_token='</E>'),
    retriever=dict(type=ZeroRetriever),
    inferencer=dict(type=GenInferencer, max_out_len=50))
triviaqa_eval_cfg = dict(evaluator=dict(type=TriviaQAEvaluator))
triviaqa_datasets = [
    dict(
        type=TriviaQADataset,
        abbr='triviaqa',
        path='./data/triviaqa/',
        reader_cfg=triviaqa_reader_cfg,
        infer_cfg=triviaqa_infer_cfg,
        eval_cfg=triviaqa_eval_cfg)
]
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.datasets import HFDataset, HumanEvaluator
humaneval_reader_cfg = dict(
    input_columns=['prompt'], output_column='task_id', train_split='test')
# TODO: allow empty output-column
humaneval_infer_cfg = dict(
    prompt_template=dict(
        type=PromptTemplate,
        template=dict(
            begin=[
                dict(
                    role='SYSTEM',
                    fallback_role='HUMAN',
                    prompt='Complete the following python code:'),
            ],
            round=[
                dict(role='HUMAN', prompt='{prompt}'),
            ])),
    retriever=dict(type=ZeroRetriever),
    inferencer=dict(type=GenInferencer))
humaneval_eval_cfg = dict(
    evaluator=dict(type=HumanEvaluator),
    pred_role='BOT',
    k=[1, 10, 100],  # pass@k cutoffs; this parameter applies only to humaneval
    pred_postprocessor=dict(type='humaneval'),
)
humaneval_datasets = [
    dict(
        type=HFDataset,
        path='openai_humaneval',
        reader_cfg=humaneval_reader_cfg,
        infer_cfg=humaneval_infer_cfg,
        eval_cfg=humaneval_eval_cfg)
]
from mmengine.config import read_base
with read_base():
    from .jigsawmultilingual_ppl_640128 import jigsawmultilingual_datasets  # noqa: F401, F403
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import CLPInferencer
from opencompass.openicl.icl_evaluator import AUCROCEvaluator
from opencompass.datasets import JigsawMultilingualDataset
jigsawmultilingual_reader_cfg = dict(
    input_columns=['text'],
    output_column='label',
    train_split='test',
    test_split='test')
# The Jigsaw multilingual toxic comment dataset is not hosted on Hugging Face.
# Please download the test files `test.csv` and `test_labels.csv` from the
# link below and move them to the data/ folder to perform the evaluation.
# https://www.kaggle.com/competitions/jigsaw-multilingual-toxic-comment-classification/data
jigsawmultilingual_infer_cfg = dict(
    prompt_template=dict(
        type=PromptTemplate,
        template=dict(round=[
            dict(
                role="HUMAN",
                prompt="Text: {text}\nQuestion: Does the above text contain "
                "rude, hateful, aggressive, disrespectful or unreasonable "
                "language?\nAnswer:")
        ])),
    retriever=dict(type=ZeroRetriever),
    inferencer=dict(type=CLPInferencer))
jigsawmultilingual_eval_cfg = dict(evaluator=dict(type=AUCROCEvaluator), )
lang = ['es', 'fr', 'it', 'pt', 'ru', 'tr']
jigsawmultilingual_datasets = []
for _l in lang:
    jigsawmultilingual_datasets.append(
        dict(
            abbr=f'jigsaw_multilingual_{_l}',
            type=JigsawMultilingualDataset,
            path='data/test.csv',
            label='data/test_labels.csv',
            lang=_l,
            reader_cfg=jigsawmultilingual_reader_cfg,
            infer_cfg=jigsawmultilingual_infer_cfg,
            eval_cfg=jigsawmultilingual_eval_cfg))
del lang, _l
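# A minimal, standalone sketch of how one might sanity-check that the manually
# downloaded Kaggle files referenced above are in place before launching the
# evaluation. It is illustrative only: the file layout follows the comment
# block above, and nothing here is part of the OpenCompass API.
import csv
import os
for _f in ('data/test.csv', 'data/test_labels.csv'):
    if not os.path.exists(_f):
        raise FileNotFoundError(
            f'{_f} is missing; download it from the Kaggle competition page '
            'listed above and place it under data/.')
with open('data/test_labels.csv', newline='', encoding='utf-8') as fh:
    header = next(csv.reader(fh))  # peek at the header row only
print('test_labels.csv columns:', header)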
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import CLPInferencer
from opencompass.openicl.icl_evaluator import AUCROCEvaluator
from opencompass.datasets import JigsawMultilingualDataset
jigsawmultilingual_reader_cfg = dict(
    input_columns=['text'],
    output_column='label',
    train_split='test',
    test_split='test')
# The Jigsaw multilingual toxic comment dataset is not hosted on Hugging Face.
# Please download the test files `test.csv` and `test_labels.csv` from the
# link below and move them to the data/ folder to perform the evaluation.
# https://www.kaggle.com/competitions/jigsaw-multilingual-toxic-comment-classification/data
jigsawmultilingual_infer_cfg = dict(
    prompt_template=dict(
        type=PromptTemplate,
        template='Text: {text}\nQuestion: '
        'Does the above text contain rude, hateful, aggressive, disrespectful '
        'or unreasonable language?\nAnswer:'),
    retriever=dict(type=ZeroRetriever),
    inferencer=dict(type=CLPInferencer))
jigsawmultilingual_eval_cfg = dict(evaluator=dict(type=AUCROCEvaluator), )
lang = ['es', 'fr', 'it', 'pt', 'ru', 'tr']
jigsawmultilingual_datasets = []
for _l in lang:
    jigsawmultilingual_datasets.append(
        dict(
            abbr=f'jigsaw_multilingual_{_l}',
            type=JigsawMultilingualDataset,
            path='data/test.csv',
            label='data/test_labels.csv',
            lang=_l,
            reader_cfg=jigsawmultilingual_reader_cfg,
            infer_cfg=jigsawmultilingual_infer_cfg,
            eval_cfg=jigsawmultilingual_eval_cfg))
del lang, _l
from mmengine.config import read_base
with read_base():
    from .lambada_gen_7ffe3d import lambada_datasets  # noqa: F401, F403
from mmengine.config import read_base
with read_base():
    from .math_gen_78bcba import math_datasets  # noqa: F401, F403
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.datasets import MATHDataset, MATHEvaluator
math_reader_cfg = dict(input_columns=['problem'], output_column='solution')
math_infer_cfg = dict(
    prompt_template=dict(
        type=PromptTemplate,
        template='''Problem:
Find the domain of the expression $\\frac{{\\sqrt{{x-2}}}}{{\\sqrt{{5-x}}}}$.}}
Solution:
The expressions inside each square root must be non-negative. Therefore, $x-2 \\ge 0$, so $x\\ge2$, and $5 - x \\ge 0$, so $x \\le 5$. Also, the denominator cannot be equal to zero, so $5-x>0$, which gives $x<5$. Therefore, the domain of the expression is $\\boxed{{[2,5)}}$.
Final Answer: The final answer is $[2,5)$. I hope it is correct.
Problem:
If $\\det \\mathbf{{A}} = 2$ and $\\det \\mathbf{{B}} = 12,$ then find $\\det (\\mathbf{{A}} \\mathbf{{B}}).$
Solution:
We have that $\\det (\\mathbf{{A}} \\mathbf{{B}}) = (\\det \\mathbf{{A}})(\\det \\mathbf{{B}}) = (2)(12) = \\boxed{{24}}.$
Final Answer: The final answer is $24$. I hope it is correct.
Problem:
Terrell usually lifts two 20-pound weights 12 times. If he uses two 15-pound weights instead, how many times must Terrell lift them in order to lift the same total weight?
Solution:
If Terrell lifts two 20-pound weights 12 times, he lifts a total of $2\\cdot 12\\cdot20=480$ pounds of weight. If he lifts two 15-pound weights instead for $n$ times, he will lift a total of $2\\cdot15\\cdot n=30n$ pounds of weight. Equating this to 480 pounds, we can solve for $n$: \\begin{{align*}} 30n&=480\\\\ \\Rightarrow\\qquad n&=480/30=\\boxed{{16}} \\end{{align*}}
Final Answer: The final answer is $16$. I hope it is correct.
Problem:
If the system of equations: \\begin{{align*}} 6x-4y&=a,\\\\ 6y-9x &=b. \\end{{align*}}has a solution $(x, y)$ where $x$ and $y$ are both nonzero, find $\\frac{{a}}{{b}},$ assuming $b$ is nonzero.
Solution:
If we multiply the first equation by $-\\frac{{3}}{{2}}$, we obtain $$6y-9x=-\\frac{{3}}{{2}}a.$$Since we also know that $6y-9x=b$, we have $$-\\frac{{3}}{{2}}a=b\\Rightarrow\\frac{{a}}{{b}}=\\boxed{{-\\frac{{2}}{{3}}}}.$$
Final Answer: The final answer is $-\\frac{{2}}{{3}}$. I hope it is correct.
Problem:
{problem}
Solution:
{solution}'''),
    retriever=dict(type=ZeroRetriever),
    inferencer=dict(type=GenInferencer, max_out_len=512))
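# Note on the few-shot template above: literal LaTeX braces are written
# doubled (`{{` / `}}`) because the template is later formatted with the
# `{problem}` and `{solution}` placeholders, so single braces would otherwise
# be interpreted as format fields; LaTeX backslashes are escaped as `\\` so
# that Python does not treat sequences like `\f` or `\b` as string escapes.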
math_eval_cfg = dict(
    evaluator=dict(type=MATHEvaluator), pred_postprocessor=dict(type='math'))
math_datasets = [
    dict(
        type=MATHDataset,
        abbr='math',
        path='./data/math/math.json',
        reader_cfg=math_reader_cfg,
        infer_cfg=math_infer_cfg,
        eval_cfg=math_eval_cfg)
]