Update configs (#9)

* Update implements
* Update
86d5ec3d · Leymore · GitHub · 2d0b184b · 86d5ec3d · 86d5ec3d
Unverified commit 86d5ec3d, authored Jul 06, 2023 by Leymore; committed by GitHub on Jul 06, 2023.
20 changed files
--- a/configs/datasets/gsm8k/gsm8k_gen_1d7fe4.py
+++ b/configs/datasets/gsm8k/gsm8k_gen_1d7fe4.py
@@ -2,7 +2,7 @@ from opencompass.openicl.icl_prompt_template import PromptTemplate
 from opencompass.openicl.icl_retriever import ZeroRetriever
 from opencompass.openicl.icl_inferencer import GenInferencer
 from opencompass.openicl.icl_evaluator import AccEvaluator
-from opencompass.datasets import HFDataset
+from opencompass.datasets import HFDataset, gsm8k_postprocess, gsm8k_dataset_postprocess
 gsm8k_reader_cfg = dict(input_columns=['question'], output_column='answer')
@@ -26,8 +26,8 @@ gsm8k_infer_cfg = dict(
    inferencer=dict(type=GenInferencer, max_out_len=512))
 gsm8k_eval_cfg = dict(evaluator=dict(type=AccEvaluator),
-                      pred_postprocessor=dict(type='gsm8k'),
+                      pred_postprocessor=dict(type=gsm8k_postprocess),
-                      dataset_postprocessor=dict(type='gsm8k_dataset'))
+                      dataset_postprocessor=dict(type=gsm8k_dataset_postprocess))
 gsm8k_datasets = [
    dict(

--- a/configs/datasets/gsm8k/gsm8k_gen_1dce88.py
+++ b/configs/datasets/gsm8k/gsm8k_gen_1dce88.py
@@ -2,7 +2,7 @@ from opencompass.openicl.icl_prompt_template import PromptTemplate
 from opencompass.openicl.icl_retriever import ZeroRetriever
 from opencompass.openicl.icl_inferencer import GenInferencer
 from opencompass.openicl.icl_evaluator import AccEvaluator
-from opencompass.datasets import HFDataset
+from opencompass.datasets import HFDataset, gsm8k_postprocess, gsm8k_dataset_postprocess
 gsm8k_reader_cfg = dict(input_columns=['question'], output_column='answer')
@@ -73,8 +73,8 @@ Question: {question}{answer}
 gsm8k_eval_cfg = dict(
    evaluator=dict(type=AccEvaluator),
-    pred_postprocessor=dict(type='gsm8k'),
+    pred_postprocessor=dict(type=gsm8k_postprocess),
-    dataset_postprocessor=dict(type='gsm8k_dataset'))
+    dataset_postprocessor=dict(type=gsm8k_dataset_postprocess))
 gsm8k_datasets = [
    dict(

--- a/configs/datasets/gsm8k/gsm8k_gen_e9e91e.py
+++ b/configs/datasets/gsm8k/gsm8k_gen_e9e91e.py
@@ -2,7 +2,7 @@ from opencompass.openicl.icl_prompt_template import PromptTemplate
 from opencompass.openicl.icl_retriever import ZeroRetriever
 from opencompass.openicl.icl_inferencer import GenInferencer
 from opencompass.openicl.icl_evaluator import AccEvaluator
-from opencompass.datasets import HFDataset
+from opencompass.datasets import HFDataset, gsm8k_postprocess, gsm8k_dataset_postprocess
 gsm8k_reader_cfg = dict(input_columns=['question'], output_column='answer')
@@ -36,8 +36,8 @@ gsm8k_infer_cfg = dict(
 gsm8k_eval_cfg = dict(evaluator=dict(type=AccEvaluator),
                      pred_role="BOT",
-                      pred_postprocessor=dict(type='gsm8k'),
+                      pred_postprocessor=dict(type=gsm8k_postprocess),
-                      dataset_postprocessor=dict(type='gsm8k_dataset'))
+                      dataset_postprocessor=dict(type=gsm8k_dataset_postprocess))
 gsm8k_datasets = [
    dict(

--- a/configs/datasets/hellaswag/hellaswag_gen_6faab5.py
+++ b/configs/datasets/hellaswag/hellaswag_gen_6faab5.py
@@ -3,6 +3,7 @@ from opencompass.openicl.icl_retriever import ZeroRetriever
 from opencompass.openicl.icl_inferencer import GenInferencer
 from opencompass.openicl.icl_evaluator import AccEvaluator
 from opencompass.datasets import hellaswagDataset_V2
+from opencompass.utils.text_postprocessors import first_capital_postprocess
 hellaswag_reader_cfg = dict(
    input_columns=["ctx", "A", "B", "C", "D"],
@@ -30,7 +31,7 @@ hellaswag_infer_cfg = dict(
 hellaswag_eval_cfg = dict(
    evaluator=dict(type=AccEvaluator),
    pred_role="BOT",
-    pred_postprocessor=dict(type="first-capital"),
+    pred_postprocessor=dict(type=first_capital_postprocess),
 )
 hellaswag_datasets = [

--- a/configs/datasets/humaneval/humaneval_gen_6f294d.py
+++ b/configs/datasets/humaneval/humaneval_gen_6f294d.py
 from opencompass.openicl.icl_prompt_template import PromptTemplate
 from opencompass.openicl.icl_retriever import ZeroRetriever
 from opencompass.openicl.icl_inferencer import GenInferencer
-from opencompass.datasets import HFDataset, HumanEvaluator
+from opencompass.datasets import HFDataset, HumanEvaluator, humaneval_postprocess
 humaneval_reader_cfg = dict(
    input_columns=['prompt'], output_column='task_id', train_split='test')
@@ -27,7 +27,7 @@ humaneval_eval_cfg = dict(
    evaluator=dict(type=HumanEvaluator),
    pred_role='BOT',
    k=[1, 10, 100],  # the parameter only for humaneval
-    pred_postprocessor=dict(type='humaneval'),
+    pred_postprocessor=dict(type=humaneval_postprocess),
 )
 humaneval_datasets = [

--- a/configs/datasets/humaneval/humaneval_gen_8e312c.py
+++ b/configs/datasets/humaneval/humaneval_gen_8e312c.py
 from opencompass.openicl.icl_prompt_template import PromptTemplate
 from opencompass.openicl.icl_retriever import ZeroRetriever
 from opencompass.openicl.icl_inferencer import GenInferencer
-from opencompass.datasets import HFDataset, HumanEvaluator
+from opencompass.datasets import HFDataset, HumanEvaluator, humaneval_postprocess
 humaneval_reader_cfg = dict(
    input_columns=['prompt'], output_column='task_id', train_split='test')
@@ -22,7 +22,7 @@ humaneval_eval_cfg = dict(
    evaluator=dict(type=HumanEvaluator),
    pred_role='BOT',
    k=[1, 10, 100],  # the parameter only for humaneval
-    pred_postprocessor=dict(type='humaneval'),
+    pred_postprocessor=dict(type=humaneval_postprocess),
 )
 humaneval_datasets = [

--- a/configs/datasets/humaneval/humaneval_gen_fd5822.py
+++ b/configs/datasets/humaneval/humaneval_gen_fd5822.py
 from opencompass.openicl.icl_prompt_template import PromptTemplate
 from opencompass.openicl.icl_retriever import ZeroRetriever
 from opencompass.openicl.icl_inferencer import GenInferencer
-from opencompass.datasets import HFDataset, HumanEvaluator
+from opencompass.datasets import HFDataset, HumanEvaluator, humaneval_postprocess
 humaneval_reader_cfg = dict(
    input_columns=['prompt'], output_column='task_id', train_split='test')
@@ -17,7 +17,7 @@ humaneval_infer_cfg = dict(
 humaneval_eval_cfg = dict(
    evaluator=dict(type=HumanEvaluator),
    k=[1, 10, 100],  # the parameter only for humaneval
-    pred_postprocessor=dict(type='humaneval'),
+    pred_postprocessor=dict(type=humaneval_postprocess),
 )
 humaneval_datasets = [

--- a/configs/datasets/humaneval/humaneval_gen_ff7054.py
+++ b/configs/datasets/humaneval/humaneval_gen_ff7054.py
 from opencompass.openicl.icl_prompt_template import PromptTemplate
 from opencompass.openicl.icl_retriever import ZeroRetriever
 from opencompass.openicl.icl_inferencer import GenInferencer
-from opencompass.datasets import HFDataset, HumanEvaluator
+from opencompass.datasets import HFDataset, HumanEvaluator, humaneval_postprocess
 humaneval_reader_cfg = dict(
    input_columns=['prompt'], output_column='task_id', train_split='test')
@@ -27,7 +27,7 @@ humaneval_eval_cfg = dict(
    evaluator=dict(type=HumanEvaluator),
    pred_role='BOT',
    k=[1, 10, 100],  # the parameter only for humaneval
-    pred_postprocessor=dict(type='humaneval'),
+    pred_postprocessor=dict(type=humaneval_postprocess),
 )
 humaneval_datasets = [

--- a/configs/datasets/iwslt2017/iwslt2017_gen_69ce16.py
+++ b/configs/datasets/iwslt2017/iwslt2017_gen_69ce16.py
@@ -3,6 +3,7 @@ from opencompass.openicl.icl_retriever import BM25Retriever
 from opencompass.openicl.icl_inferencer import GenInferencer
 from opencompass.openicl.icl_evaluator import BleuEvaluator
 from opencompass.datasets import IWSLT2017Dataset
+from opencompass.utils.text_postprocessors import general_cn_postprocess
 iwslt2017_reader_cfg = dict(
    input_columns='en', output_column='de', train_split='validation')
@@ -15,10 +16,10 @@ iwslt2017_infer_cfg = dict(
    inferencer=dict(type=GenInferencer))
 iwslt2017_eval_cfg = dict(
-    evaluator=dict(type=BleuEvaluator), 
+    evaluator=dict(type=BleuEvaluator),
-    pred_role='BOT', 
+    pred_role='BOT',
-    pred_postprocessor=dict(type='general_cn'),
+    pred_postprocessor=dict(type=general_cn_postprocess),
-    dataset_postprocessor=dict(type='general_cn'))
+    dataset_postprocessor=dict(type=general_cn_postprocess))
 iwslt2017_datasets = [
    dict(
@@ -28,4 +29,4 @@ iwslt2017_datasets = [
        reader_cfg=iwslt2017_reader_cfg,
        infer_cfg=iwslt2017_infer_cfg,
        eval_cfg=iwslt2017_eval_cfg)
 ]
\ No newline at end of file
--- a/configs/datasets/iwslt2017/iwslt2017_gen_b4a814.py
+++ b/configs/datasets/iwslt2017/iwslt2017_gen_b4a814.py
@@ -3,6 +3,7 @@ from opencompass.openicl.icl_retriever import BM25Retriever
 from opencompass.openicl.icl_inferencer import GenInferencer
 from opencompass.openicl.icl_evaluator import BleuEvaluator
 from opencompass.datasets import IWSLT2017Dataset
+from opencompass.utils.text_postprocessors import general_cn_postprocess
 iwslt2017_reader_cfg = dict(
    input_columns='en', output_column='de', train_split='validation')
@@ -24,10 +25,10 @@ iwslt2017_infer_cfg = dict(
    inferencer=dict(type=GenInferencer))
 iwslt2017_eval_cfg = dict(
-    evaluator=dict(type=BleuEvaluator), 
+    evaluator=dict(type=BleuEvaluator),
-    pred_role='BOT', 
+    pred_role='BOT',
-    pred_postprocessor=dict(type='general_cn'),
+    pred_postprocessor=dict(type=general_cn_postprocess),
-    dataset_postprocessor=dict(type='general_cn'))
+    dataset_postprocessor=dict(type=general_cn_postprocess))
 iwslt2017_datasets = [
    dict(
@@ -37,4 +38,4 @@ iwslt2017_datasets = [
        reader_cfg=iwslt2017_reader_cfg,
        infer_cfg=iwslt2017_infer_cfg,
        eval_cfg=iwslt2017_eval_cfg)
 ]
\ No newline at end of file
--- a/configs/datasets/iwslt2017/iwslt2017_gen_d0ebd1.py
+++ b/configs/datasets/iwslt2017/iwslt2017_gen_d0ebd1.py
@@ -3,6 +3,7 @@ from opencompass.openicl.icl_retriever import BM25Retriever
 from opencompass.openicl.icl_inferencer import GenInferencer
 from opencompass.openicl.icl_evaluator import BleuEvaluator
 from opencompass.datasets import IWSLT2017Dataset
+from opencompass.utils.text_postprocessors import general_cn_postprocess
 iwslt2017_reader_cfg = dict(
    input_columns='en', output_column='de', train_split='validation')
@@ -22,10 +23,10 @@ iwslt2017_infer_cfg = dict(
    inferencer=dict(type=GenInferencer))
 iwslt2017_eval_cfg = dict(
-    evaluator=dict(type=BleuEvaluator), 
+    evaluator=dict(type=BleuEvaluator),
-    pred_role='BOT', 
+    pred_role='BOT',
-    pred_postprocessor=dict(type='general_cn'),
+    pred_postprocessor=dict(type=general_cn_postprocess),
-    dataset_postprocessor=dict(type='general_cn'))
+    dataset_postprocessor=dict(type=general_cn_postprocess))
 iwslt2017_datasets = [
    dict(
@@ -35,4 +36,4 @@ iwslt2017_datasets = [
        reader_cfg=iwslt2017_reader_cfg,
        infer_cfg=iwslt2017_infer_cfg,
        eval_cfg=iwslt2017_eval_cfg)
 ]
\ No newline at end of file
--- a/configs/datasets/jigsawmultilingual/jigsawmultilingual_ppl_1af0ae.py
+++ b/configs/datasets/jigsawmultilingual/jigsawmultilingual_ppl_1af0ae.py
@@ -33,8 +33,8 @@ for _l in lang:
        dict(
            abbr=f'jigsaw_multilingual_{_l}',
            type=JigsawMultilingualDataset,
-            path='data/test.csv',
+            path='data/jigsawmultilingual/test.csv',
-            label='data/test_labels.csv',
+            label='data/jigsawmultilingual/test_labels.csv',
            lang=_l,
            reader_cfg=jigsawmultilingual_reader_cfg,
            infer_cfg=jigsawmultilingual_infer_cfg,

--- a/configs/datasets/jigsawmultilingual/jigsawmultilingual_ppl_fe50d8.py
+++ b/configs/datasets/jigsawmultilingual/jigsawmultilingual_ppl_fe50d8.py
@@ -37,8 +37,8 @@ for _l in lang:
        dict(
            abbr=f'jigsaw_multilingual_{_l}',
            type=JigsawMultilingualDataset,
-            path='data/test.csv',
+            path='data/jigsawmultilingual/test.csv',
-            label='data/test_labels.csv',
+            label='data/jigsawmultilingual/test_labels.csv',
            lang=_l,
            reader_cfg=jigsawmultilingual_reader_cfg,
            infer_cfg=jigsawmultilingual_infer_cfg,

--- a/configs/datasets/lcsts/lcsts_gen_8ee1fe.py
+++ b/configs/datasets/lcsts/lcsts_gen_8ee1fe.py
@@ -2,7 +2,7 @@ from opencompass.openicl.icl_prompt_template import PromptTemplate
 from opencompass.openicl.icl_retriever import ZeroRetriever
 from opencompass.openicl.icl_inferencer import GenInferencer
 from opencompass.openicl.icl_evaluator import RougeEvaluator
-from opencompass.datasets import LCSTSDataset
+from opencompass.datasets import LCSTSDataset, lcsts_postprocess
 lcsts_reader_cfg = dict(input_columns=['content'], output_column='abst')
@@ -18,7 +18,7 @@ lcsts_infer_cfg = dict(
 lcsts_eval_cfg = dict(
    evaluator=dict(type=RougeEvaluator),
    pred_role='BOT',
-    pred_postprocessor=dict(type='lcsts'),
+    pred_postprocessor=dict(type=lcsts_postprocess),
 )
 lcsts_datasets = [

--- a/configs/datasets/lcsts/lcsts_gen_9b0b89.py
+++ b/configs/datasets/lcsts/lcsts_gen_9b0b89.py
@@ -2,7 +2,7 @@ from opencompass.openicl.icl_prompt_template import PromptTemplate
 from opencompass.openicl.icl_retriever import ZeroRetriever
 from opencompass.openicl.icl_inferencer import GenInferencer
 from opencompass.openicl.icl_evaluator import RougeEvaluator
-from opencompass.datasets import LCSTSDataset
+from opencompass.datasets import LCSTSDataset, lcsts_postprocess
 lcsts_reader_cfg = dict(input_columns=['content'], output_column='abst')
@@ -14,7 +14,7 @@ lcsts_infer_cfg = dict(
 lcsts_eval_cfg = dict(
    evaluator=dict(type=RougeEvaluator),
-    pred_postprocessor=dict(type='lcsts'),
+    pred_postprocessor=dict(type=lcsts_postprocess),
 )
 lcsts_datasets = [

--- a/configs/datasets/math/math_gen.py
+++ b/configs/datasets/math/math_gen.py
 from mmengine.config import read_base
 with read_base():
-    from .math_gen_3e92f6 import math_datasets  # noqa: F401, F403
+    from .math_gen_265cce import math_datasets  # noqa: F401, F403
--- a/configs/datasets/math/math_gen_3e92f6.py
+++ b/configs/datasets/math/math_gen_3e92f6.py
 from opencompass.openicl.icl_prompt_template import PromptTemplate
 from opencompass.openicl.icl_retriever import ZeroRetriever
 from opencompass.openicl.icl_inferencer import GenInferencer
-from opencompass.datasets import MATHDataset, MATHEvaluator
+from opencompass.datasets import MATHDataset, MATHEvaluator, math_postprocess
 math_reader_cfg = dict(input_columns=['problem'], output_column='solution')
@@ -12,12 +12,12 @@ math_infer_cfg = dict(
            dict(
                role="HUMAN",
                prompt=
-                "Problem:\nFind the domain of the expression $\frac{{\sqrt{{x-2}}}}{{\sqrt{{5-x}}}}$.}}\nSolution:"
+                "Problem:\nFind the domain of the expression $\\frac{{\sqrt{{x-2}}}}{{\sqrt{{5-x}}}}$.}}\nSolution:"
            ),
            dict(
                role="BOT",
                prompt=
-                "The expressions inside each square root must be non-negative. Therefore, $x-2 \ge 0$, so $x\ge2$, and $5 - x \ge 0$, so $x \le 5$. Also, the denominator cannot be equal to zero, so $5-x>0$, which gives $x<5$. Therefore, the domain of the expression is $\boxed{{[2,5)}}$.\nFinal Answer: The final answer is $[2,5)$. I hope it is correct."
+                "The expressions inside each square root must be non-negative. Therefore, $x-2 \ge 0$, so $x\ge2$, and $5 - x \ge 0$, so $x \le 5$. Also, the denominator cannot be equal to zero, so $5-x>0$, which gives $x<5$. Therefore, the domain of the expression is $\\boxed{{[2,5)}}$.\nFinal Answer: The final answer is $[2,5)$. I hope it is correct.\n"
            ),
            dict(
                role="HUMAN",
@@ -27,7 +27,7 @@ math_infer_cfg = dict(
            dict(
                role="BOT",
                prompt=
-                "We have that $\det (\mathbf{{A}} \mathbf{{B}}) = (\det \mathbf{{A}})(\det \mathbf{{B}}) = (2)(12) = \boxed{{24}}.$\nFinal Answer: The final answer is $24$. I hope it is correct."
+                "We have that $\det (\mathbf{{A}} \mathbf{{B}}) = (\det \mathbf{{A}})(\det \mathbf{{B}}) = (2)(12) = \\boxed{{24}}.$\nFinal Answer: The final answer is $24$. I hope it is correct.\n"
            ),
            dict(
                role="HUMAN",
@@ -37,17 +37,17 @@ math_infer_cfg = dict(
            dict(
                role="BOT",
                prompt=
-                "If Terrell lifts two 20-pound weights 12 times, he lifts a total of $2\cdot 12\cdot20=480$ pounds of weight. If he lifts two 15-pound weights instead for $n$ times, he will lift a total of $2\cdot15\cdot n=30n$ pounds of weight. Equating this to 480 pounds, we can solve for $n$: \begin{{align*}} 30n&=480\\ \Rightarrow\qquad n&=480/30=\boxed{{16}} \end{{align*}}\nFinal Answer: The final answer is $16$. I hope it is correct."
+                "If Terrell lifts two 20-pound weights 12 times, he lifts a total of $2\cdot 12\cdot20=480$ pounds of weight. If he lifts two 15-pound weights instead for $n$ times, he will lift a total of $2\cdot15\cdot n=30n$ pounds of weight. Equating this to 480 pounds, we can solve for $n$: \\begin{{align*}} 30n&=480\\\\ \Rightarrow\qquad n&=480/30=\\boxed{{16}} \end{{align*}}\nFinal Answer: The final answer is $16$. I hope it is correct.\n"
            ),
            dict(
                role="HUMAN",
                prompt=
-                "Problem:\nIf the system of equations: \begin{{align*}} 6x-4y&=a,\\ 6y-9x &=b. \end{{align*}}has a solution $(x, y)$ where $x$ and $y$ are both nonzero, find $\frac{{a}}{{b}},$ assuming $b$ is nonzero.\nSolution:"
+                "Problem:\nIf the system of equations: \\begin{{align*}} 6x-4y&=a,\\\\ 6y-9x &=b. \end{{align*}}has a solution $(x, y)$ where $x$ and $y$ are both nonzero, find $\\frac{{a}}{{b}},$ assuming $b$ is nonzero.\nSolution:"
            ),
            dict(
                role="BOT",
                prompt=
-                "If we multiply the first equation by $-\frac{{3}}{{2}}$, we obtain $$6y-9x=-\frac{{3}}{{2}}a.$$Since we also know that $6y-9x=b$, we have $$-\frac{{3}}{{2}}a=b\Rightarrow\frac{{a}}{{b}}=\boxed{{-\frac{{2}}{{3}}}}.$$\nFinal Answer: The final answer is $-\frac{{2}}{{3}}$. I hope it is correct."
+                "If we multiply the first equation by $-\\frac{{3}}{{2}}$, we obtain $$6y-9x=-\\frac{{3}}{{2}}a.$$Since we also know that $6y-9x=b$, we have $$-\\frac{{3}}{{2}}a=b\Rightarrow\\frac{{a}}{{b}}=\\boxed{{-\\frac{{2}}{{3}}}}.$$\nFinal Answer: The final answer is $-\\frac{{2}}{{3}}$. I hope it is correct.\n"
            ),
            dict(role="HUMAN", prompt="Problem:\n{problem}\nSolution:\n"),
        ])),
@@ -55,7 +55,7 @@ math_infer_cfg = dict(
    inferencer=dict(type=GenInferencer, max_out_len=512))
 math_eval_cfg = dict(
-    evaluator=dict(type=MATHEvaluator), pred_postprocessor=dict(type='math'))
+    evaluator=dict(type=MATHEvaluator), pred_postprocessor=dict(type=math_postprocess))
 math_datasets = [
    dict(

--- a/configs/datasets/math/math_gen_01261e.py
+++ b/configs/datasets/math/math_gen_01261e.py
 from opencompass.openicl.icl_prompt_template import PromptTemplate
 from opencompass.openicl.icl_retriever import ZeroRetriever
 from opencompass.openicl.icl_inferencer import GenInferencer
-from opencompass.datasets import MATHDataset, MATHEvaluator
+from opencompass.datasets import MATHDataset, MATHEvaluator, math_postprocess
 math_reader_cfg = dict(input_columns=['problem'], output_column='solution')
@@ -9,28 +9,28 @@ math_infer_cfg = dict(
    prompt_template=dict(
        type=PromptTemplate,
        template='''Problem:
-Find the domain of the expression $\frac{{\sqrt{{x-2}}}}{{\sqrt{{5-x}}}}$.}}
+Find the domain of the expression $\\frac{{\sqrt{{x-2}}}}{{\sqrt{{5-x}}}}$.}}
 Solution:
-The expressions inside each square root must be non-negative. Therefore, $x-2 \ge 0$, so $x\ge2$, and $5 - x \ge 0$, so $x \le 5$. Also, the denominator cannot be equal to zero, so $5-x>0$, which gives $x<5$. Therefore, the domain of the expression is $\boxed{{[2,5)}}$.
+The expressions inside each square root must be non-negative. Therefore, $x-2 \ge 0$, so $x\ge2$, and $5 - x \ge 0$, so $x \le 5$. Also, the denominator cannot be equal to zero, so $5-x>0$, which gives $x<5$. Therefore, the domain of the expression is $\\boxed{{[2,5)}}$.
 Final Answer: The final answer is $[2,5)$. I hope it is correct.
 Problem:
 If $\det \mathbf{{A}} = 2$ and $\det \mathbf{{B}} = 12,$ then find $\det (\mathbf{{A}} \mathbf{{B}}).$
 Solution:
-We have that $\det (\mathbf{{A}} \mathbf{{B}}) = (\det \mathbf{{A}})(\det \mathbf{{B}}) = (2)(12) = \boxed{{24}}.$
+We have that $\det (\mathbf{{A}} \mathbf{{B}}) = (\det \mathbf{{A}})(\det \mathbf{{B}}) = (2)(12) = \\boxed{{24}}.$
 Final Answer: The final answer is $24$. I hope it is correct.
 Problem:
 Terrell usually lifts two 20-pound weights 12 times. If he uses two 15-pound weights instead, how many times must Terrell lift them in order to lift the same total weight?
 Solution:
-If Terrell lifts two 20-pound weights 12 times, he lifts a total of $2\cdot 12\cdot20=480$ pounds of weight. If he lifts two 15-pound weights instead for $n$ times, he will lift a total of $2\cdot15\cdot n=30n$ pounds of weight. Equating this to 480 pounds, we can solve for $n$: \begin{{align*}} 30n&=480\\ \Rightarrow\qquad n&=480/30=\boxed{{16}} \end{{align*}}
+If Terrell lifts two 20-pound weights 12 times, he lifts a total of $2\cdot 12\cdot20=480$ pounds of weight. If he lifts two 15-pound weights instead for $n$ times, he will lift a total of $2\cdot15\cdot n=30n$ pounds of weight. Equating this to 480 pounds, we can solve for $n$: \\begin{{align*}} 30n&=480\\\\ \Rightarrow\qquad n&=480/30=\\boxed{{16}} \end{{align*}}
 Final Answer: The final answer is $16$. I hope it is correct.
 Problem:
-If the system of equations: \begin{{align*}} 6x-4y&=a,\\ 6y-9x &=b. \end{{align*}}has a solution $(x, y)$ where $x$ and $y$ are both nonzero, find $\frac{{a}}{{b}},$ assuming $b$ is nonzero.
+If the system of equations: \\begin{{align*}} 6x-4y&=a,\\\\ 6y-9x &=b. \end{{align*}}has a solution $(x, y)$ where $x$ and $y$ are both nonzero, find $\\frac{{a}}{{b}},$ assuming $b$ is nonzero.
 Solution:
-If we multiply the first equation by $-\frac{{3}}{{2}}$, we obtain $$6y-9x=-\frac{{3}}{{2}}a.$$Since we also know that $6y-9x=b$, we have $$-\frac{{3}}{{2}}a=b\Rightarrow\frac{{a}}{{b}}=\boxed{{-\frac{{2}}{{3}}}}.$$
+If we multiply the first equation by $-\\frac{{3}}{{2}}$, we obtain $$6y-9x=-\\frac{{3}}{{2}}a.$$Since we also know that $6y-9x=b$, we have $$-\\frac{{3}}{{2}}a=b\Rightarrow\\frac{{a}}{{b}}=\\boxed{{-\\frac{{2}}{{3}}}}.$$
-Final Answer: The final answer is $-\frac{{2}}{{3}}$. I hope it is correct.
+Final Answer: The final answer is $-\\frac{{2}}{{3}}$. I hope it is correct.
 Problem:
 {problem}
@@ -40,7 +40,7 @@ Solution:
    inferencer=dict(type=GenInferencer, max_out_len=512))
 math_eval_cfg = dict(
-    evaluator=dict(type=MATHEvaluator), pred_postprocessor=dict(type='math'))
+    evaluator=dict(type=MATHEvaluator), pred_postprocessor=dict(type=math_postprocess))
 math_datasets = [
    dict(

--- a/configs/datasets/math/math_gen_664168.py
+++ b/configs/datasets/math/math_gen_664168.py
 from opencompass.openicl.icl_prompt_template import PromptTemplate
 from opencompass.openicl.icl_retriever import ZeroRetriever
 from opencompass.openicl.icl_inferencer import GenInferencer
-from opencompass.datasets import MATHDataset, MATHEvaluator
+from opencompass.datasets import MATHDataset, MATHEvaluator, math_postprocess
 math_infer_cfg = dict(
    prompt_template=dict(
        type=PromptTemplate,
        template='''Problem:
-Find the domain of the expression $\frac{{\sqrt{{x-2}}}}{{\sqrt{{5-x}}}}$.}}
+Find the domain of the expression $\\frac{{\sqrt{{x-2}}}}{{\sqrt{{5-x}}}}$.}}
 Solution:
-The expressions inside each square root must be non-negative. Therefore, $x-2 \ge 0$, so $x\ge2$, and $5 - x \ge 0$, so $x \le 5$. Also, the denominator cannot be equal to zero, so $5-x>0$, which gives $x<5$. Therefore, the domain of the expression is $\boxed{{[2,5)}}$.
+The expressions inside each square root must be non-negative. Therefore, $x-2 \ge 0$, so $x\ge2$, and $5 - x \ge 0$, so $x \le 5$. Also, the denominator cannot be equal to zero, so $5-x>0$, which gives $x<5$. Therefore, the domain of the expression is $\\boxed{{[2,5)}}$.
 Final Answer: The final answer is $[2,5)$. I hope it is correct.
 Problem:
 If $\det \mathbf{{A}} = 2$ and $\det \mathbf{{B}} = 12,$ then find $\det (\mathbf{{A}} \mathbf{{B}}).$
 Solution:
-We have that $\det (\mathbf{{A}} \mathbf{{B}}) = (\det \mathbf{{A}})(\det \mathbf{{B}}) = (2)(12) = \boxed{{24}}.$
+We have that $\det (\mathbf{{A}} \mathbf{{B}}) = (\det \mathbf{{A}})(\det \mathbf{{B}}) = (2)(12) = \\boxed{{24}}.$
 Final Answer: The final answer is $24$. I hope it is correct.
 Problem:
 Terrell usually lifts two 20-pound weights 12 times. If he uses two 15-pound weights instead, how many times must Terrell lift them in order to lift the same total weight?
 Solution:
-If Terrell lifts two 20-pound weights 12 times, he lifts a total of $2\cdot 12\cdot20=480$ pounds of weight. If he lifts two 15-pound weights instead for $n$ times, he will lift a total of $2\cdot15\cdot n=30n$ pounds of weight. Equating this to 480 pounds, we can solve for $n$: \begin{{align*}} 30n&=480\\ \Rightarrow\qquad n&=480/30=\boxed{{16}} \end{{align*}}
+If Terrell lifts two 20-pound weights 12 times, he lifts a total of $2\cdot 12\cdot20=480$ pounds of weight. If he lifts two 15-pound weights instead for $n$ times, he will lift a total of $2\cdot15\cdot n=30n$ pounds of weight. Equating this to 480 pounds, we can solve for $n$: \\begin{{align*}} 30n&=480\\\\ \Rightarrow\qquad n&=480/30=\\boxed{{16}} \end{{align*}}
 Final Answer: The final answer is $16$. I hope it is correct.
 Problem:
-If the system of equations: \begin{{align*}} 6x-4y&=a,\\ 6y-9x &=b. \end{{align*}}has a solution $(x, y)$ where $x$ and $y$ are both nonzero, find $\frac{{a}}{{b}},$ assuming $b$ is nonzero.
+If the system of equations: \\begin{{align*}} 6x-4y&=a,\\\\ 6y-9x &=b. \end{{align*}}has a solution $(x, y)$ where $x$ and $y$ are both nonzero, find $\\frac{{a}}{{b}},$ assuming $b$ is nonzero.
 Solution:
-If we multiply the first equation by $-\frac{{3}}{{2}}$, we obtain $$6y-9x=-\frac{{3}}{{2}}a.$$Since we also know that $6y-9x=b$, we have $$-\frac{{3}}{{2}}a=b\Rightarrow\frac{{a}}{{b}}=\boxed{{-\frac{{2}}{{3}}}}.$$
+If we multiply the first equation by $-\\frac{{3}}{{2}}$, we obtain $$6y-9x=-\\frac{{3}}{{2}}a.$$Since we also know that $6y-9x=b$, we have $$-\\frac{{3}}{{2}}a=b\Rightarrow\\frac{{a}}{{b}}=\\boxed{{-\\frac{{2}}{{3}}}}.$$
-Final Answer: The final answer is $-\frac{{2}}{{3}}$. I hope it is correct.
+Final Answer: The final answer is $-\\frac{{2}}{{3}}$. I hope it is correct.
 Problem:
 {problem}Solution:
@@ -37,7 +37,7 @@ Problem:
    inferencer=dict(type=GenInferencer, max_out_len=512))
 math_eval_cfg = dict(
-    evaluator=dict(type=MATHEvaluator), pred_postprocessor=dict(type='math'))
+    evaluator=dict(type=MATHEvaluator), pred_postprocessor=dict(type=math_postprocess))
 math_datasets = [
    dict(

--- a/configs/datasets/mbpp/mbpp_gen_1e1056.py
+++ b/configs/datasets/mbpp/mbpp_gen_1e1056.py
@@ -4,7 +4,7 @@ from opencompass.openicl.icl_inferencer import GenInferencer
 from opencompass.datasets import MBPPDataset, MBPPEvaluator
 mbpp_reader_cfg = dict(
-    input_columns=['text', 'test_list'], output_column='code')
+    input_columns=['text', 'test_list'], output_column='test_list_2')
 mbpp_infer_cfg = dict(
    prompt_template=dict(