pre-commit

01b129bb · lintangsutawika · 89de5103 · 01b129bb · 01b129bb · 01b129bb
Commit 01b129bb authored Aug 05, 2024 by lintangsutawika
15 changed files
--- a/lm_eval/tasks/mmlu_pro/README.md
+++ b/lm_eval/tasks/mmlu_pro/README.md
@@ -17,7 +17,7 @@ Homepage (preprocessed): https://huggingface.co/datasets/sjyuxyz/MMLU-Pro-with-s

 ```bibtex
 @misc{wang2024mmlupro,
-      title={MMLU-Pro: A More Robust and Challenging Multi-Task Language Understanding Benchmark}, 
+      title={MMLU-Pro: A More Robust and Challenging Multi-Task Language Understanding Benchmark},
      author={Yubo Wang and Xueguang Ma and Ge Zhang and Yuansheng Ni and Abhranil Chandra and Shiguang Guo and Weiming Ren and Aaran Arulraj and Xuan He and Ziyan Jiang and Tianle Li and Max Ku and Kai Wang and Alex Zhuang and Rongqi Fan and Xiang Yue and Wenhu Chen},
      year={2024},
      eprint={2406.01574},

--- a/lm_eval/tasks/mmlu_pro/mmlu_pro_biology.yaml
+++ b/lm_eval/tasks/mmlu_pro/mmlu_pro_biology.yaml
@@ -2,4 +2,4 @@ description: "The following are multiple choice questions (with answers) about b
 include: "_default_template_yaml"
 task: "mmlu_pro_biology"
 task_alias: "biology"
-process_docs: !function utils.process_biology
\ No newline at end of file
+process_docs: !function utils.process_biology
--- a/lm_eval/tasks/mmlu_pro/mmlu_pro_chemistry.yaml
+++ b/lm_eval/tasks/mmlu_pro/mmlu_pro_chemistry.yaml
@@ -2,4 +2,4 @@ description: "The following are multiple choice questions (with answers) about c
 include: "_default_template_yaml"
 task: "mmlu_pro_chemistry"
 task_alias: "chemistry"
-process_docs: !function utils.process_chemistry
\ No newline at end of file
+process_docs: !function utils.process_chemistry
--- a/lm_eval/tasks/mmlu_pro/mmlu_pro_computer_science.yaml
+++ b/lm_eval/tasks/mmlu_pro/mmlu_pro_computer_science.yaml
@@ -2,4 +2,4 @@ description: "The following are multiple choice questions (with answers) about c
 include: "_default_template_yaml"
 task: "mmlu_pro_computer_science"
 task_alias: "computer_science"
-process_docs: !function utils.process_computer_science
\ No newline at end of file
+process_docs: !function utils.process_computer_science
--- a/lm_eval/tasks/mmlu_pro/mmlu_pro_economics.yaml
+++ b/lm_eval/tasks/mmlu_pro/mmlu_pro_economics.yaml
@@ -2,4 +2,4 @@ description: "The following are multiple choice questions (with answers) about e
 include: "_default_template_yaml"
 task: "mmlu_pro_economics"
 task_alias: "economics"
-process_docs: !function utils.process_economics
\ No newline at end of file
+process_docs: !function utils.process_economics
--- a/lm_eval/tasks/mmlu_pro/mmlu_pro_engineering.yaml
+++ b/lm_eval/tasks/mmlu_pro/mmlu_pro_engineering.yaml
@@ -2,4 +2,4 @@ description: "The following are multiple choice questions (with answers) about e
 include: "_default_template_yaml"
 task: "mmlu_pro_engineering"
 task_alias: "engineering"
-process_docs: !function utils.process_engineering
\ No newline at end of file
+process_docs: !function utils.process_engineering
--- a/lm_eval/tasks/mmlu_pro/mmlu_pro_health.yaml
+++ b/lm_eval/tasks/mmlu_pro/mmlu_pro_health.yaml
@@ -2,4 +2,4 @@ description: "The following are multiple choice questions (with answers) about h
 include: "_default_template_yaml"
 task: "mmlu_pro_health"
 task_alias: "health"
-process_docs: !function utils.process_health
\ No newline at end of file
+process_docs: !function utils.process_health
--- a/lm_eval/tasks/mmlu_pro/mmlu_pro_history.yaml
+++ b/lm_eval/tasks/mmlu_pro/mmlu_pro_history.yaml
@@ -2,4 +2,4 @@ description: "The following are multiple choice questions (with answers) about h
 include: "_default_template_yaml"
 task: "mmlu_pro_history"
 task_alias: "history"
-process_docs: !function utils.process_history
\ No newline at end of file
+process_docs: !function utils.process_history
--- a/lm_eval/tasks/mmlu_pro/mmlu_pro_law.yaml
+++ b/lm_eval/tasks/mmlu_pro/mmlu_pro_law.yaml
@@ -2,4 +2,4 @@ description: "The following are multiple choice questions (with answers) about l
 include: "_default_template_yaml"
 task: "mmlu_pro_law"
 task_alias: "law"
-process_docs: !function utils.process_law
\ No newline at end of file
+process_docs: !function utils.process_law
--- a/lm_eval/tasks/mmlu_pro/mmlu_pro_math.yaml
+++ b/lm_eval/tasks/mmlu_pro/mmlu_pro_math.yaml
@@ -2,4 +2,4 @@ description: "The following are multiple choice questions (with answers) about m
 include: "_default_template_yaml"
 task: "mmlu_pro_math"
 task_alias: "math"
-process_docs: !function utils.process_math
\ No newline at end of file
+process_docs: !function utils.process_math
--- a/lm_eval/tasks/mmlu_pro/mmlu_pro_other.yaml
+++ b/lm_eval/tasks/mmlu_pro/mmlu_pro_other.yaml
@@ -2,4 +2,4 @@ description: "The following are multiple choice questions (with answers) about o
 include: "_default_template_yaml"
 task: "mmlu_pro_other"
 task_alias: "other"
-process_docs: !function utils.process_other
\ No newline at end of file
+process_docs: !function utils.process_other
--- a/lm_eval/tasks/mmlu_pro/mmlu_pro_philosophy.yaml
+++ b/lm_eval/tasks/mmlu_pro/mmlu_pro_philosophy.yaml
@@ -2,4 +2,4 @@ description: "The following are multiple choice questions (with answers) about p
 include: "_default_template_yaml"
 task: "mmlu_pro_philosophy"
 task_alias: "philosophy"
-process_docs: !function utils.process_philosophy
\ No newline at end of file
+process_docs: !function utils.process_philosophy
--- a/lm_eval/tasks/mmlu_pro/mmlu_pro_physics.yaml
+++ b/lm_eval/tasks/mmlu_pro/mmlu_pro_physics.yaml
@@ -2,4 +2,4 @@ description: "The following are multiple choice questions (with answers) about p
 include: "_default_template_yaml"
 task: "mmlu_pro_physics"
 task_alias: "physics"
-process_docs: !function utils.process_physics
\ No newline at end of file
+process_docs: !function utils.process_physics
--- a/lm_eval/tasks/mmlu_pro/mmlu_pro_psychology.yaml
+++ b/lm_eval/tasks/mmlu_pro/mmlu_pro_psychology.yaml
@@ -2,4 +2,4 @@ description: "The following are multiple choice questions (with answers) about p
 include: "_default_template_yaml"
 task: "mmlu_pro_psychology"
 task_alias: "psychology"
-process_docs: !function utils.process_psychology
\ No newline at end of file
+process_docs: !function utils.process_psychology
--- a/lm_eval/tasks/mmlu_pro/utils.py
+++ b/lm_eval/tasks/mmlu_pro/utils.py
@@ -3,7 +3,26 @@ from functools import partial

 from lm_eval.api.filter import Filter

-choices = ["A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N", "O", "P"]
+
+choices = [
+    "A",
+    "B",
+    "C",
+    "D",
+    "E",
+    "F",
+    "G",
+    "H",
+    "I",
+    "J",
+    "K",
+    "L",
+    "M",
+    "N",
+    "O",
+    "P",
+]
+

 def format_cot_example(example, including_answer=True):
    prompt = "Question:\n"
@@ -14,8 +33,9 @@ def format_cot_example(example, including_answer=True):
    for i, opt in enumerate(options):
        prompt += "{}. {}\n".format(choices[i], opt)
    if including_answer:
-        cot_content = example["cot_content"].replace("A: Let's think step by step.",
-                                                     "Answer: Let's think step by step.")
+        cot_content = example["cot_content"].replace(
+            "A: Let's think step by step.", "Answer: Let's think step by step."
+        )
        prompt += cot_content + "\n\n"
    else:
        prompt += "Answer: Let's think step by step."
@@ -29,6 +49,7 @@ fewshot_to_text = partial(format_cot_example, including_answer=True)
 def process_docs(dataset, subject):
    return dataset.filter(lambda x: x["category"] == subject)

+
 process_biology = partial(process_docs, subject="biology")
 process_business = partial(process_docs, subject="business")
 process_chemistry = partial(process_docs, subject="chemistry")
@@ -45,26 +66,15 @@ process_physics = partial(process_docs, subject="physics")
 process_psychology = partial(process_docs, subject="psychology")


-# def generate_cot_prompt(val_df, curr, k):
-#     prompt = ""
-#     with open(f"cot_prompt_lib/initial_prompt.txt", "r") as fi:
-#         for line in fi.readlines():
-#             prompt += line
-#     subject = curr["category"]
-#     val_df = select_by_category(val_df, subject)
-#     val_df = val_df[: k]
-#     prompt = prompt.replace("{$}", subject) + "\n"
-#     for example in val_df:
-#         prompt += format_cot_example(example, including_answer=True)
-#     prompt += format_cot_example(curr, including_answer=False)
-#     return prompt
-
 class CustomRegexFilter(Filter):
    """ """

    def __init__(
        self,
-        regex_pattern: list = [r"answer is \(?([ABCDEFGHIJ])\)?", r".*[aA]nswer:\s*([A-J])"],
+        regex_pattern: list = [
+            r"answer is \(?([ABCDEFGHIJ])\)?",
+            r".*[aA]nswer:\s*([A-J])",
+        ],
        group_select=0,
        fallback: str = "[invalid]",
    ) -> None:
@@ -89,7 +99,7 @@ class CustomRegexFilter(Filter):
                if match:
                    filtered_resps.append(match.group(1))
                    break
-        
+
        if len(filtered_resps) == 0:
            filtered_resps = [None]