moved files out, and removed unused versions

1f6a6ebc · lintangsutawika · 5be2bb10 · 1f6a6ebc · 1f6a6ebc · 1f6a6ebc
Commit 1f6a6ebc authored Jun 27, 2024 by lintangsutawika
17 changed files
--- a/lm_eval/api/samplers.py
+++ b/lm_eval/api/samplers.py
@@ -15,7 +15,11 @@ class ContextSampler:
        self.target_delimiter = self.config.target_delimiter
        self.fewshot_delimiter = self.config.fewshot_delimiter
-        self.doc_to_text = self.task.doc_to_text
+        if self.config.fewshot_config is not None and self.config.fewshot_config.get("doc_to_text", None) is not None:
+            self.doc_to_text = self.config.fewshot_config.get("doc_to_text", None)
+        else:
+            self.doc_to_text = self.task.doc_to_text
        self.doc_to_target = self.task.doc_to_target
        self.doc_to_choice = self.task.doc_to_choice

--- a/lm_eval/tasks/mmlu_pro/_default_template_yaml
+++ b/lm_eval/tasks/mmlu_pro/_default_template_yaml
+dataset_path: TIGER-Lab/MMLU-Pro
+test_split: test
+fewshot_split: validation
+fewshot_config:
+  sampler: first_n
+  doc_to_text: !function utils.fewshot_to_text
+output_type: generate_until
+doc_to_text: !function utils.doc_to_text
+doc_to_target: answer
+# filter_list:
+#   - name: "custom-extract"
+#     filter:
+#       - function: !function utils.CustomRegexFilter
+#       - function: "take_first"
+generation_kwargs:
+  until:
+    - "</s>"
+    - "Q:"
+    - "<|im_end|>"
+  do_sample: false
+  temperature: 0.0
+num_fewshot: 5
+metric_list:
+  - metric: exact_match
+    aggregation: mean
+    higher_is_better: true
+    ignore_case: true
+    ignore_punctuation: true
+metadata:
+  version: 0.0
--- a/lm_eval/tasks/mmlu_pro/mmlu_pro_biology.yaml
+++ b/lm_eval/tasks/mmlu_pro/mmlu_pro_biology.yaml
+description: "The following are multiple choice questions (with answers) about biology. Think step by step and then finish your answer with \"the answer is (X)\" where X is the correct letter choice."
+include: "_default_template_yaml"
+task: "mmlu_pro_biology"
+task_alias: "biology"
+process_docs: !function utils.process_biology
\ No newline at end of file
--- a/lm_eval/tasks/mmlu_pro/mmlu_pro_business.yaml
+++ b/lm_eval/tasks/mmlu_pro/mmlu_pro_business.yaml
+dataset_name: "business"
+description: "The following are multiple choice questions (with answers) about business. Think step by step and then finish your answer with \"the answer is (X)\" where X is the correct letter choice."
+include: "_default_template_yaml"
+task: "mmlu_pro_business"
+task_alias: "business"
--- a/lm_eval/tasks/mmlu_pro/mmlu_pro_chemistry.yaml
+++ b/lm_eval/tasks/mmlu_pro/mmlu_pro_chemistry.yaml
+# Chemistry subset; shared settings are pulled in from _default_template_yaml.
+dataset_name: "chemistry"
+description: "The following are multiple choice questions (with answers) about chemistry. Think step by step and then finish your answer with \"the answer is (X)\" where X is the correct letter choice."
+include: "_default_template_yaml"
+task: "mmlu_pro_chemistry"
+task_alias: "chemistry"
--- a/lm_eval/tasks/mmlu_pro/mmlu_pro_computer_science.yaml
+++ b/lm_eval/tasks/mmlu_pro/mmlu_pro_computer_science.yaml
+dataset_name: "computer_science"
+description: "The following are multiple choice questions (with answers) about computer science. Think step by step and then finish your answer with \"the answer is (X)\" where X is the correct letter choice."
+include: "_default_template_yaml"
+task: "mmlu_pro_computer_science"
+task_alias: "computer_science"
--- a/lm_eval/tasks/mmlu_pro/mmlu_pro_economics.yaml
+++ b/lm_eval/tasks/mmlu_pro/mmlu_pro_economics.yaml
+dataset_name: "economics"
+description: "The following are multiple choice questions (with answers) about economics. Think step by step and then finish your answer with \"the answer is (X)\" where X is the correct letter choice."
+include: "_default_template_yaml"
+task: "mmlu_pro_economics"
+task_alias: "economics"
--- a/lm_eval/tasks/mmlu_pro/mmlu_pro_engineering.yaml
+++ b/lm_eval/tasks/mmlu_pro/mmlu_pro_engineering.yaml
+dataset_name: "engineering"
+description: "The following are multiple choice questions (with answers) about engineering. Think step by step and then finish your answer with \"the answer is (X)\" where X is the correct letter choice."
+include: "_default_template_yaml"
+task: "mmlu_pro_engineering"
+task_alias: "engineering"
--- a/lm_eval/tasks/mmlu_pro/mmlu_pro_health.yaml
+++ b/lm_eval/tasks/mmlu_pro/mmlu_pro_health.yaml
+dataset_name: "health"
+description: "The following are multiple choice questions (with answers) about health. Think step by step and then finish your answer with \"the answer is (X)\" where X is the correct letter choice."
+include: "_default_template_yaml"
+task: "mmlu_pro_health"
+task_alias: "health"
--- a/lm_eval/tasks/mmlu_pro/mmlu_pro_history.yaml
+++ b/lm_eval/tasks/mmlu_pro/mmlu_pro_history.yaml
+dataset_name: "history"
+description: "The following are multiple choice questions (with answers) about history. Think step by step and then finish your answer with \"the answer is (X)\" where X is the correct letter choice."
+include: "_default_template_yaml"
+task: "mmlu_pro_history"
+task_alias: "history"
--- a/lm_eval/tasks/mmlu_pro/mmlu_pro_law.yaml
+++ b/lm_eval/tasks/mmlu_pro/mmlu_pro_law.yaml
+dataset_name: "law"
+description: "The following are multiple choice questions (with answers) about law. Think step by step and then finish your answer with \"the answer is (X)\" where X is the correct letter choice."
+include: "_default_template_yaml"
+task: "mmlu_pro_law"
+task_alias: "law"
--- a/lm_eval/tasks/mmlu_pro/mmlu_pro_math.yaml
+++ b/lm_eval/tasks/mmlu_pro/mmlu_pro_math.yaml
+dataset_name: "math"
+description: "The following are multiple choice questions (with answers) about math. Think step by step and then finish your answer with \"the answer is (X)\" where X is the correct letter choice."
+include: "_default_template_yaml"
+task: "mmlu_pro_math"
+task_alias: "math"
--- a/lm_eval/tasks/mmlu_pro/mmlu_pro_other.yaml
+++ b/lm_eval/tasks/mmlu_pro/mmlu_pro_other.yaml
+dataset_name: "other"
+description: "The following are multiple choice questions (with answers) about other. Think step by step and then finish your answer with \"the answer is (X)\" where X is the correct letter choice."
+include: "_default_template_yaml"
+task: "mmlu_pro_other"
+task_alias: "other"
--- a/lm_eval/tasks/mmlu_pro/mmlu_pro_philosophy.yaml
+++ b/lm_eval/tasks/mmlu_pro/mmlu_pro_philosophy.yaml
+dataset_name: "philosophy"
+description: "The following are multiple choice questions (with answers) about philosophy. Think step by step and then finish your answer with \"the answer is (X)\" where X is the correct letter choice."
+include: "_default_template_yaml"
+task: "mmlu_pro_philosophy"
+task_alias: "philosophy"
--- a/lm_eval/tasks/mmlu_pro/mmlu_pro_physics.yaml
+++ b/lm_eval/tasks/mmlu_pro/mmlu_pro_physics.yaml
+dataset_name: "physics"
+description: "The following are multiple choice questions (with answers) about physics. Think step by step and then finish your answer with \"the answer is (X)\" where X is the correct letter choice."
+include: "_default_template_yaml"
+task: "mmlu_pro_physics"
+task_alias: "physics"
--- a/lm_eval/tasks/mmlu_pro/mmlu_pro_psychology.yaml
+++ b/lm_eval/tasks/mmlu_pro/mmlu_pro_psychology.yaml
+dataset_name: "psychology"
+description: "The following are multiple choice questions (with answers) about psychology. Think step by step and then finish your answer with \"the answer is (X)\" where X is the correct letter choice."
+include: "_default_template_yaml"
+task: "mmlu_pro_psychology"
+task_alias: "psychology"
--- a/lm_eval/tasks/mmlu_pro/utils.py
+++ b/lm_eval/tasks/mmlu_pro/utils.py
+import re
+from functools import partial
+from lm_eval.api.filter import Filter
+choices = ["A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N", "O", "P"]
+def format_cot_example(example, including_answer=True):
+    prompt = "Question:\n"
+    question = example["question"]
+    options = example["options"]
+    prompt += question + "\n"
+    prompt += "Options:\n"
+    for i, opt in enumerate(options):
+        prompt += "{}. {}\n".format(choices[i], opt)
+    if including_answer:
+        cot_content = example["cot_content"].replace("A: Let's think step by step.",
+                                                     "Answer: Let's think step by step.")
+        prompt += cot_content + "\n\n"
+    else:
+        prompt += "Answer: Let's think step by step."
+    return prompt
+doc_to_text = partial(format_cot_example, including_answer=False)
+fewshot_to_text = partial(format_cot_example, including_answer=True)
+def process_docs(dataset, subject):
+    return dataset.filter(lambda x: x["category"] == subject)
+process_biology = partial(process_docs, subject="biology")
+process_business = partial(process_docs, subject="business")
+process_chemistry = partial(process_docs, subject="chemistry")
+process_computer_science = partial(process_docs, subject="computer_science")
+process_economics = partial(process_docs, subject="economics")
+process_engineering = partial(process_docs, subject="engineering")
+process_health = partial(process_docs, subject="health")
+process_history = partial(process_docs, subject="history")
+process_law = partial(process_docs, subject="law")
+process_math = partial(process_docs, subject="math")
+process_other = partial(process_docs, subject="other")
+process_philosophy = partial(process_docs, subject="philosophy")
+process_physics = partial(process_docs, subject="physics")
+process_psychology = partial(process_docs, subject="psychology")
+# def generate_cot_prompt(val_df, curr, k):
+#     prompt = ""
+#     with open(f"cot_prompt_lib/initial_prompt.txt", "r") as fi:
+#         for line in fi.readlines():
+#             prompt += line
+#     subject = curr["category"]
+#     val_df = select_by_category(val_df, subject)
+#     val_df = val_df[: k]
+#     prompt = prompt.replace("{$}", subject) + "\n"
+#     for example in val_df:
+#         prompt += format_cot_example(example, including_answer=True)
+#     prompt += format_cot_example(curr, including_answer=False)
+#     return prompt
+class CustomRegexFilter(Filter):
+    """ """
+    def __init__(
+        self,
+        regex_pattern: list = [r"answer is \(?([ABCDEFGHIJ])\)?", r".*[aA]nswer:\s*([A-J])"],
+        group_select=0,
+        fallback: str = "[invalid]",
+    ) -> None:
+        """
+        pass a string `regex` to run `re.compile(r"regex")` on.
+        `fallback` defines the output returned if no matches for the regex are located.
+        """
+        self.regex_pattern = regex_pattern
+        self.regex = [re.compile(pattern) for pattern in regex_pattern]
+        self.group_select = group_select
+        self.fallback = fallback
+    def apply(self, resps, docs):
+        # here, we assume we have a list, in which each element is
+        # a list of model responses for some particular input/target pair.
+        # so we process each of these (same input/target response sets)
+        # independently (and keep them a list.)
+        filtered_resps = []
+        for resp in resps:
+            for pattern in self.regex:
+                match = pattern.search(resp)
+                if match:
+                    filtered_resps.append(match.group(1))
+                    break
+        if len(filtered_resps) == 0:
+            filtered_resps = [None]
+        return filtered_resps