format

458342e2 · lintangsutawika · b8122d98 · 458342e2 · 458342e2
Commit 458342e2 authored Aug 05, 2024 by lintangsutawika
Show whitespace changes
Inline Side-by-side

Showing with 3 additions and 44 deletions

lm_eval/tasks/mmlu_pro/_default_template_yaml lm_eval/tasks/mmlu_pro/_default_template_yaml +3 -1

lm_eval/tasks/mmlu_pro/utils.py lm_eval/tasks/mmlu_pro/utils.py +0 -43

No files found.
--- a/lm_eval/tasks/mmlu_pro/_default_template_yaml
+++ b/lm_eval/tasks/mmlu_pro/_default_template_yaml
@@ -11,7 +11,9 @@ doc_to_target: answer
 filter_list:
  - name: "custom-extract"
    filter:
-      - function: !function utils.CustomRegexFilter
+      - function: "regex"
+        regex_pattern: 'answer is \(?([ABCDEFGHIJ])\)?'
+        # regex_pattern: '.*[aA]nswer:\s*([A-J])'
      - function: "take_first"
 generation_kwargs:
  until:

--- a/lm_eval/tasks/mmlu_pro/utils.py
+++ b/lm_eval/tasks/mmlu_pro/utils.py
-import re
 from functools import partial
-from lm_eval.api.filter import Filter
 choices = [
    "A",
@@ -64,43 +61,3 @@ process_other = partial(process_docs, subject="other")
 process_philosophy = partial(process_docs, subject="philosophy")
 process_physics = partial(process_docs, subject="physics")
 process_psychology = partial(process_docs, subject="psychology")
-class CustomRegexFilter(Filter):
-    """ """
-    def __init__(
-        self,
-        regex_pattern: list = [
-            r"answer is \(?([ABCDEFGHIJ])\)?",
-            r".*[aA]nswer:\s*([A-J])",
-        ],
-        group_select=0,
-        fallback: str = "[invalid]",
-    ) -> None:
-        """
-        pass a string `regex` to run `re.compile(r"regex")` on.
-        `fallback` defines the output returned if no matches for the regex are located.
-        """
-        self.regex_pattern = regex_pattern
-        self.regex = [re.compile(pattern) for pattern in regex_pattern]
-        self.group_select = group_select
-        self.fallback = fallback
-    def apply(self, resps, docs):
-        # here, we assume we have a list, in which each element is
-        # a list of model responses for some particular input/target pair.
-        # so we process each of these (same input/target response sets)
-        # independently (and keep them a list.)
-        filtered_resps = []
-        for resp in resps:
-            for pattern in self.regex:
-                match = pattern.search(resp)
-                if match:
-                    filtered_resps.append(match.group(1))
-                    break
-        if len(filtered_resps) == 0:
-            filtered_resps = [None]
-        return filtered_resps