gaoqiong / lm-evaluation-harness

Commit a2009452
authored Sep 19, 2023 by lintangsutawika
format
parent 4578ca14
Showing 4 changed files with 20 additions and 12 deletions
lm_eval/tasks/__init__.py                   +3  -6
lm_eval/tasks/squadv2/utils.py              +15 -5
lm_eval/tasks/squadv2/with_noans_prob.yaml  +1  -1
main.py                                     +1  -0
lm_eval/tasks/__init__.py
...
@@ -37,15 +37,12 @@ def register_configurable_task(config: Dict[str, str]) -> int:
     return 0


 def register_configurable_group(config: Dict[str, str]) -> int:
     group = config["group"]
     all_task_list = config["task"]
-    config_list = [
-        task for task in all_task_list if type(task) != str
-    ]
-    task_list = [
-        task for task in all_task_list if type(task) == str
-    ]
+    config_list = [task for task in all_task_list if type(task) != str]
+    task_list = [task for task in all_task_list if type(task) == str]

     for task_config in config_list:
         var_configs = check_prompt_config(
...
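
The reflowed comprehensions split a group's "task" list by element type: plain strings are names of already-registered tasks, while anything else (in practice a dict) is an inline task config. A minimal sketch of that split, with hypothetical data (the task name and config keys below are illustrative, not taken from the harness):

    # Hypothetical mixed "task" list for a group config.
    all_task_list = [
        "squadv2",                              # registered task name (str)
        {"task": "squadv2", "num_fewshot": 2},  # inline task config (dict)
    ]

    # Same split as register_configurable_group above.
    config_list = [task for task in all_task_list if type(task) != str]
    task_list = [task for task in all_task_list if type(task) == str]

    print(config_list)  # [{'task': 'squadv2', 'num_fewshot': 2}]
    print(task_list)    # ['squadv2']

Note that isinstance(task, str) would be the more idiomatic test; the commit only reflows the lines and keeps the type(...) comparisons as they were.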
lm_eval/tasks/squadv2/utils.py
...
@@ -2,28 +2,38 @@ import re
 import string
 import collections

 def normalize_answer(s):
     """Lower text and remove punctuation, articles and extra whitespace."""

     def remove_articles(text):
-        regex = re.compile(r'\b(a|an|the)\b', re.UNICODE)
-        return re.sub(regex, ' ', text)
+        regex = re.compile(r"\b(a|an|the)\b", re.UNICODE)
+        return re.sub(regex, " ", text)

     def white_space_fix(text):
-        return ' '.join(text.split())
+        return " ".join(text.split())

     def remove_punc(text):
         exclude = set(string.punctuation)
-        return ''.join(ch for ch in text if ch not in exclude)
+        return "".join(ch for ch in text if ch not in exclude)

     def lower(text):
         return text.lower()

     return white_space_fix(remove_articles(remove_punc(lower(s))))

 def get_tokens(s):
     if not s: return []
     return normalize_answer(s).split()

 # Exact match (the normalized answer exactly match the gold answer)
 def exact(predictions, references):
     return int(normalize_answer(references[0]) == normalize_answer(predictions[0]))

 # The F-score of predicted tokens versus the gold answer
 def f1(predictions, references):
     gold_toks = get_tokens(references[0])
...
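
These helpers follow the official SQuAD v2 evaluation script, and the hunk is truncated inside f1. In the standard script, token-level F1 is computed from the multiset overlap between prediction and gold tokens; a self-contained sketch of that computation, assuming the standard SQuAD logic since the rest of the body is cut off here:

    import collections

    def f1_sketch(pred_toks, gold_toks):
        # Multiset intersection: tokens common to prediction and gold.
        common = collections.Counter(gold_toks) & collections.Counter(pred_toks)
        num_same = sum(common.values())
        if len(gold_toks) == 0 or len(pred_toks) == 0:
            # If either side is empty (e.g. a no-answer case),
            # F1 is 1 only when both are empty.
            return int(gold_toks == pred_toks)
        if num_same == 0:
            return 0
        precision = num_same / len(pred_toks)
        recall = num_same / len(gold_toks)
        return (2 * precision * recall) / (precision + recall)

    # f1_sketch(["the", "eiffel", "tower"], ["eiffel", "tower"]) -> 0.8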
lm_eval/tasks/squadv2/with_noans_prob.yaml
main.py
...
@@ -11,6 +11,7 @@ from lm_eval import evaluator, utils
 from lm_eval.api.registry import ALL_TASKS
 from lm_eval.logger import eval_logger, SPACING
 from lm_eval.tasks import include_task_folder
 # from lm_eval.benchmarks import include_benchmarks
+
 os.environ["TOKENIZERS_PARALLELISM"] = "false"
...
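
Setting TOKENIZERS_PARALLELISM to "false" silences the Hugging Face tokenizers library's warning (and potential deadlock) when a process forks after the Rust tokenizer's thread pool has started, as happens with DataLoader workers. The variable must be set before any tokenizer does parallel work, hence the module-level assignment in main.py. A minimal standalone sketch (the gpt2 tokenizer is just an example, not what the harness loads):

    import os

    # Must be set before tokenizers start parallel work; otherwise forked
    # workers print "The current process just got forked ..." warnings.
    os.environ["TOKENIZERS_PARALLELISM"] = "false"

    from transformers import AutoTokenizer

    tok = AutoTokenizer.from_pretrained("gpt2")  # example model
    print(tok.tokenize("lm-evaluation-harness"))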