changes

e5161a6d · lintangsutawika · 57f08e40 · e5161a6d · e5161a6d · e5161a6d
Commit e5161a6d authored Jul 14, 2023 by lintangsutawika
5 changed files
--- a/lm_eval/tasks/__init__.py
+++ b/lm_eval/tasks/__init__.py
@@ -61,21 +61,30 @@ def include_benchmarks(task_dir, benchmark_dir="benchmarks"):
        if (subdirs == [] or subdirs == ["__pycache__"]) and (len(file_list) > 0):
            for f in file_list:
                if f.endswith(".yaml"):
-                    benchmark_path = os.path.join(root, f)
+                    try:
+                        benchmark_path = os.path.join(root, f)
-                    with open(benchmark_path, "rb") as file:
-                        yaml_config = yaml.full_load(file)
+                        with open(benchmark_path, "rb") as file:
+                            yaml_config = yaml.full_load(file)
-                    assert "group" in yaml_config
-                    group = yaml_config["group"]
+                        assert "group" in yaml_config
-                    task_list = yaml_config["task"]
+                        group = yaml_config["group"]
-                    task_names = utils.pattern_match(task_list, ALL_TASKS)
+                        task_list = yaml_config["task"]
-                    for task in task_names:
+                        task_names = utils.pattern_match(task_list, ALL_TASKS)
-                        if task in TASK_REGISTRY:
+                        for task in task_names:
-                            if group in GROUP_REGISTRY:
+                            if task in TASK_REGISTRY:
-                                GROUP_REGISTRY[group].append(task)
+                                if group in GROUP_REGISTRY:
-                            else:
+                                    GROUP_REGISTRY[group].append(task)
-                                GROUP_REGISTRY[group] = [task]
+                                else:
+                                    GROUP_REGISTRY[group] = [task]
+                                    ALL_TASKS.add(group)
+                    except Exception as error:
+                        eval_logger.warning(
+                            "Failed to load benchmark in\n"
+                            f"                                 {benchmark_path}\n"
+                            "                                 Benchmark will not be added to registry\n"
+                            f"                                 Error: {error}"
+                        )
 task_dir = os.path.dirname(os.path.abspath(__file__)) + "/"

--- a/lm_eval/tasks/benchmarks/pythia.yaml
+++ b/lm_eval/tasks/benchmarks/pythia.yaml
@@ -8,6 +8,6 @@ task:
  - winogrande
  - arc_challenge
  - arc_easy
-  - logiqa
+  # - logiqa
-  - blimp_*
+  # - blimp_*
-  - hendrycksTest*
+  # - hendrycksTest*
--- a/lm_eval/tasks/winogrande/default.yaml
+++ b/lm_eval/tasks/winogrande/default.yaml
-group:
-  - super-glue-lm-eval-v1
 task: winogrande
 dataset_path: winogrande
 dataset_name: winogrande_xl
 output_type: multiple_choice
-should_decontaminate: true
-doc_to_decontamination_query: "{{sentence}}"
 training_split: train
 validation_split: validation
+doc_to_text: !function preprocess_winogrande.doc_to_text
+doc_to_target: !function preprocess_winogrande.doc_to_target
+doc_to_choice: !function preprocess_winogrande.doc_to_choice
+should_decontaminate: true
+doc_to_decontamination_query: sentence
 metric_list:
-  - metric: exact_match
+  - metric: acc
    aggregation: mean
    higher_is_better: true
-    ignore_case: true
-    ignore_punctuation: true
--- a/lm_eval/tasks/winogrande/preprocess.py
+++ b/lm_eval/tasks/winogrande/preprocess.py
-import re
-from lm_eval.utils import general_detokenize
-def partial_context(doc, option):
-    # Substitute the pronoun in the sentence with the specified option
-    # and ignore everything after.
-    pronoun_loc = doc["sentence"].index("_")
-    return doc["sentence"][:pronoun_loc] + option
-def partial_target(doc):
-    # The target is everything after the document specified pronoun.
-    pronoun_loc = doc["sentence"].index("_") + 1
-    return " " + doc["sentence"][pronoun_loc:].strip()
--- a/lm_eval/tasks/winogrande/winogrande.yaml
+++ b/lm_eval/tasks/winogrande/winogrande.yaml
-task: winogrande
-dataset_path: winogrande
-dataset_name: winogrande_xl
-output_type: multiple_choice
-training_split: train
-validation_split: validation
-doc_to_text: !function preprocess_winogrande.doc_to_text
-doc_to_target: !function preprocess_winogrande.doc_to_target
-doc_to_choice: !function preprocess_winogrande.doc_to_choice
-metric_list:
-  - metric: acc
-    aggregation: mean
-    higher_is_better: true