better error handling

13a3f1d6 · Baber · 7489a342 · 13a3f1d6 · 13a3f1d6
Commit 13a3f1d6 authored Aug 26, 2024 by Baber
Hide whitespace changes
Inline Side-by-side

Showing 2 changed files with 30 additions and 34 deletions

lm_eval/api/judge_task.py lm_eval/api/judge_task.py +16 -15

lm_eval/tasks/__init__.py lm_eval/tasks/__init__.py +14 -19

No files found.
--- a/lm_eval/api/judge_task.py
+++ b/lm_eval/api/judge_task.py
@@ -33,21 +33,22 @@ class JudgeTask(ConfigurableTask):
        resps = []
        # load json
-        with open(self.output_path, "r") as f:
+        if self.output_path is not None:
-            for line in f:
+            with open(self.output_path, "r") as f:
-                resp = json.loads(line)
+                for line in f:
-                resps.append({"resp": resp["resps"][0][0], "doc": resp["doc_id"]})
+                    resp = json.loads(line)
+                    resps.append({"resp": resp["resps"][0][0], "doc": resp["doc_id"]})
-        resps.sort(key=lambda x: x["doc"])
-        # TODO: add filter name to resps
+            resps.sort(key=lambda x: x["doc"])
-        resps = resps[::2]
+            # TODO: add filter name to resps
-        self.dataset["test"] = self.dataset["test"].add_column(
+            resps = resps[::2]
-            "resp", [resp["resp"] for resp in resps]
+            self.dataset["test"] = self.dataset["test"].add_column(
-        )
+                "resp", [resp["resp"] for resp in resps]
-        self.dataset["train"] = self.dataset["train"].add_column(
+            )
-            "resp", self.dataset["train"]["answer"]
+            self.dataset["train"] = self.dataset["train"].add_column(
-        )
+                "resp", self.dataset["train"]["answer"]
-        print("resp columns added")
+            )
+            print("resp columns added")
    # def process_docs(self, dataset: datasets.Dataset):
    #     resps = []

--- a/lm_eval/tasks/__init__.py
+++ b/lm_eval/tasks/__init__.py
@@ -265,25 +265,20 @@ class TaskManager:
                    ),
                    **config,
                }
-            if self._config_is_python_task(config):
+            if "output_type" in config:
-                if self._class_has_config_in_constructor(config["class"]):
+                task_object = JudgeTask(
-                    task_object = config["class"](config=config)
+                    config=config, output_path=config.get("output_path", None)
-                else:
+                )
-                    task_object = config["class"]()
+            # if self._config_is_python_task(config):
-                if isinstance(task_object, ConfigurableTask):
+            #     if self._class_has_config_in_constructor(config["class"]):
-                    # very scuffed: set task name here. TODO: fixme?
+            #         task_object = config["class"](config=config)
-                    task_object.config.task = config["task"]
+            #     else:
+            #         task_object = config["class"]()
+            #     if isinstance(task_object, ConfigurableTask):
+            #         # very scuffed: set task name here. TODO: fixme?
+            #         task_object.config.task = config["task"]
            else:
-                try:
+                task_object = ConfigurableTask(config=config)
-                    if "output_type" in config:
-                        task_object = JudgeTask(
-                            config=config, output_path=config.get("output_path")
-                        )
-                except Exception:
-                    config.pop("output_type")
-                    task_object = ConfigurableTask(config=config)
-                else:
-                    task_object = ConfigurableTask(config=config)
            return {task: task_object}
@@ -502,7 +497,7 @@ class TaskManager:
                                        "`group` and `group_alias` keys in tasks' configs will no longer be used in the next release of lm-eval. "
                                        "`tag` will be used to allow to call a collection of tasks just like `group`. "
                                        "`group` will be removed in order to not cause confusion with the new ConfigurableGroup "
-                                        "which will be the offical way to create groups with addition of group-wide configuations."
+                                        "which will be the official way to create groups with addition of group-wide configurations."
                                    )
                                    print_info = False
                                    # attr = "tag"