gaoqiong / lm-evaluation-harness

Commit 09a71562 authored Jul 13, 2023 by haileyschoelkopf

log all arguments per doc

parent 165f8493
Showing 1 changed file with 5 additions and 2 deletions.
lm_eval/evaluator.py (+5, -2)
@@ -213,7 +213,10 @@ def evaluate(
         # aggregate Instances by LM method requested to get output.
         reqtype = (
             "loglikelihood"
-            if (task.OUTPUT_TYPE == "multiple_choice" or task.OUTPUT_TYPE == "winograd_schema")
+            if (
+                task.OUTPUT_TYPE == "multiple_choice"
+                or task.OUTPUT_TYPE == "winograd_schema"
+            )
             else task.OUTPUT_TYPE
         )  # TODO: this is hacky, fix in task.py
         requests[reqtype].extend(task.instances)
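
For context, this hunk sits in the loop that buckets every task's Instances by the LM method that will serve them: multiple_choice and winograd_schema docs are both answered by scoring candidates with loglikelihood calls. A minimal standalone sketch of that aggregation, where FakeTask, the instance strings, and the greedy_until type are made-up stand-ins for the harness's task and Instance objects:

    from collections import defaultdict
    from dataclasses import dataclass, field

    # stand-in for a harness task; only OUTPUT_TYPE and instances matter here
    @dataclass
    class FakeTask:
        OUTPUT_TYPE: str
        instances: list = field(default_factory=list)

    tasks = [
        FakeTask("multiple_choice", ["inst_a", "inst_b"]),
        FakeTask("greedy_until", ["inst_c"]),
    ]

    requests = defaultdict(list)
    for task in tasks:
        # multiple_choice and winograd_schema are both scored via loglikelihood
        reqtype = (
            "loglikelihood"
            if task.OUTPUT_TYPE in ("multiple_choice", "winograd_schema")
            else task.OUTPUT_TYPE
        )
        requests[reqtype].extend(task.instances)

    print(dict(requests))
    # {'loglikelihood': ['inst_a', 'inst_b'], 'greedy_until': ['inst_c']}

Each bucket can then be dispatched in one batch to the matching LM method, which is why the grouping happens up front.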
@@ -284,7 +287,7 @@ def evaluate(
                     "doc_id": doc_id,
                     "doc": doc,
                     "target": target,
-                    "arguments": requests[0].args,
+                    "arguments": [req.args for req in requests],
                     "resps": [req.resps for req in requests],
                     "filtered_resps": [req.filtered_resps[key] for req in requests],
                 }
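
The substantive change is the "arguments" field of the per-doc log: previously only the first request's args were kept, even though a single doc can spawn several requests (for multiple_choice tasks, one loglikelihood call per answer choice). A toy before/after illustration, where Request and the prompts are made up and stand in for the harness's Instance objects:

    from typing import NamedTuple

    # stand-in for an Instance; only .args matters here
    class Request(NamedTuple):
        args: tuple

    # hypothetical 4-way multiple-choice doc: one request per answer choice
    requests = [
        Request(("Q: 2+2=?\nA:", " 3")),
        Request(("Q: 2+2=?\nA:", " 4")),
        Request(("Q: 2+2=?\nA:", " 5")),
        Request(("Q: 2+2=?\nA:", " 6")),
    ]

    # before this commit: only the first choice's arguments were logged
    print(requests[0].args)                # ('Q: 2+2=?\nA:', ' 3')

    # after: the arguments of every request made for the doc are logged,
    # matching how "resps" and "filtered_resps" were already collected
    print([req.args for req in requests])

This brings "arguments" in line with the other per-doc fields, which already aggregated over all requests.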