Make acc_norm a separate metric

3eaa493e · Leo Gao · GitHub · fae5fe66 · 3eaa493e
Commit 3eaa493e (unverified), authored by Leo Gao on Apr 10, 2021; committed by GitHub on Apr 10, 2021
Hide whitespace changes
Inline Side-by-side

Showing with 8 additions and 5 deletions

lm_eval/base.py +8 -5

No files found.
--- a/lm_eval/base.py
+++ b/lm_eval/base.py
@@ -225,22 +225,25 @@ class MultipleChoiceTask(Task):
    def process_results(self, doc, results):
        gold = doc["gold"]
+        acc = 1. if np.argmax(results) == gold else 0.
        completion_len = np.array([float(len(i)) for i in doc["choices"]])
+        acc_norm = 1. if np.argmax(results / completion_len) == gold else 0.
-        acc = 1. if np.argmax(results/completion_len) == gold else 0.
        return {
-            "acc": acc
+            "acc": acc,
+            "acc_norm": acc_norm,
        }
    def higher_is_better(self):
        return {
-            "acc": True
+            "acc": True,
+            "acc_norm": True,
        }
    def aggregation(self):
        return {
-            "acc": mean
+            "acc": mean,
+            "acc_norm": mean,
        }