Brier score for loglikelihood task

5e4f1799 · lintangsutawika · 6998762a · 5e4f1799
Commit 5e4f1799 authored Nov 14, 2023 by lintangsutawika
Hide whitespace changes
Inline Side-by-side

Showing with 7 additions and 0 deletions

lm_eval/api/task.py lm_eval/api/task.py +7 -0

No files found.
--- a/lm_eval/api/task.py
+++ b/lm_eval/api/task.py
@@ -960,9 +960,16 @@ class ConfigurableTask(Task):
        if self.OUTPUT_TYPE == "loglikelihood":
            results = results[0]
            ll, is_greedy = results
+            prob_norm = np.exp(ll)
+
            return {
                **({"perplexity": ll} if "perplexity" in use_metric else {}),
                **({"acc": int(is_greedy)} if "acc" in use_metric else {}),
+                **(
+                    {"brier_score": (0, [prob_norm])} # Gold is Index 0
+                    if "brier_score" in use_metric
+                    else {}
+                ),
            }
        elif self.OUTPUT_TYPE == "loglikelihood_rolling":
            (loglikelihood,) = results