Merge branch 'edge-case-lowbits' of...

Merge branch 'edge-case-lowbits' of https://github.com/EleutherAI/lm-evaluation-harness into edge-case-lowbits

Merge branch 'edge-case-lowbits' of...
Merge branch 'edge-case-lowbits' of https://github.com/EleutherAI/lm-evaluation-harness into edge-case-lowbits
93b7a418 · haileyschoelkopf · 899a24c7 · 0b62e042 · 93b7a418
Commit 93b7a418 authored Jul 03, 2023 by haileyschoelkopf
Hide whitespace changes
Inline Side-by-side

Showing 1 changed file with 2 additions and 2 deletions

lm_eval/api/task.py lm_eval/api/task.py +2 -2

No files found.
--- a/lm_eval/api/task.py
+++ b/lm_eval/api/task.py
@@ -837,7 +837,6 @@ class ConfigurableTask(Task):
            else:
                gold = int(self.doc_to_target(doc))

-            pred = np.argmax(lls)
            # retrieve choices in List[str] form, to compute choice lengths, etc.
            choices = ast.literal_eval(
                utils.apply_template(
@@ -855,6 +854,8 @@ class ConfigurableTask(Task):
                # and this stores our "regular" conditional loglikelihoods
                lls = lls[::2]

+            pred = np.argmax(lls)
+
            acc = 1.0 if np.argmax(lls) == gold else 0.0
            completion_len = np.array([float(len(i)) for i in choices])
            acc_norm = 1.0 if np.argmax(lls / completion_len) == gold else 0.0
@@ -866,7 +867,6 @@ class ConfigurableTask(Task):
                **({"acc_norm": acc_norm} if "acc_norm" in use_metric else {}),
            }

-            # TODO: set which normalization metrics should be reported, and calculate them
            if "exact_match" in self._metric_fn_list.keys():
                # TODO: this gets score of 0 on arc_challenge for pythia-70m. need to test that this works properly
                is_greedy = is_greedy[gold]  # take value for the gold answer