Commit 93b7a418 authored by haileyschoelkopf
Browse files

Merge branch 'edge-case-lowbits' of...

Merge branch 'edge-case-lowbits' of https://github.com/EleutherAI/lm-evaluation-harness into edge-case-lowbits
parents 899a24c7 0b62e042
......@@ -837,7 +837,6 @@ class ConfigurableTask(Task):
else:
gold = int(self.doc_to_target(doc))
pred = np.argmax(lls)
# retrieve choices in List[str] form, to compute choice lengths, etc.
choices = ast.literal_eval(
utils.apply_template(
......@@ -855,6 +854,8 @@ class ConfigurableTask(Task):
# and this stores our "regular" conditional loglikelihoods
lls = lls[::2]
pred = np.argmax(lls)
acc = 1.0 if np.argmax(lls) == gold else 0.0
completion_len = np.array([float(len(i)) for i in choices])
acc_norm = 1.0 if np.argmax(lls / completion_len) == gold else 0.0
......@@ -866,7 +867,6 @@ class ConfigurableTask(Task):
**({"acc_norm": acc_norm} if "acc_norm" in use_metric else {}),
}
# TODO: set which normalization metrics should be reported, and calculate them
if "exact_match" in self._metric_fn_list.keys():
# TODO: this gets score of 0 on arc_challenge for pythia-70m. need to test that this works properly
is_greedy = is_greedy[gold] # take value for the gold answer
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment