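fix: math500 task — rename metric key "accuracy" to "acc", correct the get_metric return annotation, and raise repeats to 64 and max_gen_toks to 32768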

1f63b3dc · Baber · 6bc8b8cc · 1f63b3dc · 1f63b3dc
Commit 1f63b3dc authored Jan 21, 2025 by Baber
Hide whitespace changes
Inline Side-by-side

Showing with 5 additions and 5 deletions

lm_eval/tasks/math500/math500.yaml lm_eval/tasks/math500/math500.yaml +2 -2

lm_eval/tasks/math500/utils.py lm_eval/tasks/math500/utils.py +3 -3

No files found.
--- a/lm_eval/tasks/math500/math500.yaml
+++ b/lm_eval/tasks/math500/math500.yaml
@@ -7,10 +7,10 @@ doc_to_text: "Solve the following math problem efficiently and clearly:\n\n- For
 #process_results: !function utils.process_results
 doc_to_target: "{{answer if few_shot is undefined else solution}}"
 process_results: !function utils.process_results
-repeats: 2
+repeats: 64
 generation_kwargs:
  until: []
-  max_gen_toks: 1024
+  max_gen_toks: 32768
  do_sample: true
  top_p: 0.95
  temperature: 0.6

--- a/lm_eval/tasks/math500/utils.py
+++ b/lm_eval/tasks/math500/utils.py
@@ -78,7 +78,7 @@ def process_results(docs: dict, resps: list[dict]) -> dict:
    return resps[0]
 # calculate pass@1 for all results
-def get_metric(predictions: list[list[str]], references: list[dict]) -> Dict[str, int]:
+def get_metric(predictions: list[list[str]], references: list[dict]) -> list[dict]:
    res = []
    for reference, candidates in zip(references, predictions):
        for candidate in candidates:
@@ -89,12 +89,12 @@ def get_metric(predictions: list[list[str]], references: list[dict]) -> Dict[str
                retval = 1
                results = {
-                    "accuracy": retval,
+                    "acc": retval,
                }
                res.append(results)
                break
        else:
-            res.append({"accuracy": 0})
+            res.append({"acc": 0})
    return res