Change MathQA to use numeric answer strings instead of the letters a, b, c, d, e as choices

81e42932 · Anthony DiPofi · c6c67272 · 81e42932
Commit 81e42932 authored Feb 12, 2021 by Anthony DiPofi
Show whitespace changes
Inline Side-by-side

Showing with 7 additions and 4 deletions

lm_eval/tasks/mathqa.py +7 -4

No files found.
--- a/lm_eval/tasks/mathqa.py
+++ b/lm_eval/tasks/mathqa.py
 from . common import HFTask
 from lm_eval.base import mean, rf, MultipleChoiceTask
+import re
 class MathQA(HFTask, MultipleChoiceTask):
    DATASET_PATH = "math_qa"
@@ -17,10 +17,13 @@ class MathQA(HFTask, MultipleChoiceTask):
    def _convert_standard(self, doc):
+        answer_idx = ['a', 'b', 'c', 'd', 'e'].index(doc['correct'])
+        choices = [c[4:].rstrip(" ,") for c in re.findall(r"[abcd] \) .*?, |e .*?$", doc['options'])]
        out_doc = {
-            "query": "Question: " + doc['Problem'] +" "+ doc["options"] + "\nAnswer:",
+            "query": "Question: " + doc['Problem'] +"\nAnswer:",
-            "choices": ['a', 'b', 'c', 'd', 'e'],
+            "choices": choices,
-            "gold": ['a', 'b', 'c', 'd', 'e'].index(doc['correct']),
+            "gold": answer_idx,
        }
        return out_doc