Unverified Commit e0cfeb90 authored by Jonathan Tow's avatar Jonathan Tow Committed by GitHub
Browse files

Merge branch 'master' into researcher2

parents f9b81151 6caa0afd
This diff is collapsed.
"""
MathQA: Towards Interpretable Math Word Problem Solving with Operation-Based Formalisms
https://arxiv.org/pdf/1905.13319.pdf
MathQA is a large-scale dataset of 37k English multiple-choice math word problems
covering multiple math domain categories by modeling operation programs corresponding
to word problems in the AQuA dataset (Ling et al., 2017).
Homepage: https://math-qa.github.io/math-QA/
"""
import re import re
from lm_eval.base import MultipleChoiceTask from lm_eval.base import MultipleChoiceTask
from . common import HFTask
class MathQA(HFTask, MultipleChoiceTask): _CITATION = """
@misc{amini2019mathqa,
title={MathQA: Towards Interpretable Math Word Problem Solving with Operation-Based Formalisms},
author={Aida Amini and Saadia Gabriel and Peter Lin and Rik Koncel-Kedziorski and Yejin Choi and Hannaneh Hajishirzi},
year={2019},
eprint={1905.13319},
archivePrefix={arXiv},
primaryClass={cs.CL}
}
"""
class MathQA(MultipleChoiceTask):
VERSION = 0 VERSION = 0
DATASET_PATH = "math_qa" DATASET_PATH = "math_qa"
DATASET_NAME = None DATASET_NAME = None
...@@ -17,13 +38,23 @@ class MathQA(HFTask, MultipleChoiceTask): ...@@ -17,13 +38,23 @@ class MathQA(HFTask, MultipleChoiceTask):
def has_test_docs(self):
    """Report that this task provides a test split.

    Returns:
        Always ``True``.
    """
    return True
def training_docs(self):
    """Return the processed training documents, building them on first use.

    Each record of ``self.dataset["train"]`` is passed through
    ``self._process_doc``. The resulting list is stored on
    ``self._training_docs`` and returned as-is on later calls, so the
    conversion runs only while that attribute is ``None``.

    Returns:
        The cached list of processed training documents.
    """
    if self._training_docs is None:
        # Materialise once; later calls reuse the same list object.
        self._training_docs = list(map(self._process_doc, self.dataset["train"]))
    return self._training_docs
def validation_docs(self):
    """Return the validation documents, processed lazily.

    Returns:
        A ``map`` iterator that applies ``self._process_doc`` to each record
        of ``self.dataset["validation"]``. Nothing is cached here, so every
        call builds a fresh single-pass iterator.
    """
    return map(self._process_doc, self.dataset["validation"])
def test_docs(self):
return map(self._process_doc, self.dataset["test"])
def _process_doc(self, doc):
answer_idx = ['a', 'b', 'c', 'd', 'e'].index(doc['correct']) answer_idx = ['a', 'b', 'c', 'd', 'e'].index(doc['correct'])
choices = [c[4:].rstrip(" ,") for c in re.findall(r"[abcd] \) .*?, |e \) .*?$", doc['options'])] choices = [c[4:].rstrip(" ,") for c in re.findall(r"[abcd] \) .*?, |e \) .*?$", doc['options'])]
out_doc = { out_doc = {
"query": "Question: " + doc['Problem'] +"\nAnswer:", "query": "Question: " + doc['Problem'] + "\nAnswer:",
"choices": choices, "choices": choices,
"gold": answer_idx, "gold": answer_idx,
} }
......
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment