Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
lm-evaluation-harness
Commits
ac47d481
Unverified
Commit
ac47d481
authored
Feb 13, 2021
by
Leo Gao
Committed by
GitHub
Feb 13, 2021
Browse files
Merge branch 'master' into translation
parents
404530d0
0601c909
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
103 additions
and
1 deletion
+103
-1
lm_eval/tasks/__init__.py
lm_eval/tasks/__init__.py
+4
-1
lm_eval/tasks/headqa.py
lm_eval/tasks/headqa.py
+48
-0
lm_eval/tasks/mathqa.py
lm_eval/tasks/mathqa.py
+51
-0
No files found.
lm_eval/tasks/__init__.py
View file @
ac47d481
...
...
@@ -24,7 +24,8 @@ from . import sciq
from
.
import
webqs
from
.
import
qa4mre
from
.
import
translation
from
.
import
headqa
from
.
import
mathqa
TASK_REGISTRY
=
{
# GLUE
...
...
@@ -70,6 +71,8 @@ TASK_REGISTRY = {
# "squad": squad.SQuAD, # not implemented yet
"race"
:
race
.
RACE
,
# "naturalqs": naturalqs.NaturalQs, # not implemented yet
"headqa"
:
headqa
.
HeadQA
,
"mathqa"
:
mathqa
.
MathQA
,
"webqs"
:
webqs
.
WebQs
,
"wsc273"
:
wsc273
.
WinogradSchemaChallenge273
,
"winogrande"
:
winogrande
.
Winogrande
,
...
...
lm_eval/tasks/headqa.py
0 → 100644
View file @
ac47d481
from
.
common
import
HFTask
from
lm_eval.base
import
MultipleChoiceTask
class HeadQA(HFTask, MultipleChoiceTask):
    """Multiple-choice task over the HuggingFace `head_qa` dataset.

    Each raw record is reshaped into the standard multiple-choice doc
    form (``query`` / ``choices`` / ``gold``) expected by
    ``MultipleChoiceTask``.
    """

    DATASET_PATH = "head_qa"
    DATASET_NAME = None

    def has_training_docs(self):
        return True

    def has_validation_docs(self):
        return True

    def has_test_docs(self):
        return True

    def _convert_standard(self, doc):
        """Convert one raw dataset record into the standard doc dict."""
        # "ra" is the 1-based index of the right answer in the dataset,
        # so shift it down to the 0-based gold index.
        return {
            "id": doc["qid"],
            "query": "Question: " + doc["qtext"] + "\nAnswer:",
            "choices": [option["atext"] for option in doc["answers"]],
            "gold": int(doc["ra"]) - 1,
        }

    def _load_docs(self, docs):
        # Lazily convert records as the caller consumes them.
        return (self._convert_standard(record) for record in docs)

    def training_docs(self):
        return self._load_docs(super().training_docs())

    def validation_docs(self):
        return self._load_docs(super().validation_docs())

    def test_docs(self):
        return self._load_docs(super().test_docs())

    def fewshot_description(self):
        # TODO: figure out description
        return ""

    def doc_to_text(self, doc):
        return doc["query"]
lm_eval/tasks/mathqa.py
0 → 100644
View file @
ac47d481
from
.
common
import
HFTask
from
lm_eval.base
import
mean
,
rf
,
MultipleChoiceTask
import
re
class MathQA(HFTask, MultipleChoiceTask):
    """Multiple-choice task over the HuggingFace `math_qa` dataset.

    Each raw record is reshaped into the standard multiple-choice doc
    form (``query`` / ``choices`` / ``gold``) expected by
    ``MultipleChoiceTask``.
    """

    DATASET_PATH = "math_qa"
    DATASET_NAME = None

    def has_training_docs(self):
        return True

    def has_validation_docs(self):
        return True

    def has_test_docs(self):
        return True

    def _convert_standard(self, doc):
        """Convert one raw dataset record into the standard doc dict."""
        # The dataset stores the correct option as a single letter a-e;
        # map it to a 0-based gold index.
        gold = ["a", "b", "c", "d", "e"].index(doc["correct"])
        # doc['options'] packs every option into one string, presumably of
        # the form "a ) ... , b ) ... , ... , e ) ..." — split it back out,
        # then strip the "x ) " prefix (4 chars) and trailing " ," from each.
        spans = re.findall(r"[abcd] \) .*?, |e \) .*?$", doc["options"])
        answer_texts = [span[4:].rstrip(" ,") for span in spans]
        return {
            "query": "Question: " + doc["Problem"] + "\nAnswer:",
            "choices": answer_texts,
            "gold": gold,
        }

    def _load_docs(self, docs):
        # Lazily convert records as the caller consumes them.
        return (self._convert_standard(record) for record in docs)

    def training_docs(self):
        return self._load_docs(super().training_docs())

    def validation_docs(self):
        return self._load_docs(super().validation_docs())

    def test_docs(self):
        return self._load_docs(super().test_docs())

    def fewshot_description(self):
        # TODO: figure out description
        return ""

    def doc_to_text(self, doc):
        return doc["query"]
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment