"middleware/openai_test.go" did not exist on "630e7dc6ff461cc957a1314d8f27986f0d7b92ca"
Commit 79492627 authored by Stephen Hogg's avatar Stephen Hogg
Browse files

Add initial draft of QASPER; register with package; yet to complete process_results

parent 05590e11
......@@ -29,6 +29,7 @@ from . import triviaqa
from . import pubmedqa
from . import sciq
from . import webqs
from . import qasper
from . import qa4mre
from . import translation
from . import headqa
......@@ -121,6 +122,8 @@ TASK_REGISTRY = {
"pubmedqa" : pubmedqa.Pubmed_QA,
"sciq" : sciq.SciQ,
"qasper": qasper.QASPER,
"qa4mre_2011" : qa4mre.QA4MRE_2011,
"qa4mre_2012" : qa4mre.QA4MRE_2012,
"qa4mre_2013" : qa4mre.QA4MRE_2013,
......
from lm_eval.base import rf
from .common import HFTask
class QASPER(HFTask):
    """QASPER: Question Answering over Scientific (NLP) Papers.

    Each raw HF example is one paper carrying several questions, each with
    several (possibly conflicting) answer annotations; `process_doc` flattens
    that into one JSON blob per (question, answer) pair so the standard
    doc_to_text / doc_to_target machinery can consume it.
    """

    VERSION = 0
    DATASET_PATH = "qasper"
    DATASET_NAME = None

    def doc_to_text(self, doc):
        # this method is invoked by tests only
        return (
            "TITLE: "
            + doc["title"]
            + "\n"
            + "ABSTRACT: "
            + doc["abstract"]
            + "\n\n"
            + "Q: "
            + doc["question"]
            + "\n\n"
            + "A: "
        )

    def doc_to_target(self, doc):
        # this method is invoked by tests only
        return " " + doc["answer_str"]

    def training_docs(self):
        # Lazily flatten every training paper into per-question docs.
        for doc in self.data["train"]:
            yield from self.process_doc(doc)

    def validation_docs(self):
        # BUG FIX: previously iterated self.data["train"], so "validation"
        # evaluation silently reran the training split.
        for doc in self.data["validation"]:
            yield from self.process_doc(doc)

    def process_doc(self, doc):
        """Given a `doc`, flatten it out so that each JSON blob
        contains exactly one question and one answer. Logic taken from
        the reference implementation available at
        https://github.com/allenai/qasper-led-baseline/blob/main/scripts/evaluator.py

        Answer precedence: unanswerable > yes/no > free-form > extractive
        spans. Note `answer["yes_no"]` is ternary (True/False/None); the
        two `yes_no` branches below distinguish True from a non-None False.
        """
        obs_list = []
        for qa in doc["qas"]:
            for question, answer_list in zip(qa["question"], qa["answers"]):
                for answer in answer_list:
                    if answer["unanswerable"]:
                        answer_str = "unanswerable"
                        answer_type = "unanswerable"
                    elif answer["yes_no"]:
                        answer_str = "Yes"
                        answer_type = "bool"
                    elif answer["yes_no"] is not None:
                        answer_str = "No"
                        answer_type = "bool"
                    elif answer["free_form_answer"]:
                        answer_str = answer["free_form_answer"]
                        answer_type = "free form answer"
                    elif answer["extractive_spans"]:
                        answer_str = ", ".join(answer["extractive_spans"])
                        answer_type = "extractive spans"
                    else:
                        # BUG FIX: no annotation matched — previously
                        # answer_str/answer_type would be unbound (NameError)
                        # or carry a stale value from an earlier iteration.
                        continue
                    # BUG FIX: was `obs_list.append[...]` — subscripting the
                    # bound method raises TypeError; must be a call.
                    obs_list.append(
                        {
                            "title": doc["title"],
                            "abstract": doc["abstract"],
                            "question": question,
                            "answer_str": answer_str,
                            "answer_type": answer_type,
                        }
                    )
        return obs_list

    def process_results(self, doc, results):
        # TODO(review): draft placeholder — defers entirely to the HFTask
        # default; QASPER-specific metrics (answer F1) are not implemented yet.
        return super().process_results(doc, results)

    def construct_requests(self, doc, ctx):
        """Uses RequestFactory to construct Requests and returns an iterable of
        Requests which will be sent to the LM.

        :param doc:
            The document as returned from training_docs, validation_docs, or test_docs.
        :param ctx: str
            The context string, generated by fewshot_context. This includes the natural
            language description, as well as the few shot examples, and the question
            part of the document for `doc`.
        """
        # Greedy generation up to a newline, plus the log-likelihood of the
        # literal " unanswerable" continuation for abstention scoring.
        continuation = rf.greedy_until(ctx, ["\n"])
        is_unanswerable = rf.loglikelihood(ctx, " " + "unanswerable")
        return continuation, is_unanswerable
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment