# qasper.py

""" 
A Dataset of Information-Seeking Questions and Answers Anchored in Research Papers
https://arxiv.org/abs/2105.03011

@article{DBLP:journals/corr/abs-2105-03011,
  author    = {Pradeep Dasigi and
               Kyle Lo and
               Iz Beltagy and
               Arman Cohan and
               Noah A. Smith and
               Matt Gardner},
  title     = {A Dataset of Information-Seeking Questions and Answers Anchored in
               Research Papers},
  journal   = {CoRR},
  volume    = {abs/2105.03011},
  year      = {2021},
  url       = {https://arxiv.org/abs/2105.03011},
  eprinttype = {arXiv},
  eprint    = {2105.03011},
  timestamp = {Fri, 14 May 2021 12:13:30 +0200},
  biburl    = {https://dblp.org/rec/journals/corr/abs-2105-03011.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
"""
import math

from lm_eval.base import rf
from lm_eval.metrics import f1_score

from .common import HFTask


class QASPER(HFTask):
    """QASPER task: questions about research papers, one QA pair per doc.

    Each paper record is flattened by `process_doc` into dicts with the keys
    ``title``, ``abstract``, ``question``, ``answer_str`` and ``answer_type``.
    ``answer_type`` is one of ``"unanswerable"``, ``"bool"``,
    ``"free form answer"`` or ``"extractive spans"``.
    """

    VERSION = 0
    DATASET_PATH = "qasper"
    DATASET_NAME = None

    def doc_to_text(self, doc):
        """Render the prompt: paper title, abstract and question, ending in ``A:``."""
        # No trailing space after "A:": the target and every continuation built
        # in this class already start with a space, so adding one here would
        # produce a double space in the scored text.
        return (
            "TITLE: "
            + doc["title"]
            + "\n"
            + "ABSTRACT: "
            + doc["abstract"]
            + "\n\n"
            + "Q: "
            + doc["question"]
            + "\n\n"
            + "A:"
        )

    def doc_to_target(self, doc):
        """Return the gold answer string, prefixed with a single space."""
        return " " + doc["answer_str"]

    def training_docs(self):
        """Yield flattened QA docs from the ``train`` split."""
        for doc in self.data["train"]:
            yield from self.process_doc(doc)

    def validation_docs(self):
        """Yield flattened QA docs from the ``validation`` split."""
        for doc in self.data["validation"]:
            yield from self.process_doc(doc)

    @staticmethod
    def _question_answer_pairs(qas):
        """Yield ``(question, answers)`` pairs from a paper's ``qas`` field."""
        if isinstance(qas, dict):
            # NOTE(review): the Hugging Face `qasper` dataset appears to expose
            # `qas` as a dict of parallel lists -- confirm against the dataset card.
            yield from zip(qas["question"], qas["answers"])
        else:
            # Fallback: the traversal the original implementation used.
            for qa in qas:
                yield from zip(qa["question"], qa["answers"])

    @staticmethod
    def _categorise_answer(answer):
        """Map one annotated answer to ``(answer_str, answer_type)``.

        Returns ``None`` when the annotation matches none of the known answer
        kinds. The order of the checks is significant: unanswerable wins over
        yes/no, which wins over free-form text, which wins over extractive spans.
        """
        if answer["unanswerable"]:
            return "unanswerable", "unanswerable"
        if answer["yes_no"]:
            return "Yes", "bool"
        if answer["yes_no"] is not None:
            # Falsy but not None, i.e. an explicit "no".
            return "No", "bool"
        if answer["free_form_answer"]:
            return answer["free_form_answer"], "free form answer"
        if answer["extractive_spans"]:
            return ", ".join(answer["extractive_spans"]), "extractive spans"
        return None

    def process_doc(self, doc):
        """Given a `doc`, flatten it out so that each JSON blob
        contains exactly one question and one answer. Logic taken from
        the reference implementation available at
        https://github.com/allenai/qasper-led-baseline/blob/main/scripts/evaluator.py

        :param doc:
            One paper record with ``title``, ``abstract`` and ``qas`` fields.
        :return:
            A list of dicts, one per (question, annotated answer) pair.
            Annotations that match no known answer kind are skipped.
        """
        obs_list = []
        for question, answers in self._question_answer_pairs(doc["qas"]):
            if isinstance(answers, dict):
                # NOTE(review): presumably the per-question annotations are
                # wrapped under an "answer" key in the HF layout -- confirm.
                answers = answers["answer"]
            for answer in answers:
                categorised = self._categorise_answer(answer)
                if categorised is None:
                    # Nothing usable in this annotation; do not reuse the
                    # values left over from a previous iteration.
                    continue
                answer_str, answer_type = categorised
                obs_list.append(
                    {
                        "title": doc["title"],
                        "abstract": doc["abstract"],
                        "question": question,
                        "answer_str": answer_str,
                        "answer_type": answer_type,
                    }
                )
        return obs_list

    def process_results(self, doc, results):
        """Turn the LM responses for one doc into per-metric ``(gold, pred)`` pairs.

        :param doc:
            A flattened doc as produced by `process_doc`.
        :param results:
            The responses for the requests built by `construct_requests`.
            Assumes the harness returns one response per request, in request
            order -- TODO confirm against the evaluator.
        :return:
            A dict with ``f1_un`` always set and ``f1_yn`` set for yes/no docs.
        """
        ll_unanswerable, *res = results
        res_dict = {}

        # Handle unanswerability first. The response is a log-likelihood, so
        # "more likely than not" is a comparison against log(0.5).
        unanswerable_gold = int(doc["answer_type"] == "unanswerable")
        unanswerable_pred = int(ll_unanswerable > math.log(0.5))
        res_dict["f1_un"] = (unanswerable_gold, unanswerable_pred)

        # Handle yes/no questions
        if doc["answer_type"] == "bool":
            ll_yes, ll_no = res
            # `process_doc` stores "Yes"/"No"; compare case-insensitively.
            gold = int(doc["answer_str"].lower() == "yes")
            pred = int(ll_yes > ll_no)
            res_dict["f1_yn"] = (gold, pred)

        # TODO: free-form and extractive completions are requested but not
        # scored yet, so "f1_fr" / "f1_ex" are never emitted.
        return res_dict

    def aggregation(self):
        """Map each metric name to the function that aggregates its values."""
        return {
            "f1_un": f1_score,
            "f1_yn": f1_score,
            "f1_fr": f1_score,
            "f1_ex": f1_score,
        }

    def construct_requests(self, doc, ctx):
        """Uses RequestFactory to construct Requests and returns an iterable of
        Requests which will be sent to the LM.

        The first request is always the log-likelihood of " unanswerable". It is
        followed by one greedy completion for free-form/extractive docs, or by
        the log-likelihoods of " yes" and " no" for yes/no docs.

        :param doc:
            The document as returned from training_docs, validation_docs, or test_docs.
        :param ctx: str
            The context string, generated by fewshot_context. This includes the natural
            language description, as well as the few shot examples, and the question
            part of the document for `doc`.
        """
        # rf.loglikelihood unpacks into a pair; only the first element is
        # scored, exactly as for the yes/no requests below.
        ll_unanswerable, _ = rf.loglikelihood(ctx, " " + "unanswerable")
        requests = [ll_unanswerable]
        if doc["answer_type"] in ("free form answer", "extractive spans"):
            requests.append(rf.greedy_until(ctx, ["\n"]))
        elif doc["answer_type"] == "bool":
            ll_yes, _ = rf.loglikelihood(ctx, " yes")
            ll_no, _ = rf.loglikelihood(ctx, " no")
            requests.extend([ll_yes, ll_no])
        return requests