from lm_eval.base import rf
from .common import HFTask


class QASPER(HFTask):
    """QASPER: question answering over NLP research papers.

    Each raw HF example (one paper) carries multiple question/answer
    annotations; `process_doc` flattens it into one observation per answer.
    """

    VERSION = 0
    DATASET_PATH = "qasper"
    DATASET_NAME = None

    def doc_to_text(self, doc):
        # this method is invoked by tests only
        return (
            "TITLE: " + doc["title"] + "\n"
            + "ABSTRACT: " + doc["abstract"] + "\n\n"
            + "Q: " + doc["question"] + "\n\n"
            + "A: "
        )

    def doc_to_target(self, doc):
        # this method is invoked by tests only
        return " " + doc["answer_str"]

    def training_docs(self):
        for doc in self.data["train"]:
            yield from self.process_doc(doc)

    def validation_docs(self):
        # BUG FIX: previously iterated self.data["train"] here as well,
        # so "validation" silently re-used the training split.
        for doc in self.data["validation"]:
            yield from self.process_doc(doc)

    def process_doc(self, doc):
        """Given a `doc`, flatten it out so that each JSON blob contains
        exactly one question and one answer. Logic taken from the reference
        implementation available at
        https://github.com/allenai/qasper-led-baseline/blob/main/scripts/evaluator.py
        """
        obs_list = []
        for qa in doc["qas"]:
            for question, answer_list in zip(qa["question"], qa["answers"]):
                for answer in answer_list:
                    if answer["unanswerable"]:
                        answer_str = "unanswerable"
                        answer_type = "unanswerable"
                    elif answer["yes_no"]:
                        answer_str = "Yes"
                        answer_type = "bool"
                    elif answer["yes_no"] is not None:
                        # yes_no is False (not None): an explicit "No".
                        answer_str = "No"
                        answer_type = "bool"
                    elif answer["free_form_answer"]:
                        answer_str = answer["free_form_answer"]
                        answer_type = "free form answer"
                    elif answer["extractive_spans"]:
                        answer_str = ", ".join(answer["extractive_spans"])
                        answer_type = "extractive spans"
                    else:
                        # BUG FIX: no usable annotation — previously this
                        # left answer_str unbound (NameError) or stale from
                        # the prior iteration; skip the answer instead.
                        continue
                    # BUG FIX: was `obs_list.append[{...}]` — subscription of
                    # the bound method, a TypeError at runtime.
                    obs_list.append(
                        {
                            "title": doc["title"],
                            "abstract": doc["abstract"],
                            "question": question,
                            "answer_str": answer_str,
                            "answer_type": answer_type,
                        }
                    )
        return obs_list

    def process_results(self, doc, results):
        # Defer to the HFTask default scoring. NOTE(review): this override is
        # a no-op pass-through; kept for interface compatibility.
        return super().process_results(doc, results)

    def construct_requests(self, doc, ctx):
        """Uses RequestFactory to construct Requests and returns an iterable
        of Requests which will be sent to the LM.

        :param doc:
            The document as returned from training_docs, validation_docs, or
            test_docs.
        :param ctx: str
            The context string, generated by fewshot_context. This includes
            the natural language description, as well as the few shot
            examples, and the question part of the document for `doc`.
        """
        # Free-form generation up to a newline, plus the log-likelihood of
        # the literal continuation " unanswerable" for abstention scoring.
        continuation = rf.greedy_until(ctx, ["\n"])
        is_unanswerable = rf.loglikelihood(ctx, " " + "unanswerable")
        return continuation, is_unanswerable