""" A Dataset of Information-Seeking Questions and Answers Anchored in Research Papers https://arxiv.org/abs/2105.03011 @article{DBLP:journals/corr/abs-2105-03011, author = {Pradeep Dasigi and Kyle Lo and Iz Beltagy and Arman Cohan and Noah A. Smith and Matt Gardner}, title = {A Dataset of Information-Seeking Questions and Answers Anchored in Research Papers}, journal = {CoRR}, volume = {abs/2105.03011}, year = {2021}, url = {https://arxiv.org/abs/2105.03011}, eprinttype = {arXiv}, eprint = {2105.03011}, timestamp = {Fri, 14 May 2021 12:13:30 +0200}, biburl = {https://dblp.org/rec/journals/corr/abs-2105-03011.bib}, bibsource = {dblp computer science bibliography, https://dblp.org} } """ from lm_eval.base import rf from lm_eval.metrics import f1_score from .common import HFTask class QASPER(HFTask): VERSION = 0 DATASET_PATH = "qasper" DATASET_NAME = None def doc_to_text(self, doc): # this method is invoked by tests only return ( "TITLE: " + doc["title"] + "\n" + "ABSTRACT: " + doc["abstract"] + "\n\n" + "Q: " + doc["question"] + "\n\n" + "A: " ) def doc_to_target(self, doc): # this method is invoked by tests only return " " + doc["answer_str"] def training_docs(self): for doc in self.data["train"]: yield from self.process_doc(doc) def validation_docs(self): for doc in self.data["train"]: yield from self.process_doc(doc) def process_doc(self, doc): """Given a `doc`, flatten it out so that each JSON blob contains exactly one question and one answer. Logic taken from the reference implementation available at https://github.com/allenai/qasper-led-baseline/blob/main/scripts/evaluator.py """ obs_list = [] for qa in doc["qas"]: for question, answer_list in zip(qa["question"], qa["answers"]): for answer in answer_list: if answer["unanswerable"]: answer_str = "unanswerable" answer_type = "unanswerable" elif answer["yes_no"]: answer_str = "Yes" answer_type = "bool" elif answer["yes_no"] is not None: answer_str = "No" answer_type = "bool" elif answer["free_form_answer"]: answer_str = answer["free_form_answer"] answer_type = "free form answer" elif answer["extractive_spans"]: answer_str = ", ".join(answer["extractive_spans"]) answer_type = "extractive spans" obs_list.append[ { "title": doc["title"], "abstract": doc["abstract"], "question": question, "answer_str": answer_str, "answer_type": answer_type, } ] return obs_list def process_results(self, doc, results): res, unanswerable = results res_dict = {} # Handle unanswerability first unanswerable_gold = doc["answer_type"] == "unanswerable" unanswerable_pred = unanswerable > 1 - unanswerable res_dict["f1_un"] = (unanswerable_gold, unanswerable_pred) # Handle yes/no questions if doc["answer_type"] == "bool": ll_yes, ll_no = res gold = 1 if doc["answer"] == "yes" else 0 pred = ll_yes > ll_no res_dict["f1_yn"] = (gold, pred) # Handle completions if doc["answer_type"] == "free form answer": pass return res_dict def aggregation(self): return { "f1_un": f1_score, "f1_yn": f1_score, "f1_fr": f1_score, "f1_ex": f1_score, } def construct_requests(self, doc, ctx): """Uses RequestFactory to construct Requests and returns an iterable of Requests which will be sent to the LM. :param doc: The document as returned from training_docs, validation_docs, or test_docs. :param ctx: str The context string, generated by fewshot_context. This includes the natural language description, as well as the few shot examples, and the question part of the document for `doc`. """ unanswerable = rf.loglikelihood(ctx, " " + "unanswerable") if doc["answer_type"] in ("free form answer", "extractive spans"): res = rf.greedy_until(ctx, ["\n"]) elif doc["answer_type"] in ("bool"): ll_yes, _ = rf.loglikelihood(ctx, " yes") ll_no, _ = rf.loglikelihood(ctx, " no") res = (ll_yes, ll_no) return res, unanswerable