"middleware/openai_test.go" did not exist on "630e7dc6ff461cc957a1314d8f27986f0d7b92ca"
Commit 79492627 authored by Stephen Hogg's avatar Stephen Hogg
Browse files

Add initial draft of QASPER; register with package; yet to complete process_results

parent 05590e11
......@@ -29,6 +29,7 @@ from . import triviaqa
from . import pubmedqa
from . import sciq
from . import webqs
from . import qasper
from . import qa4mre
from . import translation
from . import headqa
......@@ -121,6 +122,8 @@ TASK_REGISTRY = {
"pubmedqa" : pubmedqa.Pubmed_QA,
"sciq" : sciq.SciQ,
"qasper": qasper.QASPER,
"qa4mre_2011" : qa4mre.QA4MRE_2011,
"qa4mre_2012" : qa4mre.QA4MRE_2012,
"qa4mre_2013" : qa4mre.QA4MRE_2013,
......
from lm_eval.base import rf
from .common import HFTask
class QASPER(HFTask):
    """QASPER: Question Answering over Scientific (NLP) Papers.

    Each raw HF example is one paper carrying several questions, each with
    several (possibly conflicting) answer annotations; `process_doc` flattens
    that into one JSON blob per (question, answer) pair so the standard
    doc_to_text / doc_to_target machinery can consume it.
    """

    VERSION = 0
    DATASET_PATH = "qasper"
    DATASET_NAME = None

    def doc_to_text(self, doc):
        # this method is invoked by tests only
        return (
            "TITLE: "
            + doc["title"]
            + "\n"
            + "ABSTRACT: "
            + doc["abstract"]
            + "\n\n"
            + "Q: "
            + doc["question"]
            + "\n\n"
            + "A: "
        )

    def doc_to_target(self, doc):
        # this method is invoked by tests only
        return " " + doc["answer_str"]

    def training_docs(self):
        # Lazily flatten every training paper into per-question docs.
        for doc in self.data["train"]:
            yield from self.process_doc(doc)

    def validation_docs(self):
        # BUG FIX: previously iterated self.data["train"], so "validation"
        # evaluation silently reran the training split.
        for doc in self.data["validation"]:
            yield from self.process_doc(doc)

    def process_doc(self, doc):
        """Given a `doc`, flatten it out so that each JSON blob
        contains exactly one question and one answer. Logic taken from
        the reference implementation available at
        https://github.com/allenai/qasper-led-baseline/blob/main/scripts/evaluator.py

        Answer precedence: unanswerable > yes/no > free-form > extractive
        spans. Note `answer["yes_no"]` is ternary (True/False/None); the
        two `yes_no` branches below distinguish True from a non-None False.
        """
        obs_list = []
        for qa in doc["qas"]:
            for question, answer_list in zip(qa["question"], qa["answers"]):
                for answer in answer_list:
                    if answer["unanswerable"]:
                        answer_str = "unanswerable"
                        answer_type = "unanswerable"
                    elif answer["yes_no"]:
                        answer_str = "Yes"
                        answer_type = "bool"
                    elif answer["yes_no"] is not None:
                        answer_str = "No"
                        answer_type = "bool"
                    elif answer["free_form_answer"]:
                        answer_str = answer["free_form_answer"]
                        answer_type = "free form answer"
                    elif answer["extractive_spans"]:
                        answer_str = ", ".join(answer["extractive_spans"])
                        answer_type = "extractive spans"
                    else:
                        # BUG FIX: no annotation matched — previously
                        # answer_str/answer_type would be unbound (NameError)
                        # or carry a stale value from an earlier iteration.
                        continue
                    # BUG FIX: was `obs_list.append[...]` — subscripting the
                    # bound method raises TypeError; must be a call.
                    obs_list.append(
                        {
                            "title": doc["title"],
                            "abstract": doc["abstract"],
                            "question": question,
                            "answer_str": answer_str,
                            "answer_type": answer_type,
                        }
                    )
        return obs_list

    def process_results(self, doc, results):
        # TODO(review): draft placeholder — defers entirely to the HFTask
        # default; QASPER-specific metrics (answer F1) are not implemented yet.
        return super().process_results(doc, results)

    def construct_requests(self, doc, ctx):
        """Uses RequestFactory to construct Requests and returns an iterable of
        Requests which will be sent to the LM.

        :param doc:
            The document as returned from training_docs, validation_docs, or test_docs.
        :param ctx: str
            The context string, generated by fewshot_context. This includes the natural
            language description, as well as the few shot examples, and the question
            part of the document for `doc`.
        """
        # Greedy generation up to a newline, plus the log-likelihood of the
        # literal " unanswerable" continuation for abstention scoring.
        continuation = rf.greedy_until(ctx, ["\n"])
        is_unanswerable = rf.loglikelihood(ctx, " " + "unanswerable")
        return continuation, is_unanswerable
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment