Commit f4e38081 authored by Jonathan Tow's avatar Jonathan Tow
Browse files

Implement `PROST` task

parent 10d4b64a
...@@ -21,6 +21,7 @@ from . import arithmetic ...@@ -21,6 +21,7 @@ from . import arithmetic
from . import lambada from . import lambada
from . import race from . import race
from . import piqa from . import piqa
from . import prost
from . import triviaqa from . import triviaqa
from . import pubmedqa from . import pubmedqa
from . import sciq from . import sciq
...@@ -101,6 +102,7 @@ TASK_REGISTRY = { ...@@ -101,6 +102,7 @@ TASK_REGISTRY = {
# "cbt-ne": cbt.CBTNE, # disabled pending context length fix # "cbt-ne": cbt.CBTNE, # disabled pending context length fix
"piqa": piqa.PiQA, "piqa": piqa.PiQA,
"prost": prost.PROST,
# Science related # Science related
"pubmedqa" : pubmedqa.Pubmed_QA, "pubmedqa" : pubmedqa.Pubmed_QA,
......
"""
PROST: Physical Reasoning about Objects Through Space and Time
https://arxiv.org/pdf/2106.03634.pdf
NOTE: PROST is limited to the zero-shot setting to adhere to authors' intentions
as discussed in section 7 of the paper: "We hope that the community will use
this dataset in the intended way: in a zero-shot setting to probe models which
have been trained on data not specifically collected to succeed on PROST."
# TODO: Update citation when it is made available at https://github.com/nala-cub/prost.
@misc{arocaouellette2021prost,
title={PROST: Physical Reasoning of Objects through Space and Time},
author={Stéphane Aroca-Ouellette and Cory Paik and Alessandro Roncone and Katharina Kann},
year={2021},
eprint={2106.03634},
archivePrefix={arXiv},
primaryClass={cs.CL}
}
"""
from lm_eval.base import MultipleChoiceTask
from . common import HFTask
class PROST(HFTask, MultipleChoiceTask):
VERSION = 0
DATASET_PATH = "corypaik/prost"
DATASET_NAME = None
def has_training_docs(self):
return False
def has_validation_docs(self):
return False
def has_test_docs(self):
return True
def fewshot_description(self):
# TODO: figure out fewshot description
return ""
def fewshot_context(self, doc, num_fewshot, provide_description, rnd):
assert num_fewshot == 0, 'PROST is designed to probe models in a zero-shot fashion only.'
return super().fewshot_context(doc, num_fewshot, provide_description, rnd)
def _convert_standard(self, doc):
out_doc = {
"query": f"{doc['context']}\nQuestion: {doc['ex_question']}\nAnswer:",
"choices": [doc['A'], doc['B'], doc['C'], doc['D']],
"gold": doc['label'],
}
return out_doc
def doc_to_text(self, doc):
return doc["query"]
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment