"""
PROST: Physical Reasoning about Objects Through Space and Time
https://arxiv.org/pdf/2106.03634.pdf

PROST, Physical Reasoning about Objects Through Space and Time, is a dataset
consisting of 18,736 multiple-choice questions made from 14 manually curated
templates, covering 10 physical reasoning concepts. All questions are designed
to probe both causal and masked language models in a zero-shot setting.

NOTE: PROST is limited to the zero-shot setting to adhere to the authors' intentions
as discussed in section 7 of the paper: "We hope that the community will use
this dataset in the intended way: in a zero-shot setting to probe models which
have been trained on data not specifically collected to succeed on PROST."

Homepage: https://github.com/nala-cub/prost
"""
from lm_eval.base import MultipleChoiceTask
from . common import HFTask

20

Jonathan Tow's avatar
Jonathan Tow committed
21
_CITATION = """
22
23
24
25
26
27
28
29
30
31
32
33
34
@inproceedings{aroca-ouellette-etal-2021-prost,
    title = "{PROST}: {P}hysical Reasoning about Objects through Space and Time",
    author = "Aroca-Ouellette, St{\'e}phane  and
      Paik, Cory  and
      Roncone, Alessandro  and
      Kann, Katharina",
    booktitle = "Findings of the Association for Computational Linguistics: ACL-IJCNLP 2021",
    month = aug,
    year = "2021",
    address = "Online",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2021.findings-acl.404",
    pages = "4597--4608",
Jonathan Tow's avatar
Jonathan Tow committed
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
}
"""


class PROST(HFTask, MultipleChoiceTask):
    """Multiple-choice evaluation task for the PROST dataset.

    Only a test split is exposed, and prompts are restricted to the zero-shot
    setting: any request for few-shot examples is rejected.
    """

    VERSION = 0
    DATASET_PATH = "corypaik/prost"
    DATASET_NAME = None

    def has_training_docs(self):
        """Return False; this task exposes no training documents."""
        return False

    def has_validation_docs(self):
        """Return False; this task exposes no validation documents."""
        return False

    def has_test_docs(self):
        """Return True; evaluation runs on the test documents."""
        return True

    def fewshot_context(self, doc, num_fewshot, provide_description=None, rnd=None, description=None):
        """Build the prompt context for ``doc``, permitting zero-shot only.

        Args:
            doc: The document to build a context for.
            num_fewshot: Number of few-shot examples; must be 0.
            provide_description: Accepted for signature compatibility; it is
                not forwarded to the parent implementation.
            rnd: Forwarded unchanged to the parent implementation.
            description: Forwarded unchanged to the parent implementation.

        Returns:
            The value returned by the parent class's ``fewshot_context``.

        Raises:
            AssertionError: If ``num_fewshot`` is not 0.
        """
        # Raised explicitly instead of via an `assert` statement so the
        # zero-shot restriction is still enforced when Python runs with -O
        # (which strips assert statements). The exception type is unchanged.
        if num_fewshot != 0:
            raise AssertionError(
                'PROST is designed to probe models in a zero-shot fashion only.'
            )
        return super().fewshot_context(
            doc=doc,
            num_fewshot=num_fewshot,
            rnd=rnd,
            description=description
        )

    def _convert_standard(self, doc):
        """Convert a raw dataset row into a dict with query/choices/gold keys.

        Reads the ``context``, ``ex_question``, ``A``-``D`` and ``label``
        fields of ``doc``.

        Returns:
            A dict with ``query`` (context followed by the question and an
            ``Answer:`` cue), ``choices`` (the four options in A-D order) and
            ``gold`` (the row's ``label`` value; presumably the index of the
            correct choice -- confirm against the dataset schema).
        """
        out_doc = {
            "query": f"{doc['context']}\nQuestion: {doc['ex_question']}\nAnswer:",
            "choices": [doc['A'], doc['B'], doc['C'], doc['D']],
            "gold": doc['label'],
        }
        return out_doc

    def doc_to_text(self, doc):
        """Return the prompt text stored under the ``query`` key of ``doc``."""
        return doc["query"]