"""
PROST: Physical Reasoning about Objects Through Space and Time
https://arxiv.org/pdf/2106.03634.pdf

PROST, Physical Reasoning about Objects Through Space and Time, is a dataset
consisting of 18,736 multiple-choice questions made from 14 manually curated
templates, covering 10 physical reasoning concepts. All questions are designed
to probe both causal and masked language models in a zero-shot setting.

NOTE: PROST is limited to the zero-shot setting to adhere to authors' intentions
as discussed in section 7 of the paper: "We hope that the community will use
this dataset in the intended way: in a zero-shot setting to probe models which
have been trained on data not specifically collected to succeed on PROST."

Homepage: https://github.com/nala-cub/prost

@inproceedings{aroca-ouellette-etal-2021-prost,
    title = "{PROST}: {P}hysical Reasoning about Objects through Space and Time",
    author = "Aroca-Ouellette, St{\'e}phane  and
      Paik, Cory  and
      Roncone, Alessandro  and
      Kann, Katharina",
    booktitle = "Findings of the Association for Computational Linguistics: ACL-IJCNLP 2021",
    month = aug,
    year = "2021",
    address = "Online",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2021.findings-acl.404",
    pages = "4597--4608",
}
"""

from lm_eval.base import MultipleChoiceTask
from . common import HFTask


class PROST(HFTask, MultipleChoiceTask):
    """Zero-shot multiple-choice task over the PROST dataset.

    Only a test split is advertised, and ``fewshot_context`` rejects any
    request for few-shot examples.
    """

    VERSION = 0
    DATASET_PATH = "corypaik/prost"
    DATASET_NAME = None

    def has_training_docs(self):
        """Return False: this task advertises no training split."""
        return False

    def has_validation_docs(self):
        """Return False: this task advertises no validation split."""
        return False

    def has_test_docs(self):
        """Return True: evaluation runs on the test split."""
        return True

    def fewshot_context(self, doc, num_fewshot, provide_description=None, rnd=None, description=None):
        """Build the prompt context for ``doc``, allowing zero-shot only.

        ``doc``, ``num_fewshot``, ``rnd`` and ``description`` are passed to
        the parent implementation as keyword arguments.
        ``provide_description`` is accepted but deliberately not forwarded.

        Returns:
            Whatever the parent ``fewshot_context`` returns.

        Raises:
            AssertionError: if ``num_fewshot`` is not 0.
        """
        # NOTE(review): this guard is an ``assert``, so it is skipped when
        # Python runs with -O; kept as-is to preserve the exception type.
        assert num_fewshot == 0, 'PROST is designed to probe models in a zero-shot fashion only.'
        return super().fewshot_context(
            doc=doc,
            num_fewshot=num_fewshot,
            rnd=rnd,
            description=description
        )

    def _convert_standard(self, doc):
        """Map a raw PROST record onto ``query`` / ``choices`` / ``gold``.

        Reads the ``context``, ``ex_question``, ``A``-``D`` and ``label``
        keys of ``doc``. ``label`` is copied through unchanged; presumably
        it is the index of the correct entry in ``choices`` (TODO confirm
        against the dataset schema).
        """
        out_doc = {
            "query": f"{doc['context']}\nQuestion: {doc['ex_question']}\nAnswer:",
            "choices": [doc['A'], doc['B'], doc['C'], doc['D']],
            "gold": doc['label'],
        }
        return out_doc

    def doc_to_text(self, doc):
        """Return the prompt text stored under the ``query`` key of ``doc``."""
        return doc["query"]