prost.py 2.53 KB
Newer Older
Jonathan Tow's avatar
Jonathan Tow committed
1
2
3
4
"""
PROST: Physical Reasoning about Objects Through Space and Time
https://arxiv.org/pdf/2106.03634.pdf

PROST, Physical Reasoning about Objects Through Space and Time, is a dataset
consisting of 18,736 multiple-choice questions made from 14 manually curated
templates, covering 10 physical reasoning concepts. All questions are designed
to probe both causal and masked language models in a zero-shot setting.

NOTE: PROST is limited to the zero-shot setting to adhere to the authors' intentions
as discussed in section 7 of the paper: "We hope that the community will use
this dataset in the intended way: in a zero-shot setting to probe models which
have been trained on data not specifically collected to succeed on PROST."

Homepage: https://github.com/nala-cub/prost
"""
from lm_eval.base import MultipleChoiceTask

19

Jonathan Tow's avatar
Jonathan Tow committed
20
# BibTeX entry for the PROST paper.
# A raw string is required: in a normal string literal the LaTeX accent
# ``\'`` is read as an escaped quote and the backslash is silently dropped.
_CITATION = r"""
@inproceedings{aroca-ouellette-etal-2021-prost,
    title = "{PROST}: {P}hysical Reasoning about Objects through Space and Time",
    author = "Aroca-Ouellette, St{\'e}phane  and
      Paik, Cory  and
      Roncone, Alessandro  and
      Kann, Katharina",
    booktitle = "Findings of the Association for Computational Linguistics: ACL-IJCNLP 2021",
    month = aug,
    year = "2021",
    address = "Online",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2021.findings-acl.404",
    pages = "4597--4608",
}
"""


Jonathan Tow's avatar
Jonathan Tow committed
38
class PROST(MultipleChoiceTask):
    """Multiple-choice task over the PROST test split, zero-shot only.

    Only a test split is exposed (no training or validation documents), and
    ``fewshot_context`` rejects any non-zero ``num_fewshot``.
    """

    VERSION = 0
    DATASET_PATH = "corypaik/prost"
    DATASET_NAME = None

    def has_training_docs(self):
        """Return ``False``: this task exposes no training documents."""
        return False

    def has_validation_docs(self):
        """Return ``False``: this task exposes no validation documents."""
        return False

    def has_test_docs(self):
        """Return ``True``: the test split is the only split exposed."""
        return True

    def test_docs(self):
        """Return a lazy, single-pass iterator of processed test documents.

        Each record of ``self.dataset["test"]`` is passed through
        ``_process_doc``. The result is a ``map`` object, so it can only be
        consumed once.
        """
        return map(self._process_doc, self.dataset["test"])

    def fewshot_context(
        self, doc, num_fewshot, provide_description=None, rnd=None, description=None
    ):
        """Build the prompt context for ``doc``; only zero-shot is allowed.

        Args:
            doc: The document to build the context for.
            num_fewshot: Number of few-shot examples; must be ``0``.
            provide_description: Accepted for signature compatibility but not
                forwarded to the parent implementation.
            rnd: Forwarded unchanged to the parent implementation.
            description: Forwarded unchanged to the parent implementation.

        Returns:
            Whatever the parent class's ``fewshot_context`` returns.

        Raises:
            AssertionError: If ``num_fewshot`` is not ``0``.
        """
        if num_fewshot != 0:
            # Raised explicitly instead of via ``assert`` so the zero-shot
            # restriction is still enforced under ``python -O``; the exception
            # type and message are unchanged.
            raise AssertionError(
                "PROST is designed to probe models in a zero-shot fashion only."
            )
        return super().fewshot_context(
            doc=doc, num_fewshot=num_fewshot, rnd=rnd, description=description
        )

    def _process_doc(self, doc):
        """Convert a raw dataset record into the query/choices/gold form.

        Reads the ``context``, ``ex_question``, ``A``-``D`` and ``label``
        fields of ``doc``.

        Returns:
            A dict with ``query`` (the prompt text ending in ``Answer:``),
            ``choices`` (the four options in A-D order) and ``gold`` (the raw
            ``label`` value; presumably an index into ``choices`` - confirm
            against the dataset schema).
        """
        out_doc = {
            "query": f"{doc['context']}\nQuestion: {doc['ex_question']}\nAnswer:",
            "choices": [doc["A"], doc["B"], doc["C"], doc["D"]],
            "gold": doc["label"],
        }
        return out_doc

    def doc_to_text(self, doc):
        """Return the prompt text stored under ``doc["query"]``."""
        return doc["query"]

    def should_decontaminate(self):
        """Return ``True``: this task opts in to decontamination."""
        return True

    def doc_to_decontamination_query(self, doc):
        """Return ``doc["query"]`` as the text used for decontamination."""
        return doc["query"]