sat.py 1.88 KB
Newer Older
1
2
3
4
5
6
7
8
"""
Similarity of Semantic Relations
https://arxiv.org/pdf/cs/0608100.pdf

SAT (Scholastic Aptitude Test) Analogy Questions is a dataset comprising 374
multiple-choice analogy questions; 5 choices per question.

Homepage: https://aclweb.org/aclwiki/SAT_Analogy_Questions_(State_of_the_art)
"""
Jonathan Tow's avatar
Jonathan Tow committed
10
11
import inspect
import lm_eval.datasets.sat_analogies.sat_analogies
12
from lm_eval.base import MultipleChoiceTask
13

14
15

# BibTeX entry for the paper that introduced the SAT analogy benchmark
# (Turney, 2006). The string must contain only the BibTeX record itself.
_CITATION = """
@article{article,
    author = {Turney, Peter},
    year = {2006},
    month = {09},
    pages = {379-416},
    title = {Similarity of Semantic Relations},
    volume = {32},
    journal = {Computational Linguistics},
    doi = {10.1162/coli.2006.32.3.379}
}
"""
27
28


Jonathan Tow's avatar
Jonathan Tow committed
29
class SATAnalogies(MultipleChoiceTask):
    """Multiple-choice task built on the SAT Analogy Questions dataset.

    Only a validation split is exposed; the training and test splits are
    reported as absent and yield no documents. Each processed document asks
    which candidate word pair completes "<a> is to <b> as ...".
    """

    VERSION = 0
    # Source file of the `lm_eval.datasets.sat_analogies.sat_analogies` module.
    DATASET_PATH = inspect.getfile(lm_eval.datasets.sat_analogies.sat_analogies)
    DATASET_NAME = None

    def __init__(self, data_dir: str):
        """
        SAT Analogy Questions is not publicly available. You must request the
        data by emailing Peter Turney and then download it to a local
        directory path which should be passed into the `data_dir` arg.
        """
        super().__init__(data_dir=data_dir)

    def has_training_docs(self):
        """Return False: this task provides no training documents."""
        return False

    def has_validation_docs(self):
        """Return True: the validation split is the only one provided."""
        return True

    def has_test_docs(self):
        """Return False: this task provides no test documents."""
        return False

    def training_docs(self):
        """Return an empty list, since there is no training split."""
        return []

    def validation_docs(self):
        """Return a lazy iterator of processed validation documents."""
        return map(self._process_doc, self.dataset["validation"])

    def test_docs(self):
        """Return an empty list, since there is no test split."""
        return []

    def _process_doc(self, doc):
        """Convert a raw dataset record into the multiple-choice format.

        Args:
            doc: Mapping with the keys 'source', 'stem', 'choices' and
                'solution'. 'stem' and each entry of 'choices' are split on
                single spaces, so they are presumably strings that begin with
                a word pair -- confirm against the dataset script.

        Returns:
            A dict with:
              * 'source': copied from the record unchanged;
              * 'query': the first two space-separated tokens of the stem;
              * 'choices': one "<x> is to <y>" string per candidate, built
                from the candidate's first two tokens;
              * 'gold': the zero-based position of the (whitespace-stripped)
                solution letter within 'a'..'e'.

        Raises:
            ValueError: if the stripped solution is not one of 'a'..'e'.
        """
        return {
            'source': doc['source'],
            # Only the first two tokens are kept; anything after the word
            # pair in the stem is discarded.
            'query': doc['stem'].split(' ')[:2],
            'choices': ["{} is to {}".format(*c.split(' ')[:2]) for c in doc["choices"]],
            'gold': ['a', 'b', 'c', 'd', 'e'].index(doc['solution'].strip()),
        }

    def doc_to_text(self, doc):
        """Return the prompt "<a> is to <b> as" built from doc['query']."""
        return "{} is to {} as".format(*doc['query'])