"""
LogiQA: A Challenge Dataset for Machine Reading Comprehension with Logical Reasoning
https://arxiv.org/pdf/2007.08124.pdf

LogiQA is a dataset for testing human logical reasoning. It consists of 8,678 QA
instances, covering multiple types of deductive reasoning. Results show that state-
of-the-art neural models perform by far worse than human ceiling. The dataset can
also serve as a benchmark for reinvestigating logical AI under the deep learning
NLP setting.

Homepage: https://github.com/lgw863/LogiQA-dataset
"""
import inspect
import lm_eval.datasets.logiqa.logiqa
from lm_eval.base import MultipleChoiceTask


# BibTeX citation for the LogiQA paper (Liu et al., 2020).
_CITATION = """
@misc{liu2020logiqa,
    title={LogiQA: A Challenge Dataset for Machine Reading Comprehension with Logical Reasoning},
    author={Jian Liu and Leyang Cui and Hanmeng Liu and Dandan Huang and Yile Wang and Yue Zhang},
    year={2020},
    eprint={2007.08124},
    archivePrefix={arXiv},
    primaryClass={cs.CL}
}
"""


class LogiQA(MultipleChoiceTask):
    """Multiple-choice task over the LogiQA logical-reasoning MRC dataset.

    Each document is a passage + question + four answer options; the model
    is scored on picking the gold option (standard MultipleChoiceTask flow).
    """

    VERSION = 0
    # The dataset is loaded from the local HF loading script bundled with
    # lm_eval; there is no named configuration.
    DATASET_PATH = inspect.getfile(lm_eval.datasets.logiqa.logiqa)
    DATASET_NAME = None

    def has_training_docs(self):
        return True

    def has_validation_docs(self):
        return True

    def has_test_docs(self):
        return True

    def training_docs(self):
        # Training docs are materialized and cached once; validation/test
        # splits below are cheap lazy maps and are not cached.
        if self._training_docs is None:
            self._training_docs = list(map(self._process_doc, self.dataset["train"]))
        return self._training_docs

    def validation_docs(self):
        return map(self._process_doc, self.dataset["validation"])

    def test_docs(self):
        return map(self._process_doc, self.dataset["test"])

    def _process_doc(self, doc):
        """Convert a raw LogiQA record into the MultipleChoiceTask doc format.

        Returns a dict with:
            passage: raw context text (kept for decontamination),
            query:   fully formatted prompt ending in "Answer:",
            choices: the answer option strings,
            gold:    0-based index of the correct option.
        """

        def format_example(doc, choices):
            """
            Passage: <passage>
            Question: <question>
            Choices:
            A. <choice1>
            B. <choice2>
            C. <choice3>
            D. <choice4>
            Answer:
            """
            prompt = "Passage: " + doc["context"] + "\n"
            prompt += "Question: " + doc["question"] + "\nChoices:\n"
            for choice, option in zip(choices, doc["options"]):
                prompt += f"{choice.upper()}. {option}\n"
            prompt += "Answer:"
            return prompt

        choices = ["a", "b", "c", "d"]
        return {
            "passage": doc["context"],  # Used for decontamination
            "query": format_example(doc, choices),
            "choices": doc["options"],
            # `label` is expected to be one of "a".."d"; .index maps it to 0-3.
            "gold": choices.index(doc["label"]),
        }

    def doc_to_text(self, doc):
        return doc["query"]

    def should_decontaminate(self):
        return True

    def doc_to_decontamination_query(self, doc):
        # Decontaminate against the raw passage, not the formatted prompt.
        return doc["passage"]