"""
LogiQA: A Challenge Dataset for Machine Reading Comprehension with Logical Reasoning
https://arxiv.org/pdf/2007.08124.pdf

LogiQA is a dataset for testing human logical reasoning. It consists of 8,678 QA
instances, covering multiple types of deductive reasoning. Results show that
state-of-the-art neural models perform far worse than the human ceiling. The
dataset can also serve as a benchmark for re-investigating logical AI under the
deep learning NLP setting.

Homepage: https://github.com/lgw863/LogiQA-dataset
"""
from lm_eval.base import MultipleChoiceTask
from best_download import download_file
from pathlib import Path


_CITATION = """
@misc{liu2020logiqa,
    title={LogiQA: A Challenge Dataset for Machine Reading Comprehension with Logical Reasoning}, 
    author={Jian Liu and Leyang Cui and Hanmeng Liu and Dandan Huang and Yile Wang and Yue Zhang},
    year={2020},
    eprint={2007.08124},
    archivePrefix={arXiv},
    primaryClass={cs.CL}
}
"""


class LogiQA(MultipleChoiceTask):
    VERSION = 0
    DATASET_PATH = Path("data/logiqa")

    def download(self):
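        # Treat an existing dataset directory as a completed download; note
        # this assumes any previous run finished writing all three splits.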
        if self.DATASET_PATH.exists():
            return
        self.DATASET_PATH.mkdir(parents=True)
        base_url = "https://raw.githubusercontent.com/lgw863/LogiQA-dataset/master"
        splits = [
            {"name": "Train", "checksum": "7d5bb1f58278e33b395744cd2ad8d7600faa0b3c4d615c659a44ec1181d759fa"},
            {"name": "Eval", "checksum": "4c49e6753b7262c001506b9151135abf722247035ab075dad93acdea5789c01f"},
            {"name": "Test", "checksum": "359acb78c37802208f7fde9e2f6574b8526527c63d6a336f90a53f1932cb4701"}
        ]
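        # Fetch each split from the upstream GitHub repo and verify it against
        # a pinned SHA-256 checksum, so truncated downloads or silent upstream
        # edits fail loudly instead of corrupting evaluation results.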
        for split in splits:
            file = self.DATASET_PATH / f"{split['name']}.txt"
            download_file(f"{base_url}/{split['name']}.txt", local_file=str(file), expected_checksum=split["checksum"])

    def has_training_docs(self):
        return True

    def has_validation_docs(self):
        return True

    def has_test_docs(self):
        return True

    def _convert_standard(self, doc):
        def format_example(doc, choices):
            """
                Passage: <passage>
                Question: <question>
                Choices:
                A. <choice1>
                B. <choice2>
                C. <choice3>
                D. <choice4>
                Answer:
            """
            prompt = "Passage: " + doc["passage"] + "\n"
            prompt += "Question: " + doc["question"] + "\nChoices:\n"
            for choice, option in zip(choices, doc["options"]):
                prompt += f"{choice.upper()}. {option}\n"
            prompt += "Answer:"
            return prompt
        choices = ['a', 'b', 'c', 'd']
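        # "gold" is the integer index of the correct option: the position of
        # the letter answer key within the choice letters.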
        return {
            "query": format_example(doc, choices),
            "choices": doc["options"],
            "gold": choices.index(doc["answerKey"])
        }

    def _load_docs(self, filename):
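        # Each record in the raw *.txt files is a blank-line-separated block:
        #   line 0: answer key ("a"-"d")
        #   line 1: passage
        #   line 2: question
        #   lines 3-6: the four options, each carrying a two-character prefix
        #              (e.g. "a.") that is sliced off below. normalize() pads
        #              sentence-final periods with a space, which the raw text
        #              often omits.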
        def normalize(text):
            return text.replace(".", ". ").strip()

        with open(filename, "r", encoding="utf-8") as f:
            docs = f.read().strip().split("\n\n")
        for rawdoc in docs:
            rawdoc = rawdoc.split("\n")
            doc = {
                "answerKey": rawdoc[0].strip(),
                "passage": normalize(rawdoc[1]),
                "question": normalize(rawdoc[2]),
                "options": [normalize(option[2:]) for option in rawdoc[3:]]
            }
            yield self._convert_standard(doc)

    def training_docs(self):
        return self._load_docs(self.DATASET_PATH / "Train.txt")

    def validation_docs(self):
        return self._load_docs(self.DATASET_PATH / "Eval.txt")

    def test_docs(self):
        return self._load_docs(self.DATASET_PATH / "Test.txt")

    def doc_to_text(self, doc):
        return doc["query"]
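

if __name__ == "__main__":
    # Minimal smoke test (a sketch, not part of the harness API): assumes the
    # lm_eval package is importable and ./data is writable so download() can
    # create data/logiqa on first use.
    task = LogiQA()
    task.download()  # no-op once data/logiqa exists
    doc = next(iter(task.validation_docs()))
    print(task.doc_to_text(doc))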