headqa.py
"""
Interpretable Multi-Step Reasoning with Knowledge Extraction on Complex Healthcare Question Answering
https://arxiv.org/abs/2008.02434

HEAD-QA is a multi-choice HEAlthcare Dataset. The questions come from exams to
access a specialized position in the Spanish healthcare system, and are challenging
even for highly specialized humans.

Homepage: https://aghie.github.io/head-qa/
"""
import inspect
import lm_eval.datasets.headqa.headqa
from lm_eval.base import MultipleChoiceTask


_CITATION = """
@misc{liu2020interpretable,
    title={Interpretable Multi-Step Reasoning with Knowledge Extraction on Complex Healthcare Question Answering},
    author={Ye Liu and Shaika Chowdhury and Chenwei Zhang and Cornelia Caragea and Philip S. Yu},
    year={2020},
    eprint={2008.02434},
    archivePrefix={arXiv},
    primaryClass={cs.AI}
}
"""


class HeadQABase(MultipleChoiceTask):
    VERSION = 0
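    # DATASET_PATH is the local dataset-loading script bundled with the harness
    # (lm_eval/datasets/headqa/headqa.py) rather than a Hugging Face Hub name.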
    DATASET_PATH = inspect.getfile(lm_eval.datasets.headqa.headqa)

    def has_training_docs(self):
        return True

    def has_validation_docs(self):
        return True

    def has_test_docs(self):
        return True

    def training_docs(self):
        if self._training_docs is None:
            self._training_docs = list(map(self._process_doc, self.dataset["train"]))
        return self._training_docs

    def validation_docs(self):
        return map(self._process_doc, self.dataset["validation"])

    def test_docs(self):
        return map(self._process_doc, self.dataset["test"])

    def _process_doc(self, doc):
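        # Map a raw HEAD-QA example to the harness's multiple-choice format:
        # "ra" is the 1-indexed id of the right answer, so the gold label is
        # shifted to a 0-based index into "choices".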
        out_doc = {
            "id": doc["qid"],
            "query": "Question: " + doc["qtext"] + "\nAnswer:",
            "choices": [answer["atext"] for answer in doc["answers"]],
            "gold": int(doc["ra"]) - 1,
        }
        return out_doc

    def doc_to_text(self, doc):
        return doc["query"]

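    # For decontamination checks, the rendered question prompt is what gets
    # compared against the training data for overlap.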
    def should_decontaminate(self):
        return True

    def doc_to_decontamination_query(self, doc):
        return doc["query"]


class HeadQAEn(HeadQABase):
    DATASET_NAME = "en"


class HeadQAEs(HeadQABase):
    DATASET_NAME = "es"


# for backwards compatibility
class HeadQAEsDeprecated(HeadQABase):
    DATASET_NAME = "es"

    def __init__(self):
        super().__init__()
        print(
            "WARNING: headqa is deprecated. Please use headqa_es or headqa_en instead. See https://github.com/EleutherAI/lm-evaluation-harness/pull/240 for more info."
        )
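

# Usage sketch, assuming these tasks are exposed in the harness task registry
# under the names "headqa_en" and "headqa_es" (the names the deprecation
# warning above points to); a CLI run would then look roughly like
#
#     python main.py --model gpt2 --tasks headqa_en,headqa_es
#
# The guard below only exercises the document-processing path defined in this
# file and assumes the HEAD-QA data can be downloaded.
if __name__ == "__main__":
    task = HeadQAEn()  # loads the "en" config via the bundled dataset script
    doc = next(iter(task.validation_docs()))
    print(task.doc_to_text(doc))        # "Question: <qtext>\nAnswer:"
    print(doc["choices"][doc["gold"]])  # text of the correct answer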