"""
Interpretable Multi-Step Reasoning with Knowledge Extraction on Complex Healthcare Question Answering
https://aclanthology.org/P19-1092.pdf

HEAD-QA is a multi-choice HEAlthcare Dataset. The questions come from exams to 
access a specialized position in the Spanish healthcare system, and are challenging
even for highly specialized humans.

Homepage: https://aghie.github.io/head-qa/
10
11
12
"""
from . common import HFTask
from lm_eval.base import MultipleChoiceTask
_CITATION = """
16
@misc{liu2020interpretable,
17
18
19
20
21
22
    title={Interpretable Multi-Step Reasoning with Knowledge Extraction on Complex Healthcare Question Answering}, 
    author={Ye Liu and Shaika Chowdhury and Chenwei Zhang and Cornelia Caragea and Philip S. Yu},
    year={2020},
    eprint={2008.02434},
    archivePrefix={arXiv},
    primaryClass={cs.AI}
23
24
}
"""


class HeadQABase(HFTask, MultipleChoiceTask):
    """Shared base for the HEAD-QA multiple-choice tasks.

    Subclasses select a language configuration via ``DATASET_NAME``
    ("en" or "es").
    """

    VERSION = 0
    DATASET_PATH = "head_qa"

    def has_training_docs(self):
        # The HF "head_qa" dataset ships a train split.
        return True

    def has_validation_docs(self):
        # The HF "head_qa" dataset ships a validation split.
        return True

    def has_test_docs(self):
        # The HF "head_qa" dataset ships a test split.
        return True

    def _convert_standard(self, doc):
        """Map a raw HF example into the harness multiple-choice format.

        ``ra`` is the 1-indexed index of the right answer, hence the
        ``- 1`` to produce a 0-indexed gold label.
        """
        return {
            "id": doc["qid"],
            "query": f"Question: {doc['qtext']}\nAnswer:",
            "choices": [option["atext"] for option in doc["answers"]],
            "gold": int(doc["ra"]) - 1,
        }

    def doc_to_text(self, doc):
        """The prompt is the pre-built query string from `_convert_standard`."""
        return doc["query"]


class HeadQAEn(HeadQABase):
    """HEAD-QA using the "en" (English) configuration of the HF dataset."""
    DATASET_NAME = "en"

class HeadQAEs(HeadQABase):
    """HEAD-QA using the "es" (Spanish) configuration of the HF dataset."""
    DATASET_NAME = "es"

# for backwards compatibility
class HeadQAEsDeprecated(HeadQABase):
    """Alias kept so the old ``headqa`` task name still resolves.

    Behaves exactly like the Spanish configuration, but tells users to
    migrate to ``headqa_es`` / ``headqa_en`` when instantiated.
    """

    DATASET_NAME = "es"

    def __init__(self):
        super().__init__()
        # Printed (not raised) so existing runs keep working while users migrate.
        deprecation_notice = (
            "WARNING: headqa is deprecated. Please use headqa_es or "
            "headqa_en instead. See "
            "https://github.com/EleutherAI/lm-evaluation-harness/pull/240 "
            "for more info."
        )
        print(deprecation_notice)