pubmedqa.py 3.3 KB
Newer Older
1
2
3
4
5
"""
PubMedQA: A Dataset for Biomedical Research Question Answering
https://arxiv.org/pdf/1909.06146.pdf

PubMedQA is a novel biomedical question answering (QA) dataset collected from
PubMed abstracts. The task of PubMedQA is to answer research questions with
yes/no/maybe (e.g.: Do preoperative statins reduce atrial fibrillation after
coronary artery bypass grafting?) using the corresponding abstracts. PubMedQA
has 1k expert-annotated, 61.2k unlabeled and 211.3k artificially generated QA
instances. Each PubMedQA instance is composed of (1) a question which is either
an existing research article title or derived from one, (2) a context which is
the corresponding abstract without its conclusion, (3) a long answer, which is
the conclusion of the abstract and, presumably, answers the research question,
and (4) a yes/no/maybe answer which summarizes the conclusion.

Homepage: https://pubmedqa.github.io/
"""
jeffhsu3's avatar
jeffhsu3 committed
18
import numpy as np
Jonathan Tow's avatar
Jonathan Tow committed
19
20
from lm_eval.base import rf, Task
from lm_eval.metrics import mean
jeffhsu3's avatar
jeffhsu3 committed
21
22


23
24
25
26
27
28
29
30
31
32
33
# BibTeX entry for the PubMedQA paper (Jin et al., EMNLP-IJCNLP 2019).
_CITATION = """
@inproceedings{jin2019pubmedqa,
    title={PubMedQA: A Dataset for Biomedical Research Question Answering},
    author={Jin, Qiao and Dhingra, Bhuwan and Liu, Zhengping and Cohen, William and Lu, Xinghua},
    booktitle={Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing and the 9th International Joint Conference on Natural Language Processing (EMNLP-IJCNLP)},
    pages={2567--2577},
    year={2019}
}
"""


Jonathan Tow's avatar
Jonathan Tow committed
34
class Pubmed_QA(Task):
    """PubMedQA task: answer a research question with yes / no / maybe.

    The prompt is built from the abstract contexts and the question; the
    model is scored by comparing the log-likelihoods of the continuations
    " yes", " no" and " maybe" and checking whether the most likely one
    matches the document's ``final_decision`` field.
    """

    VERSION = 0
    # NOTE(review): presumably a Hugging Face Hub dataset id / config name
    # consumed by the ``Task`` base class -- confirm against ``Task``.
    DATASET_PATH = "bigbio/pubmed_qa"
    DATASET_NAME = "pubmed_qa_labeled_fold0_source"

    # Candidate answers, in the order their log-likelihoods are requested in
    # ``construct_requests`` and decoded in ``process_results``.
    _CHOICES = ("yes", "no", "maybe")

    def has_training_docs(self):
        """Return True: this task exposes a training split."""
        return True

    def has_validation_docs(self):
        """Return True: this task exposes a validation split."""
        return True

    def has_test_docs(self):
        """Return True: this task exposes a test split."""
        return True

    def training_docs(self):
        """Return the "train" split, caching it on ``self._training_docs``.

        Returns None (implicitly) when ``has_training_docs()`` is false.
        """
        if self.has_training_docs():
            # ``_training_docs`` is expected to be initialised (to None) outside
            # this class -- presumably by ``Task``; verify against the base class.
            if self._training_docs is None:
                self._training_docs = self.dataset["train"]
            return self._training_docs

    def validation_docs(self):
        """Return the "validation" split, or None if the task has none."""
        if self.has_validation_docs():
            return self.dataset["validation"]

    def test_docs(self):
        """Return the "test" split, or None if the task has none."""
        if self.has_test_docs():
            return self.dataset["test"]

    def doc_to_text(self, doc):
        """Build the prompt for ``doc``.

        The entries of ``doc["CONTEXTS"]`` are joined with newlines and
        followed by the question; the prompt ends with "Answer:" so that the
        target (see ``doc_to_target``) can be appended directly.
        """
        ctxs = "\n".join(doc["CONTEXTS"])
        # Only the two values consumed by the template are passed. The gold
        # label must never be handed to the prompt formatter.
        return "Abstract: {}\nQuestion: {}\nAnswer:".format(ctxs, doc["QUESTION"])

    def should_decontaminate(self):
        """Return True: this task supports decontamination queries."""
        return True

    def doc_to_decontamination_query(self, doc):
        """Return the question and contexts of ``doc`` as a single string."""
        # Uses the same upper-case "QUESTION" key as ``doc_to_text``; the
        # previous lower-case "question" key did not match it.
        return doc["QUESTION"] + " " + "\n".join(doc["CONTEXTS"])

    def doc_to_target(self, doc):
        """Return the gold answer prefixed with a single space."""
        return " {}".format(doc["final_decision"])

    def construct_requests(self, doc, ctx):
        """Uses RequestFactory to construct Requests and returns
        an iterable of Requests which will be sent to the LM.

        One log-likelihood request is issued per candidate answer, in the
        order yes, no, maybe; only the first element of each request pair is
        kept.
        """
        ll_yes, _ = rf.loglikelihood(ctx, " yes")
        ll_no, _ = rf.loglikelihood(ctx, " no")
        ll_maybe, _ = rf.loglikelihood(ctx, " maybe")
        return ll_yes, ll_no, ll_maybe

    def process_results(self, doc, results):
        """Score one document.

        ``results`` holds the three values produced for the requests built in
        ``construct_requests`` (yes, no, maybe). Returns ``{"acc": bool}``,
        True when the highest-scoring answer equals ``doc["final_decision"]``.
        """
        gold = doc["final_decision"]
        # Unpacking enforces that exactly three results were supplied.
        ll_yes, ll_no, ll_maybe = results
        pred = np.argmax([ll_yes, ll_no, ll_maybe])
        return {
            "acc": self._CHOICES[pred] == gold,
        }

    def aggregation(self):
        """Return the aggregation function for each metric (mean accuracy)."""
        return {"acc": mean}

    def higher_is_better(self):
        """Return, per metric, whether a larger value is better."""
        return {"acc": True}