"lm_eval/tasks/gpqa/zeroshot/utils.py" did not exist on "45941c67dd531196035fed24f4ee16b249297325"
qasper.py 3.24 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
from lm_eval.base import rf
from .common import HFTask


class QASPER(HFTask):
    """QASPER question-answering task, flattened to one question/answer per doc.

    Each raw dataset record bundles several questions, each with several
    annotated answers. `process_doc` expands a record into flat dicts with the
    keys ``title``, ``abstract``, ``question``, ``answer_str`` and
    ``answer_type``; those flat dicts are what the other methods consume.
    """

    VERSION = 0
    DATASET_PATH = "qasper"
    DATASET_NAME = None

    def doc_to_text(self, doc):
        """Render the prompt (title, abstract, question) for a flattened doc."""
        # this method is invoked by tests only
        # NOTE(review): the prompt ends with "A: " and doc_to_target() starts
        # with " ", so prompt + target contains a double space. Left untouched
        # because changing the prompt changes evaluation results -- confirm.
        return (
            f"TITLE: {doc['title']}\n"
            f"ABSTRACT: {doc['abstract']}\n\n"
            f"Q: {doc['question']}\n\n"
            "A: "
        )

    def doc_to_target(self, doc):
        """Return the gold answer string, prefixed with a single space."""
        # this method is invoked by tests only
        return " " + doc["answer_str"]

    def training_docs(self):
        """Yield flattened docs built from the ``train`` split."""
        for doc in self.data["train"]:
            yield from self.process_doc(doc)

    def validation_docs(self):
        """Yield flattened docs built from the ``validation`` split."""
        # Previously this iterated the "train" split, so validation silently
        # ran on training data.
        for doc in self.data["validation"]:
            yield from self.process_doc(doc)

    @staticmethod
    def _categorise_answer(answer):
        """Map one answer annotation to an ``(answer_str, answer_type)`` pair.

        Branch order matters: ``unanswerable`` wins over everything, then a
        truthy ``yes_no`` ("Yes"), then a falsy-but-not-None ``yes_no`` ("No"),
        then free-form text, then extractive spans.

        :raises ValueError:
            If the annotation carries no answer of any kind. Without this the
            caller would either hit an unbound local or silently reuse the
            previous annotation's answer.
        """
        if answer["unanswerable"]:
            return "unanswerable", "unanswerable"
        if answer["yes_no"]:
            return "Yes", "bool"
        if answer["yes_no"] is not None:
            return "No", "bool"
        if answer["free_form_answer"]:
            return answer["free_form_answer"], "free form answer"
        if answer["extractive_spans"]:
            return ", ".join(answer["extractive_spans"]), "extractive spans"
        raise ValueError("QASPER answer annotation does not contain an answer")

    def process_doc(self, doc):
        """Given a `doc`, flatten it out so that each JSON blob
        contains exactly one question and one answer. Logic taken from
        the reference implementation available at
        https://github.com/allenai/qasper-led-baseline/blob/main/scripts/evaluator.py

        :param doc:
            A raw dataset record with ``title``, ``abstract`` and ``qas``.
        :return:
            A list of flat dicts, one per (question, answer annotation) pair.
        :raises ValueError:
            If an answer annotation contains no answer of any kind.
        """
        obs_list = []
        # NOTE(review): assumes iterating doc["qas"] yields mappings holding
        # parallel "question" / "answers" sequences -- confirm against the
        # dataset schema actually loaded by HFTask.
        for qa in doc["qas"]:
            for question, answer_list in zip(qa["question"], qa["answers"]):
                for answer in answer_list:
                    answer_str, answer_type = self._categorise_answer(answer)
                    # `append` must be *called*; the original subscripted it
                    # (`append[...]`), which raises TypeError.
                    obs_list.append(
                        {
                            "title": doc["title"],
                            "abstract": doc["abstract"],
                            "question": question,
                            "answer_str": answer_str,
                            "answer_type": answer_type,
                        }
                    )
        return obs_list

    def process_results(self, doc, results):
        """Delegate result processing to the parent class unchanged."""
        return super().process_results(doc, results)

    def construct_requests(self, doc, ctx):
        """Uses RequestFactory to construct Requests and returns an iterable of
        Requests which will be sent to the LM.

        :param doc:
            The document as returned from training_docs, validation_docs, or test_docs.
        :param ctx: str
            The context string, generated by fewshot_context. This includes the natural
            language description, as well as the few shot examples, and the question
            part of the document for `doc`.
        :return:
            A pair of requests: a greedy generation that stops at the first
            newline, and the log-likelihood of the continuation " unanswerable".
        """
        continuation = rf.greedy_until(ctx, ["\n"])
        is_unanswerable = rf.loglikelihood(ctx, " unanswerable")
        return continuation, is_unanswerable