# piqa.py
import numpy as np
from lm_eval.base import rf
from ..metrics import mean
from . common import HFTask

class PiQA(HFTask):
    """Two-choice evaluation task over the ``piqa`` dataset.

    Each document is read through the keys ``goal``, ``sol1``, ``sol2`` and
    ``label``; ``label`` is used as an index into ``[sol1, sol2]``, so ``0``
    selects ``sol1`` and ``1`` selects ``sol2``.
    """

    # Dataset identifiers read by the HFTask base class
    # (presumably a Hugging Face `datasets` path/config -- confirm in HFTask).
    DATASET_PATH = "piqa"
    DATASET_NAME = None

    def has_training_docs(self):
        """Return True: this task declares training documents."""
        return True

    def has_validation_docs(self):
        """Return True: this task declares validation documents."""
        return True

    def has_test_docs(self):
        """Return False: this task declares no test documents."""
        return False

    def fewshot_description(self):
        """Return the few-shot preamble (currently an empty string)."""
        # TODO: figure out fewshot description
        return ""

    def doc_to_text(self, doc):
        """Render the prompt for ``doc``.

        Args:
            doc: Mapping providing the ``goal`` text.

        Returns:
            ``"Question: <goal>\\nAnswer:"`` with no trailing space; the
            separating space is carried by the continuation instead.
        """
        return "Question: " + doc["goal"] + "\nAnswer:"

    def doc_to_target(self, doc):
        """Return the labelled solution, prefixed with a single space.

        Args:
            doc: Mapping providing ``sol1``, ``sol2`` and ``label``.

        Returns:
            ``" " + sol1`` when ``label`` is 0, ``" " + sol2`` when it is 1.
        """
        solutions = [doc["sol1"], doc["sol2"]]
        return " " + solutions[doc["label"]]

    def construct_requests(self, doc, ctx):
        """Build one log-likelihood request per candidate solution.

        Args:
            doc: Mapping providing ``sol1`` and ``sol2``.
            ctx: Context forwarded as the first argument of
                ``rf.loglikelihood`` (presumably the rendered prompt --
                confirm against the caller).

        Returns:
            The pair ``(ll_1, ll_2)``: the first element of each
            ``rf.loglikelihood`` result, for ``sol1`` and ``sol2`` in that
            order.
        """
        # Continuations use the same leading space as doc_to_target.
        # Only the first element of each rf.loglikelihood pair is kept.
        ll_1, _ = rf.loglikelihood(ctx, " " + doc["sol1"])
        ll_2, _ = rf.loglikelihood(ctx, " " + doc["sol2"])
        return ll_1, ll_2

    def process_results(self, doc, results):
        """Score one document from its per-solution results.

        Args:
            doc: Mapping providing the gold ``label``.
            results: Sequence of scores, one per solution (presumably in the
                order produced by ``construct_requests`` -- confirm against
                the evaluator).

        Returns:
            ``{"acc": <bool>}``, true when the index of the largest score
            equals ``label``.
        """
        return {
            # np.argmax returns the first index on ties.
            "acc": np.argmax(results) == doc["label"],
        }

    def aggregation(self):
        """Map each metric name to its aggregation function (``acc`` -> ``mean``)."""
        return {
            "acc": mean,
        }

    def higher_is_better(self):
        """Map each metric name to whether larger values are better."""
        return {
            "acc": True,
        }