# lambada.py
import json
import math
import os

from best_download import download_file

from lm_eval.base import Task, rf
from lm_eval.metrics import mean, perplexity
from lm_eval.utils import sh


class LAMBADA(Task):
    """LAMBADA task: score the final word of a passage given the rest.

    Documents are read from a JSON-lines file in which every record has a
    ``text`` field. The text is split at its last space: everything before
    it is the prompt and the remaining word (with a leading space) is the
    target whose log-likelihood is requested.
    """

    def download(self):
        """Create ``data/lambada`` and fetch the LAMBADA JSONL file into it."""
        # Standard-library equivalent of `mkdir -p`; no shell is spawned.
        os.makedirs("data/lambada", exist_ok=True)
        download_file(
            "http://eaidata.bmk.sh/data/lambada_test.jsonl",
            "data/lambada/lambada_test.jsonl",
            # NOTE(review): 64 hex digits, presumably the SHA-256 checksum
            # that download_file verifies; confirm against best_download.
            "4aa8d02cd17c719165fc8a7887fddd641f43fcafa4b1c806ca8abc31fabdb226",
        )

    def has_training_docs(self):
        """Return False: this task exposes no training documents."""
        return False

    def has_validation_docs(self):
        """Return True: documents are served through ``validation_docs``."""
        return True

    def has_test_docs(self):
        """Return False: this task exposes no separate test documents."""
        return False

    def training_docs(self):
        """Return None; there is no training split."""
        return None

    def validation_docs(self):
        """Yield one parsed JSON object per line of the downloaded file.

        The records come from ``data/lambada/lambada_test.jsonl``, i.e. the
        file fetched by ``download`` is served as the validation split.
        """
        # Explicit encoding so decoding does not depend on the platform locale.
        with open("data/lambada/lambada_test.jsonl", encoding="utf-8") as fh:
            for line in fh:
                yield json.loads(line)

    def test_docs(self):
        """Return None; there is no separate test split."""
        return None

    def doc_to_text(self, doc):
        """Return ``doc['text']`` with its last space-separated word removed.

        If the text contains no space, the whole text is returned.
        """
        return doc['text'].rsplit(' ', 1)[0]

    def doc_to_target(self, doc):
        """Return the last space-separated word of ``doc['text']``.

        The word is prefixed with a single space so that it continues the
        string returned by ``doc_to_text``.

        Raises:
            IndexError: if the text contains no space.
        """
        return " " + doc['text'].rsplit(' ', 1)[1]

    def fewshot_description(self):
        """Return the few-shot task description (currently empty)."""
        # TODO: figure out description
        return ""

    def construct_requests(self, doc, ctx):
        """Build the log-likelihood request for ``doc``'s target word.

        Args:
            doc: a document as yielded by ``validation_docs``.
            ctx: the context string the target is scored against.

        Returns:
            The pair ``(ll, is_greedy)`` unpacked from
            ``rf.loglikelihood(ctx, target)``.
        """
        ll, is_greedy = rf.loglikelihood(ctx, self.doc_to_target(doc))

        return ll, is_greedy

    def process_results(self, doc, results):
        """Map the request results for one document to per-metric values.

        Args:
            doc: the document the results belong to (unused here).
            results: the ``(ll, is_greedy)`` pair for that document.

        Returns:
            A dict with ``'ppl'`` set to ``ll`` and ``'acc'`` set to
            ``int(is_greedy)``.
        """
        ll, is_greedy = results

        return {
            # NOTE(review): the raw value is stored here; the `perplexity`
            # aggregator presumably turns these into a perplexity. Confirm
            # in lm_eval.metrics.
            'ppl': ll,
            'acc': int(is_greedy),
        }

    def aggregation(self):
        """Return the aggregation function used for each metric."""
        return {
            'ppl': perplexity,
            'acc': mean,
        }

    def higher_is_better(self):
        """Return, per metric, whether a larger value is better."""
        return {
            'ppl': False,
            'acc': True,
        }