lambada.py 1.67 KB
Newer Older
Leo Gao's avatar
Leo Gao committed
1
from lm_eval.base import Dataset, rf, mean
sdtblck's avatar
sdtblck committed
2
from lm_eval.utils import sh
sdtblck's avatar
sdtblck committed
3
import json
Leo Gao's avatar
Leo Gao committed
4
5
import math
from best_download import download_file
sdtblck's avatar
sdtblck committed
6
7


Leo Gao's avatar
Leo Gao committed
8
class LAMBADA(Dataset):
    """LAMBADA last-word prediction task backed by a local JSONL test file.

    Each document is a JSON object with a ``text`` field. The text is split
    at its last space: everything before it is the context and the final
    token (with a leading space) is the target.
    Only a test split is provided.
    """

    def download(self):
        """Create ``data/lambada`` and fetch ``lambada_test.jsonl`` into it."""
        sh("mkdir -p data/lambada")
        download_file(
            "https://storage.googleapis.com/gpt-2/data/lambada_test.jsonl",
            "data/lambada/lambada_test.jsonl",
            # NOTE(review): presumably the expected SHA-256 of the file,
            # checked by best_download -- confirm against its docs.
            "4aa8d02cd17c719165fc8a7887fddd641f43fcafa4b1c806ca8abc31fabdb226",
        )

    def has_training_docs(self):
        """Return False: no training split is available."""
        return False

    def has_validation_docs(self):
        """Return False: no validation split is available."""
        return False

    def has_test_docs(self):
        """Return True: the test split is the only one available."""
        return True

    def training_docs(self):
        """Return None; there is no training split."""
        pass

    def validation_docs(self):
        """Return None; there is no validation split."""
        pass

    def test_docs(self):
        """Yield one parsed JSON document per non-blank line of the test file."""
        # Read as UTF-8 explicitly so the result does not depend on the
        # platform's default locale encoding.
        with open("data/lambada/lambada_test.jsonl", encoding="utf-8") as fh:
            for line in fh:
                # A blank (e.g. trailing) line is not a JSON document.
                if line.strip():
                    yield json.loads(line)

    def doc_to_text(self, doc):
        """Return the passage without its final space-separated token."""
        return doc['text'].rsplit(' ', 1)[0]

    def doc_to_target(self, doc):
        """Return the final space-separated token, prefixed with a space.

        Raises:
            IndexError: if ``doc['text']`` contains no space.
        """
        return " " + doc['text'].rsplit(' ', 1)[1]

    def fewshot_description(self):
        """Return the few-shot task description (currently empty)."""
        # TODO: figure out description
        return ""

    def construct_requests(self, doc, ctx):
        """Build the log-likelihood request for the target given ``ctx``.

        Args:
            doc: The document whose final word is the target.
            ctx: The context string to condition on.

        Returns:
            The ``(ll, is_greedy)`` pair produced by ``rf.loglikelihood``.
        """
        # Condition on the supplied context string, not on the raw document.
        ll, is_greedy = rf.loglikelihood(ctx, self.doc_to_target(doc))

        return ll, is_greedy

    def process_results(self, doc, results):
        """Convert a ``(ll, is_greedy)`` result into per-document metrics.

        Args:
            doc: The document the results belong to (unused here).
            results: The ``(ll, is_greedy)`` pair for this document.

        Returns:
            A dict with ``perplexity`` (``exp(-ll)``) and ``accuracy``
            (``is_greedy`` as 0 or 1).
        """
        ll, is_greedy = results

        return {
            'perplexity': math.exp(-ll),
            'accuracy': int(is_greedy)
        }

    def aggregation(self):
        """Return the aggregation function for each metric (both ``mean``)."""
        return {
            'perplexity': mean,
            'accuracy': mean
        }

    def higher_is_better(self):
        """Return, per metric, whether a larger value is better."""
        return {
            'perplexity': False,
            'accuracy': True
        }