# triviaqa.py (committed by Anish Thite)
import json
import random
from lm_eval.base import Dataset
from ..utils import sh

class TriviaQA(Dataset):
    """TriviaQA (unfiltered, web) question-answering dataset.

    Constructing an instance triggers ``download()``. Each split is read from
    a JSON file under ``data/triviaqa/triviaqa-unfiltered`` and exposed as the
    value stored under that file's top-level ``'Data'`` key.
    """

    # Directory the unpacked archive is moved to by ``download``.
    _DATA_DIR = 'data/triviaqa/triviaqa-unfiltered'

    def __init__(self):
        """Make sure the dataset files are available locally."""
        self.download()

    def download(self):
        """Fetch and unpack the TriviaQA archive unless it is already present.

        The unpacked directory is only created by the final ``mv`` step, so
        its existence is used as the marker that a previous run completed.
        """
        import os  # local import so this block does not depend on file-level edits

        if os.path.isdir(self._DATA_DIR):
            return

        sh("""
           mkdir -p data/triviaqa
           wget http://nlp.cs.washington.edu/triviaqa/data/triviaqa-unfiltered.tar.gz -O data/triviaqa/trivia_qa-unfiltered.tar.gz
           tar -xf data/triviaqa/trivia_qa-unfiltered.tar.gz
           mv triviaqa-unfiltered/ data/triviaqa/
           """)

    def _load_split(self, filename):
        """Return the ``'Data'`` entry of the JSON file ``filename`` in the data directory."""
        path = self._DATA_DIR + '/' + filename
        # Context manager closes the handle deterministically; JSON text is
        # decoded as UTF-8 rather than the platform default encoding.
        with open(path, encoding='utf-8') as f:
            return json.load(f)['Data']

    def has_training_docs(self):
        """Report that a training split is provided."""
        return True

    def has_validation_docs(self):
        """Report that a validation split is provided."""
        return True

    def has_test_docs(self):
        """Report that a test split is provided."""
        return True

    def training_docs(self):
        """Load the training split from ``unfiltered-web-train.json``."""
        return self._load_split('unfiltered-web-train.json')

    def validation_docs(self):
        """Load the validation split from ``unfiltered-web-dev.json``."""
        return self._load_split('unfiltered-web-dev.json')

    def test_docs(self):
        """Load the test split from ``unfiltered-web-test.json``."""
        return self._load_split('unfiltered-web-test.json')

    def fewshot_description(self):
        """Not implemented yet; returns ``None``."""
        # TODO: provide a few-shot task description.
        return None

    def doc_to_text(self, doc, include_target=True):
        """Format ``doc`` as a ``Q: ... A: ...`` prompt.

        Args:
            doc: Mapping with a ``'Question'`` string and, when
                ``include_target`` is true, an ``'Answer'`` mapping whose
                ``'Aliases'`` sequence has at least one entry.
            include_target: When true, append the first answer alias after
                ``'A: '``; otherwise the prompt ends with ``'A: '``.

        Returns:
            The formatted prompt string.
        """
        prompt = 'Q: ' + doc['Question'] + '\n\n' + 'A: '
        if include_target:
            # The answer is only looked up when requested, so documents
            # without an 'Answer' entry can still be formatted as bare prompts.
            prompt += doc['Answer']['Aliases'][0]
        return prompt

    def evaluate(self, docs, lm):
        """Not implemented yet; returns ``None``."""
        # TODO: implement evaluation of ``lm`` on ``docs``.
        return None