Commit fb8c3df9 authored by sdtblck
Browse files

add lambada

parent b0e715ee
from lm_eval.base import Dataset
from ..utils import sh
import json
import requests
import ftfy
class Lambada(Dataset):
    """LAMBADA test passages, fetched from the GPT-2 storage bucket.

    The passages are cached locally in ``data/lambada/lambada_test.json`` as a
    JSON list of strings. Only a test split is provided; ``doc_to_text`` and
    ``evaluate`` are not implemented yet.
    """

    def download(self):
        """Download the test set and cache it as a JSON list of passage strings.

        Raises:
            requests.HTTPError: if the server answers with an error status.
        """
        # Fetch and parse everything before touching the disk, so a failed
        # request cannot leave an empty or truncated cache file behind.
        req = requests.get("https://storage.googleapis.com/gpt-2/data/lambada_test.jsonl")
        req.raise_for_status()
        # iter_lines() may yield empty keep-alive lines; json.loads would
        # choke on those, so skip them.
        jsons = [json.loads(line) for line in req.iter_lines() if line]
        texts = [ftfy.fix_text(j['text'], normalization='NFKC') for j in jsons]

        sh("mkdir -p data/lambada")
        with open("data/lambada/lambada_test.json", 'w') as f:
            json.dump(texts, f)

    def has_training_docs(self):
        """Return False: no training split is provided."""
        return False

    def has_validation_docs(self):
        """Return False: no validation split is provided."""
        return False

    def has_test_docs(self):
        """Return True: the test split is available once downloaded."""
        return True

    def training_docs(self):
        """No training split exists; returns None."""
        pass

    def validation_docs(self):
        """No validation split exists; returns None."""
        pass

    def load_doc(self, myjson):
        """Extract passage strings from the parsed cache contents.

        ``download`` stores each passage as a plain string, so entries are
        normally returned unchanged. Entries shaped like ``{"text": ...}``
        (the raw jsonl record layout) are unwrapped as well.

        Args:
            myjson: list of passage strings and/or dicts with a ``'text'`` key.

        Returns:
            A list with one passage string per entry.
        """
        return [doc['text'] if isinstance(doc, dict) else doc for doc in myjson]

    def test_docs(self):
        """Load the cached test set and return it as a list of passage strings."""
        # Context manager so the file handle is closed deterministically.
        with open("data/lambada/lambada_test.json") as f:
            myjson = json.load(f)
        return self.load_doc(myjson)

    def doc_to_text(self, doc, include_target=True):
        """Not implemented yet; returns None."""
        pass

    def evaluate(self, docs, lm, provide_description, num_fewshot):
        """Not implemented yet; returns None."""
        pass
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment