Unverified commit 4a294d8a authored by Charles Foster, committed by GitHub
Browse files

Merge pull request #3 from EleutherAI/master

Sync up to EAI
parents 302ca3d6 946cb2bc
......@@ -4,16 +4,19 @@ from . import arc
from . import race
from . import webqs
from . import anli
from . import wsc273
from . import winogrande
from . import quac
from . import hellaswag
from . import openbookqa
from . import squad
from . import naturalqs
TASK_REGISTRY = {
# GLUE
"cola": glue.CoLA,
"mnli": glue.MNLI,
"mnli_mismatched": glue.MNLIMismatched,
"mrpc": glue.MRPC,
"rte": glue.RTE,
"qnli": glue.QNLI,
......@@ -27,7 +30,7 @@ TASK_REGISTRY = {
"copa": superglue.Copa,
"multirc": superglue.MultiRC,
"wic": superglue.WordsInContext,
"wsc": superglue.WinogradSchemaChallenge,
"wsc": superglue.SGWinogradSchemaChallenge,
# Order by benchmark/genre?
"arc_easy": arc.ARCEasy,
"arc_challenge": arc.ARCChallenge,
......@@ -36,7 +39,9 @@ TASK_REGISTRY = {
"openbookqa": openbookqa.OpenBookQA,
"squad": squad.SQuAD,
"race": race.RACE,
"naturalqs": naturalqs.NaturalQs,
"webqs": webqs.WebQs,
"wsc273": wsc273.WinogradSchemaChallenge273,
"winogrande": winogrande.Winogrande,
"anli_r1": anli.ANLIRound1,
"anli_r2": anli.ANLIRound2,
......
......@@ -5,8 +5,10 @@ from ..utils import sh
class CoQA(Dataset):
def __init__(self):
self.download()
def download(self):
#TODO: don't download if files already there
sh("""
mkdir -p data/coqa
wget http://downloads.cs.stanford.edu/nlp/data/coqa/coqa-train-v1.0.json -O data/coqa/coqa-train-v1.0.json
......@@ -17,48 +19,34 @@ class CoQA(Dataset):
return True
def has_validation_docs(self):
return False
return True
def has_test_docs(self):
return False
def training_docs(self):
myjson = json.load(open('data/coqa/coqa-train-v1.0.json'))['data']
return self.load_doc(myjson)
return json.load(open('data/coqa/coqa-train-v1.0.json'))['data']
def validation_docs(self):
pass
return json.load(open('data/coqa/coqa-dev-v1.0.json'))['data']
def test_docs(self):
myjson = json.load(open('data/coqa/coqa-dev-v1.0.json'))['data']
return self.load_doc(myjson)
def fewshot_examples(self, k):
traindocs = list(self.training_docs())
random.seed(123)
random.shuffle(traindocs)
return traindocs[:k]
pass
def fewshot_description(self):
pass
def load_doc(self, myjson):
docs = []
for item in myjson:
new_instance = [item['story']]
qa_pairs = zip(item['questions'], item['answers'])
for pair in qa_pairs:
new_instance.append('\n')
new_instance.append(''.join(['Q: ',pair[0]['input_text']]))
new_instance.append(''.join(['A: ',pair[1]['input_text']]))
docs.append(new_instance)
return docs
def doc_to_text(self, doc, include_target=True):
text = '\n<|endoftext|>\n'.join(['\n'.join(instance) for instance in doc])
text = text + '\n<|endoftext|>'
return text
text = [doc['story']]
for pair in zip(doc['questions'], doc['answers']):
text.append('\n\n')
text.append(''.join(['Q: ',pair[0]['input_text'], '\n\n']))
if include_target:
text.append(''.join(['A: ',pair[1]['input_text']]))
else:
text.append('A: ')
return ''.join(text)
def evaluate(self, docs, lm):
pass
......@@ -10,6 +10,9 @@ from ..base import Dataset
class DROP(Dataset):
DATAFOLDER = Path(__file__).parent / "../../data/drop"
def __init__(self):
self.download()
def has_training_docs(self):
"""Whether the task has a training set"""
return True
......@@ -35,10 +38,10 @@ class DROP(Dataset):
pass
def doc_to_text(self, doc, include_target=True):
doctext = "Passage: {}\n\n".format(doc["passage"])
doctext = "Passage: {}\n".format(doc["passage"])
qa_texts = []
for pair in doc["qa_pairs"]:
text = ''.join(['Q: ', pair['question'],'\nA: '])
text = ''.join(['Question: ', pair['question'],'\nAnswer: '])
if include_target:
def get_answer(ans_dict):
if ans_dict['number'] != '':
......@@ -52,7 +55,7 @@ class DROP(Dataset):
ans_dict['date']['year']]).strip()
text = ''.join([text, get_answer(pair['answer'])])
qa_texts.append(text)
return ''.join([doctext, '\n\n'.join(qa_texts)])
return ''.join([doctext, '\n'.join(qa_texts)])
def evaluate(self, docs, lm, provide_description, num_fewshot):
......
......@@ -114,6 +114,17 @@ class MNLI(HFTask):
return simple_accuracy_metric(preds=preds, golds=golds)
class MNLIMismatched(MNLI):
    """MNLI with mismatched-genre splits; inherits formatting/eval from MNLI."""

    def validation_docs(self):
        if not self.has_validation_docs():
            return None
        return self.data["validation_mismatched"]

    def test_docs(self):
        if not self.has_test_docs():
            return None
        return self.data["test_mismatched"]
class MRPC(HFTask):
DATASET_PATH = "glue"
DATASET_NAME = "mrpc"
......
......@@ -6,7 +6,8 @@ import ftfy
class Lambada(Dataset):
def __init__(self):
self.download()
def download(self):
sh("mkdir -p data/lambada")
with open("data/lambada/lambada_test.json", 'w') as f:
......@@ -32,14 +33,16 @@ class Lambada(Dataset):
pass
def load_doc(self, myjson):
return [doc['text'] for doc in myjson]
return [doc for doc in myjson]
def test_docs(self):
myjson = json.load(open("data/lambada/lambada_test.json"))
return self.load_doc(myjson)
def doc_to_text(self, doc, include_target=True):
pass
#TODO: check if this is how OA does it
#label = doc[]
return doc
def evaluate(self, docs, lm, provide_description, num_fewshot):
pass
\ No newline at end of file
from . common import HFTask
class NaturalQs(HFTask):
    """Natural Questions loaded via HuggingFace datasets.

    Prompts are rendered as "Q: <question>\n\nA: <long answer>", using the
    long answer (per the GPT-3 paper) with HTML tokens stripped.
    """
    DATASET_PATH = "natural_questions"
    DATASET_NAME = None

    def has_training_docs(self):
        return True

    def has_validation_docs(self):
        return True

    def has_test_docs(self):
        return False

    def fewshot_description(self):
        # TODO: figure out description
        return ""

    def training_docs(self):
        # Data is too large to fit in memory, so hand back the lazy HF split
        # rather than materializing it.
        return self.data["train"]

    def doc_to_text(self, doc, include_target=True):
        """Render one NQ example as a Q/A prompt.

        The long-answer span is a pair of token indices into the document;
        tokens flagged as HTML are dropped before joining.
        """
        question = doc['question']['text']
        text = 'Q: ' + question + '\n\n' + 'A: '
        if include_target:
            # There's a short answer and a long answer; based on the paper we
            # use the long answer. NOTE: do not eagerly index the short answer
            # here (doc['annotations']['short_answers'][0]['text']) -- examples
            # without a short answer would raise IndexError even though the
            # value was never used.
            long_answer_start = doc['annotations']['long_answer'][0]['start_token']
            long_answer_end = doc['annotations']['long_answer'][0]['end_token']
            long_answer_span = doc['document']['tokens']['token'][long_answer_start:long_answer_end]
            long_answer_is_html = doc['document']['tokens']['is_html'][long_answer_start:long_answer_end]
            long_answer_chars = [tok for (tok, is_html) in zip(long_answer_span, long_answer_is_html) if not is_html]
            long_answer = " ".join(long_answer_chars)
            text += long_answer
        return text

    def evaluate(self, docs, lm, provide_description, num_fewshot):
        # TODO: implement
        raise NotImplementedError()
\ No newline at end of file
import json
import random
from lm_eval.base import Dataset
from ..utils import sh
class PiQA(Dataset):
    """PIQA: physical-commonsense binary-choice QA (Bisk et al., 2020)."""

    def __init__(self):
        self.download()

    def download(self):
        # TODO: don't download if files already there
        sh("""
        mkdir -p data/piqa
        wget https://yonatanbisk.com/piqa/data/train.jsonl -O data/piqa/piqa-train.jsonl
        wget https://yonatanbisk.com/piqa/data/train-labels.lst -O data/piqa/piqa-train-labels.lst
        wget https://yonatanbisk.com/piqa/data/valid.jsonl -O data/piqa/piqa-valid.jsonl
        wget https://yonatanbisk.com/piqa/data/valid-labels.lst -O data/piqa/piqa-valid-labels.lst
        wget https://yonatanbisk.com/piqa/data/tests.jsonl -O data/piqa/piqa-test.jsonl
        """)

    def has_training_docs(self):
        return True

    def has_validation_docs(self):
        return True

    def has_test_docs(self):
        return True

    def load_docs(self, textfilename, labelfilename):
        """Load JSONL examples, paired with label lines when labels exist.

        Returns (example_dict, label_line) pairs when labelfilename is given,
        otherwise bare example dicts (the test split has no labels).
        Files are closed explicitly (previously leaked via bare open()).
        """
        with open(textfilename, 'r') as tf:
            examples = [json.loads(line) for line in tf]
        if labelfilename is None:
            return examples
        with open(labelfilename, 'r') as lf:
            labels = list(lf)
        return zip(examples, labels)

    def training_docs(self):
        return self.load_docs('data/piqa/piqa-train.jsonl', 'data/piqa/piqa-train-labels.lst')

    def validation_docs(self):
        return self.load_docs('data/piqa/piqa-valid.jsonl', 'data/piqa/piqa-valid-labels.lst')

    def test_docs(self):
        return self.load_docs('data/piqa/piqa-test.jsonl', None)

    def fewshot_description(self):
        pass

    def doc_to_text(self, doc, include_target=True):
        if include_target:
            # doc is an (example, label) pair; label '0'/'1' selects sol1/sol2.
            rightanswer = int(doc[1][0]) + 1
            return ''.join([doc[0]['goal'], ' ', doc[0]['sol' + str(rightanswer)]])
        # TODO: check if oa uses newline
        # NOTE(review): this branch indexes doc as a bare dict, i.e. an
        # unlabeled (test) example rather than the pair above -- confirm callers.
        return doc['goal'] + ' '

    def evaluate(self, docs, lm):
        pass
import json
import random
from lm_eval.base import Dataset
from ..utils import sh
import csv
class StoryCloze(Dataset):
    """Story Cloze Test: choose the correct fifth sentence for a 4-sentence story."""

    def __init__(self):
        self.download()

    def download(self):
        # TODO: replace with Eye link
        pass

    def has_training_docs(self):
        return False

    def has_validation_docs(self):
        return True

    def has_test_docs(self):
        return True

    def training_docs(self):
        pass

    def load_doc(self, filename):
        # Each CSV row: [id, sent1..sent4, ending1, ending2, answer].
        with open(filename, newline='') as fh:
            return [row for row in csv.reader(fh)]

    def validation_docs(self):
        return self.load_doc("data/storycloze/cloze_test_val__winter2018-cloze_test_ALL_val - 1 - 1.csv")

    def test_docs(self):
        return self.load_doc("data/storycloze/cloze_test_test__winter2018-cloze_test_ALL_test - 1.csv")

    def fewshot_description(self):
        pass

    def doc_to_text(self, doc, include_target=True):
        story = ' '.join(doc[1:5])
        if not include_target:
            return story
        # Answer column is '1' or '2'; int(ans) - 4 indexes the matching
        # ending column from the end of the row.
        ending = doc[int(doc[-1]) - 4]
        return story + ' ' + ending

    def evaluate(self, docs, lm):
        pass
......@@ -218,7 +218,7 @@ class WordsInContext(HFTask):
return simple_accuracy_metric(preds=preds, golds=golds)
class WinogradSchemaChallenge(HFTask):
class SGWinogradSchemaChallenge(HFTask):
DATASET_PATH = "super_glue"
DATASET_NAME = "wsc"
......@@ -282,3 +282,25 @@ class WinogradSchemaChallenge(HFTask):
)
preds.append(1 if generated == to_predict else 0)
return simple_accuracy_metric(preds=preds, golds=golds)
class RTE(HFTask):
    """SuperGLUE RTE, rendered as a True/False question prompt."""
    DATASET_PATH = "super_glue"
    DATASET_NAME = "rte"

    def fewshot_description(self):
        # TODO: implement
        pass

    def doc_to_text(self, doc, include_target=True):
        # Build the shared prompt once; the target branch only appends the answer.
        prompt = ''.join([doc['premise'], '\nquestion: ', doc['hypothesis'], ' True or False?\nanswer: '])
        if include_target:
            # Label 0 is entailment -> "True"; any other label -> "False".
            prompt += 'True' if doc['label'] == 0 else 'False'
        return prompt

    def evaluate(self, docs, lm, provide_description, num_fewshot):
        # TODO:
        pass
import json
import random
from lm_eval.base import Dataset
from ..utils import sh
class TriviaQA(Dataset):
    """TriviaQA (unfiltered) open-domain question answering."""

    def __init__(self):
        self.download()

    def download(self):
        # TODO: don't download if files already there
        sh("""
        mkdir -p data/triviaqa
        wget http://nlp.cs.washington.edu/triviaqa/data/triviaqa-unfiltered.tar.gz -O data/triviaqa/trivia_qa-unfiltered.tar.gz
        tar -xf data/triviaqa/trivia_qa-unfiltered.tar.gz
        mv triviaqa-unfiltered/ data/triviaqa/
        """)

    def has_training_docs(self):
        return True

    def has_validation_docs(self):
        return True

    def has_test_docs(self):
        return True

    def _load_split(self, path):
        # Close the file explicitly; previously json.load(open(...)) leaked
        # the handle.
        with open(path) as fh:
            return json.load(fh)['Data']

    def training_docs(self):
        return self._load_split('data/triviaqa/triviaqa-unfiltered/unfiltered-web-train.json')

    def validation_docs(self):
        return self._load_split('data/triviaqa/triviaqa-unfiltered/unfiltered-web-dev.json')

    def test_docs(self):
        return self._load_split('data/triviaqa/triviaqa-unfiltered/unfiltered-web-test.json')

    def fewshot_description(self):
        pass

    def doc_to_text(self, doc, include_target=True):
        prefix = ''.join(['Q: ', doc['Question'], '\n\n', 'A: '])
        if include_target:
            # Use the first listed alias as the canonical answer string.
            return prefix + doc['Answer']['Aliases'][0]
        return prefix

    def evaluate(self, docs, lm):
        pass
import numpy as np
from scipy.stats import pearsonr, spearmanr
from sklearn.metrics import f1_score, matthews_corrcoef
from tqdm import auto as tqdm_lib
from . common import NLP_TASK, simple_accuracy_metric, yesno
class WikiText103(NLP_TASK):
    """WikiText-103 (raw) language-modeling corpus."""
    NLP_PATH = "wikitext"
    NLP_NAME = "wikitext-103-raw-v1"

    def fewshot_description(self):
        return ""

    def doc_to_text(self, doc, include_target=True):
        # Perplexity-style task: the document text itself is the target.
        return doc['text']

    def evaluate(self, docs, lm, provide_description, num_fewshot):
        pass
class WikiText2(NLP_TASK):
    """WikiText-2 (raw) language-modeling corpus."""
    NLP_PATH = "wikitext"
    NLP_NAME = "wikitext-2-raw-v1"

    def fewshot_description(self):
        return ""

    def doc_to_text(self, doc, include_target=True):
        # Perplexity-style task: the document text itself is the target.
        return doc['text']

    def evaluate(self, docs, lm, provide_description, num_fewshot):
        pass
\ No newline at end of file
import json
import random
import os
from lm_eval.base import Dataset
from ..utils import sh
class WinogradSchemaChallenge273(Dataset):
    """WSC273: 273 Winograd schema sentence pairs (correct/incorrect completion)."""

    def __init__(self):
        super().__init__()

    def download(self):
        if not os.path.exists('data/wsc273'):
            sh("""
            mkdir -p data/wsc273
            wget https://git.cse.msu.edu/bakerb15/nlp-final-project/raw/master/Winogard/reproduce/commonsense_test/wsc273.json -O data/wsc273/wsc273.json
            """)

    def has_training_docs(self):
        return False

    def has_validation_docs(self):
        return False

    def has_test_docs(self):
        return True

    def training_docs(self):
        return []

    def validation_docs(self):
        return []

    def test_docs(self):
        # Close the file explicitly (previously leaked via bare open()).
        with open('data/wsc273/wsc273.json') as fh:
            myjson = json.load(fh)
        return self.load_doc(myjson)

    def fewshot_description(self):
        # This format is ONLY for the purposes of deduplication. For the task evaluation, we'll need to find a new strategy,
        # to meet the needs of this particular task.
        return "Winograd schema sentence with correct continuation. True. Winogard schema sentence with incorrect continuation. False." if False else "Winograd schema sentence with correct continuation. True. Winograd schema sentence with incorrect continuation. False."

    def load_doc(self, myjson):
        """Pair consecutive entries into {'id', 'completions': {'T', 'F'}} docs.

        Raises ValueError when a pair's question ids disagree, or when neither
        entry of a pair is marked correct (the old code silently re-appended
        the previous doc in that case -- or crashed on the first pair).
        """
        docs = []
        for i in range(0, 273 * 2, 2):
            item1 = myjson[i]
            item2 = myjson[i + 1]
            if item1['question_id'] != item2['question_id']:
                raise ValueError("WSC273 has missing completion pair.")
            # Preserve old precedence: if both were flagged correct, item2 won.
            if item2['correctness']:
                correct, incorrect = item2, item1
            elif item1['correctness']:
                correct, incorrect = item1, item2
            else:
                raise ValueError("WSC273 pair has no correct completion.")
            docs.append({
                'id': item1['question_id'],
                'completions': {
                    'T': correct['substitution'],
                    'F': incorrect['substitution'],
                },
            })
        return docs

    def doc_to_text(self, doc, include_target=True):
        # WSC273 is currently only writing out full examples. Partial evaluation needs implementing.
        text = doc['completions']['T'] + ' True. ' + doc['completions']['F'] + ' False.'
        return text

    def evaluate(self, docs, lm):
        # TODO: Write evaluation function
        raise NotImplementedError()
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment