Unverified commit 0e0e37f4 authored by Stella Biderman, committed by GitHub

Merge branch 'master' into winogrande

parents dbe43ec3 80f5fc3b
@@ -5,6 +5,10 @@ from . import race
from . import webqs
from . import anli
from . import winogrande
from . import quac
from . import hellaswag
from . import openbookqa
from . import squad
TASK_REGISTRY = {
    # GLUE
@@ -27,6 +31,10 @@ TASK_REGISTRY = {
    # Order by benchmark/genre?
    "arc_easy": arc.ARCEasy,
    "arc_challenge": arc.ARCChallenge,
    "quac": quac.QuAC,
    "hellaswag": hellaswag.HellaSwag,
    "openbookqa": openbookqa.OpenBookQA,
    "squad": squad.SQuAD,
    "race": race.RACE,
    "webqs": webqs.WebQs,
    "winogrande": winogrande.Winogrande,
......
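The new registry entries make each task constructible by name. A minimal sketch of the lookup, assuming a hypothetical `get_task` helper that is not part of this commit:

```python
# Hypothetical helper, not part of this commit: resolve a task name
# to a task instance via TASK_REGISTRY.
def get_task(task_name):
    try:
        return TASK_REGISTRY[task_name]()  # instantiate, e.g. quac.QuAC()
    except KeyError:
        raise KeyError(f"Unknown task: {task_name}")

# get_task("hellaswag") would return a HellaSwag instance.
```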
import numpy as np
from scipy.stats import pearsonr, spearmanr
from sklearn.metrics import f1_score, matthews_corrcoef
from tqdm import auto as tqdm_lib

from .common import HFTask, simple_accuracy_metric, yesno


class HellaSwag(HFTask):
    DATASET_PATH = "hellaswag"
    DATASET_NAME = None

    def has_training_docs(self):
        return True

    def has_validation_docs(self):
        return True

    def has_test_docs(self):
        return True

    def training_docs(self):
        if self.has_training_docs():
            return self.data["train"]

    def validation_docs(self):
        if self.has_validation_docs():
            return self.data["validation"]

    def test_docs(self):
        if self.has_test_docs():
            return self.data["test"]

    def fewshot_description(self):
        return "Label for the relevant action: Sentences describing the context, with an incomplete sentence trailing\nanswer that plausibly completes the situation."

    def doc_to_text(self, doc, include_target=True):
        text = doc['activity_label'] + ': ' + doc['ctx'] + '\n'
        if include_target:
            # HF "hellaswag" stores the gold ending index as a string '0'-'3'.
            label = doc['label']
            if label not in ('0', '1', '2', '3'):
                raise ValueError("HellaSwag from HF datasets contained an invalid answer key")
            text += doc['endings'][int(label)]
        return text

    def evaluate(self, docs, lm, provide_description, num_fewshot):
        # TODO: Write evaluation function
        raise NotImplementedError()
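For reference, the prompt format `HellaSwag.doc_to_text` builds can be sketched standalone; the record below is hypothetical but shaped like rows of the HF `hellaswag` dataset:

```python
# Standalone sketch of the HellaSwag prompt format; doc values invented.
doc = {
    "activity_label": "Removing ice from car",
    "ctx": "Then, the man writes over the snow covering the window of a car.",
    "label": "3",
    "endings": ["ending 0", "ending 1", "ending 2",
                "then, the man continues to scrape the ice off of the car."],
}
text = doc["activity_label"] + ": " + doc["ctx"] + "\n"
text += doc["endings"][int(doc["label"])]
print(text)
# Removing ice from car: Then, the man writes over the snow covering the window of a car.
# then, the man continues to scrape the ice off of the car.
```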
import numpy as np
from scipy.stats import pearsonr, spearmanr
from sklearn.metrics import f1_score, matthews_corrcoef
from tqdm import auto as tqdm_lib

from .common import HFTask, simple_accuracy_metric, yesno


class OpenBookQA(HFTask):
    DATASET_PATH = "openbookqa"
    DATASET_NAME = "main"

    def has_training_docs(self):
        return True

    def has_validation_docs(self):
        return True

    def has_test_docs(self):
        return True

    def training_docs(self):
        if self.has_training_docs():
            if self._training_docs is None:
                self._training_docs = list(self.data["train"])
            return self._training_docs

    def validation_docs(self):
        if self.has_validation_docs():
            return self.data["validation"]

    def test_docs(self):
        if self.has_test_docs():
            return self.data["test"]

    def fewshot_description(self):
        return "Text of the question prompt\nText of the answer completion"

    def doc_to_text(self, doc, include_target=True):
        text = doc['question_stem'] + '\n'
        if include_target:
            # answerKey is a letter 'A'-'D'; map it to an index into choices.
            letter_answer = doc['answerKey']
            if letter_answer not in ('A', 'B', 'C', 'D'):
                raise ValueError("OpenBookQA from HF datasets contained an invalid answer key")
            text += doc['choices']['text']['ABCD'.index(letter_answer)] + '.'
        return text

    def evaluate(self, docs, lm, provide_description, num_fewshot):
        # TODO: Write evaluation function
        raise NotImplementedError()
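The letter-to-index mapping can likewise be exercised in isolation; the record below is hypothetical but shaped like HF `openbookqa` ("main" config) rows:

```python
# Standalone sketch of the OpenBookQA prompt format; doc values invented.
doc = {
    "question_stem": "Which of these would let the most heat travel through?",
    "answerKey": "B",
    "choices": {
        "label": ["A", "B", "C", "D"],
        "text": ["a new pair of jeans", "a steel spoon in a cafeteria",
                 "a cotton candy at a store", "a calvin klein cotton hat"],
    },
}
index = "ABCD".index(doc["answerKey"])  # letter key -> choices position
print(doc["question_stem"] + "\n" + doc["choices"]["text"][index] + ".")
# Which of these would let the most heat travel through?
# a steel spoon in a cafeteria.
```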
import json
import random
import os

from lm_eval.base import Dataset
from ..utils import sh


class QuAC(Dataset):
    def __init__(self):
        super().__init__()

    def download(self):
        if not os.path.exists('data/quac'):
            sh("""
                mkdir -p data/quac
                wget https://s3.amazonaws.com/my89public/quac/train_v0.2.json -O data/quac/train_v0.2.json
                wget https://s3.amazonaws.com/my89public/quac/val_v0.2.json -O data/quac/val_v0.2.json
                """)

    def has_training_docs(self):
        return True

    def has_validation_docs(self):
        return True

    def has_test_docs(self):
        return False

    def training_docs(self):
        with open('data/quac/train_v0.2.json') as f:
            myjson = json.load(f)['data']
        return self.load_doc(myjson)

    def validation_docs(self):
        with open('data/quac/val_v0.2.json') as f:
            myjson = json.load(f)['data']
        return self.load_doc(myjson)

    def test_docs(self):
        raise NotImplementedError("QuAC has no test docs.")

    def fewshot_examples(self, k):
        traindocs = list(self.training_docs())
        random.shuffle(traindocs)
        return traindocs[:k]

    def fewshot_description(self):
        desc = "TITLE: Title of the context passage - subtitle of the passage\nPARAGRAPH: Passage describing the relevant information for answering questions.\n\nQ: Text of a question.\n\nA: Answer to the question, based on the passage. If it cannot be answered based on the passage, write CANNOTANSWER"
        return desc

    def load_doc(self, myjson):
        docs = []
        for item in myjson:
            title = item['title'] + ' - ' + item['section_title']
            # Each QuAC item carries a single paragraph (one dialogue context).
            paragraph = item['paragraphs'][0]['context'].replace("CANNOTANSWER", "")
            qas = item['paragraphs'][0]['qas']
            # Keep only the first reference answer for each question.
            qa_pairs = [(qa['question'], qa['answers'][0]['text']) for qa in qas]
            for (question, answer) in qa_pairs:
                docs.append({
                    'title': title,
                    'paragraph': paragraph,
                    'question': question,
                    'answer': answer,
                })
        return docs

    def doc_to_text(self, doc, include_target=True):
        text = 'TITLE: ' + doc['title'] + '\n' + 'PARAGRAPH: ' + doc['paragraph'] \
            + '\n\n' + 'Q: ' + doc['question'] + '\n\n' + 'A: '
        if include_target:
            text += doc['answer']
        return text

    def evaluate(self, docs, lm, provide_description, num_fewshot):
        # TODO: Write evaluation function
        raise NotImplementedError()
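The nesting `load_doc` walks can be illustrated with a hand-made example; the JSON below is invented but follows the field layout the code above reads:

```python
# Invented QuAC-style JSON following the fields load_doc reads above.
example = [{
    "title": "Example Article",
    "section_title": "Early life",
    "paragraphs": [{
        "context": "Some passage text. CANNOTANSWER",
        "qas": [{"question": "What is this about?",
                 "answers": [{"text": "A passage."}]}],
    }],
}]
# Flattening mirrors load_doc: one doc per (question, answer) pair,
# with "CANNOTANSWER" stripped from the context.
for item in example:
    title = item["title"] + " - " + item["section_title"]
    paragraph = item["paragraphs"][0]["context"].replace("CANNOTANSWER", "")
    for qa in item["paragraphs"][0]["qas"]:
        print({"title": title, "paragraph": paragraph,
               "question": qa["question"],
               "answer": qa["answers"][0]["text"]})
```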
 from . common import HFTask
 from ..utils_stream import X, each, apply, join, filt, one
 import collections
-import nlp
+import datasets

 class RACE(HFTask):
@@ -26,7 +26,7 @@ class RACE(HFTask):
         # is shown that one document is made per passage.
         r = collections.defaultdict(list)
-        for item in nlp.load_dataset(path=self.DATASET_PATH, name=self.DATASET_NAME)[set]:
+        for item in datasets.load_dataset(path=self.DATASET_PATH, name=self.DATASET_NAME)[set]:
             r[item['article']].append(item)
         res = list(r.values() >> each(lambda x: {
......
import numpy as np
from scipy.stats import pearsonr, spearmanr
from sklearn.metrics import f1_score, matthews_corrcoef
from tqdm import auto as tqdm_lib

from .common import HFTask, simple_accuracy_metric, yesno


class SQuAD(HFTask):
    DATASET_PATH = "squad_v2"
    DATASET_NAME = None

    def has_training_docs(self):
        return True

    def has_validation_docs(self):
        return True

    def has_test_docs(self):
        return False

    def training_docs(self):
        if self.has_training_docs():
            return self.data["train"]

    def validation_docs(self):
        if self.has_validation_docs():
            return self.data["validation"]

    def fewshot_description(self):
        return "Title: The_Title_of_It\n\nBackground: A text passage as background to answer the question with.\n\nQ: Question about the passage.\n\nA: Answer."

    def doc_to_text(self, doc, include_target=True):
        text = 'Title: ' + doc['title'] + '\n\n' + 'Background: ' + doc['context'] \
            + '\n\n' + 'Q: ' + doc['question'] + '\n\n' + 'A: '
        if include_target:
            answer_list = doc['answers']['text']
            # SQuAD v2 marks unanswerable questions with an empty answer list.
            if len(answer_list) > 0:
                answer = answer_list[0]
            else:
                answer = 'unanswerable'
            text += answer
        return text

    def evaluate(self, docs, lm, provide_description, num_fewshot):
        # TODO: Write evaluation function
        raise NotImplementedError()
\ No newline at end of file
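SQuAD v2 distinguishes answerable from unanswerable questions only by the answer list, which is what the `include_target` branch above relies on. A minimal sketch with a hypothetical row:

```python
# SQuAD v2 signals "unanswerable" with an empty answers["text"] list;
# the row below is hypothetical.
answers = {"text": [], "answer_start": []}
answer = answers["text"][0] if answers["text"] else "unanswerable"
print(answer)  # -> unanswerable
```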
@@ -94,7 +94,11 @@ class Copa(HFTask):
     def doc_to_text(self, doc, include_target=True):
         # Drop the period
-        text = doc["premise"].strip()[:-1] + " because "
+        connector = {
+            "cause": "because",
+            "effect": "therefore",
+        }[doc["question"]]
+        text = doc["premise"].strip()[:-1] + f" {connector} "
         if include_target:
             correct_choice = doc["choice1"] if doc["label"] == 0 else doc["choice2"]
             # Connect the sentences
......
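The new connector lookup can be exercised in isolation; the premise and question values below are hypothetical COPA-style inputs:

```python
# Standalone sketch of the connector logic added above: COPA premises
# end with a period, which is dropped and replaced by a connective
# chosen from the question type.
connector = {"cause": "because", "effect": "therefore"}
premise = "The man broke his toe."
question = "cause"
text = premise.strip()[:-1] + f" {connector[question]} "
print(repr(text))  # -> 'The man broke his toe because '
```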