Unverified Commit efc4310c authored by Stella Biderman, committed by GitHub

Merge branch 'master' into openbookqa

parents 8402847e 2338188f
@@ -5,6 +5,7 @@ from . import race
from . import webqs
from . import anli
from . import openbookqa
+from . import squad

TASK_REGISTRY = {
    # GLUE
@@ -28,6 +29,7 @@ TASK_REGISTRY = {
    "arc_easy": arc.ARCEasy,
    "arc_challenge": arc.ARCChallenge,
    "openbookqa": openbookqa.OpenBookQA,
+    "squad": squad.SQuAD,
    "race": race.RACE,
    "webqs": webqs.WebQs,
    "anli_r1": anli.ANLIRound1,
...
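
Note: with the registry entry above in place, the new SQuAD task can be constructed by name. A minimal usage sketch follows; the import path and the no-argument constructor are assumptions, since neither is shown in this diff.

# Hypothetical usage -- the "tasks" module path and no-arg constructor are assumptions.
from tasks import TASK_REGISTRY

task = TASK_REGISTRY["squad"]()        # instantiate the newly registered SQuAD task
print(task.has_validation_docs())      # True -- squad_v2 ships a validation split
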
from . common import HFTask
from ..utils_stream import X, each, apply, join, filt, one
import collections
-import nlp
+import datasets
class RACE(HFTask):
@@ -26,7 +26,7 @@ class RACE(HFTask):
        # is shown that one document is made per passage.
        r = collections.defaultdict(list)
-        for item in nlp.load_dataset(path=self.DATASET_PATH, name=self.DATASET_NAME)[set]:
+        for item in datasets.load_dataset(path=self.DATASET_PATH, name=self.DATASET_NAME)[set]:
            r[item['article']].append(item)
        res = list(r.values() >> each(lambda x: {
...
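
Note: the race.py change above reflects the rename of the Hugging Face "nlp" package to "datasets"; load_dataset keeps the same call shape, only the module name changes. A small illustrative call; the path/name values are illustrative rather than taken from this diff (race.py supplies its own DATASET_PATH and DATASET_NAME constants).

import datasets

# Same call shape as in race.py above, under the renamed package.
# "race" / "high" are illustrative config values, not taken from this diff.
data = datasets.load_dataset(path="race", name="high")
print(data["train"][0].keys())
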
import numpy as np
from scipy.stats import pearsonr, spearmanr
from sklearn.metrics import f1_score, matthews_corrcoef
from tqdm import auto as tqdm_lib
from . common import HFTask, simple_accuracy_metric, yesno


class SQuAD(HFTask):
    DATASET_PATH = "squad_v2"
    DATASET_NAME = None

    def has_training_docs(self):
        return True

    def has_validation_docs(self):
        return True

    def has_test_docs(self):
        return False

    def training_docs(self):
        if self.has_training_docs():
            return self.data["train"]

    def validation_docs(self):
        if self.has_validation_docs():
            return self.data["validation"]

    def fewshot_description(self):
        return "Title: The_Title_of_It\n\nBackground: A text passage as background to answer the question with.\n\nQ: Question about the passage.\n\nA: Answer."

    def doc_to_text(self, doc, include_target=True):
        text = 'Title: ' + doc['title'] + '\n\n' + 'Background: ' + doc['context'] + '\n\n' + 'Q: ' + doc['question'] + '\n\n' + 'A: '
        if include_target:
            answer_list = doc['answers']['text']
            if len(answer_list) > 0:
                answer = answer_list[0]
            else:
                answer = 'unanswerable'
            text += answer
        return text

    def evaluate(self, docs, lm, provide_description, num_fewshot):
        # TODO: Write evaluation function
        raise NotImplementedError()
\ No newline at end of file
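
Note: evaluate() above is intentionally left as a TODO in this commit. For illustration only, here is a small standalone sketch of SQuAD-style exact-match scoring that such an evaluation could build on; it follows the spirit of the official SQuAD normalization rules and is not the harness's implementation.

import re
import string

def normalize_answer(s):
    # Lowercase, drop punctuation and articles, collapse whitespace (SQuAD-style).
    s = s.lower()
    s = "".join(ch for ch in s if ch not in set(string.punctuation))
    s = re.sub(r"\b(a|an|the)\b", " ", s)
    return " ".join(s.split())

def exact_match(prediction, gold_answers):
    # 1.0 if the normalized prediction matches any normalized gold answer.
    # In squad_v2 an empty answer list marks an unanswerable question,
    # matching the 'unanswerable' target used in doc_to_text above.
    golds = gold_answers if gold_answers else ["unanswerable"]
    return float(any(normalize_answer(prediction) == normalize_answer(g) for g in golds))

print(exact_match("The Eiffel Tower", ["Eiffel Tower"]))  # 1.0
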
@@ -94,7 +94,11 @@ class Copa(HFTask):
    def doc_to_text(self, doc, include_target=True):
        # Drop the period
-        text = doc["premise"].strip()[:-1] + " because "
+        connector = {
+            "cause": "because",
+            "effect": "therefore",
+        }[doc["question"]]
+        text = doc["premise"].strip()[:-1] + f" {connector} "
        if include_target:
            correct_choice = doc["choice1"] if doc["label"] == 0 else doc["choice2"]
            # Connect the sentences
...
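
Note: to make the Copa change above concrete, the connector is now chosen from the doc's "question" field instead of always being "because". A tiny illustration with a made-up document in the same format:

# Illustrative COPA-style doc; the field names match the code above, the values are made up.
doc = {
    "premise": "The man broke his toe.",
    "question": "cause",
    "choice1": "He got a hole in his sock.",
    "choice2": "He dropped a hammer on his foot.",
    "label": 1,
}

connector = {"cause": "because", "effect": "therefore"}[doc["question"]]
text = doc["premise"].strip()[:-1] + f" {connector} "
print(text)  # -> "The man broke his toe because "

The "therefore" branch is the point of the change: for effect-type questions, appending the choice after "because" would state the causal relation backwards.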