Unverified Commit b0db32bc authored by Stella Biderman's avatar Stella Biderman Committed by GitHub
Browse files

Merge pull request #50 from cfoster0/winograd

Winograd changes
parents b8a3edaf 05bd05e9
...@@ -4,6 +4,7 @@ from . import arc ...@@ -4,6 +4,7 @@ from . import arc
from . import race from . import race
from . import webqs from . import webqs
from . import anli from . import anli
from . import wsc273
from . import winogrande from . import winogrande
from . import quac from . import quac
from . import hellaswag from . import hellaswag
...@@ -27,7 +28,7 @@ TASK_REGISTRY = { ...@@ -27,7 +28,7 @@ TASK_REGISTRY = {
"copa": superglue.Copa, "copa": superglue.Copa,
"multirc": superglue.MultiRC, "multirc": superglue.MultiRC,
"wic": superglue.WordsInContext, "wic": superglue.WordsInContext,
"wsc": superglue.WinogradSchemaChallenge, "wsc": superglue.SGWinogradSchemaChallenge,
# Order by benchmark/genre? # Order by benchmark/genre?
"arc_easy": arc.ARCEasy, "arc_easy": arc.ARCEasy,
"arc_challenge": arc.ARCChallenge, "arc_challenge": arc.ARCChallenge,
...@@ -37,6 +38,7 @@ TASK_REGISTRY = { ...@@ -37,6 +38,7 @@ TASK_REGISTRY = {
"squad": squad.SQuAD, "squad": squad.SQuAD,
"race": race.RACE, "race": race.RACE,
"webqs": webqs.WebQs, "webqs": webqs.WebQs,
"wsc273": wsc273.WinogradSchemaChallenge273,
"winogrande": winogrande.Winogrande, "winogrande": winogrande.Winogrande,
"anli_r1": anli.ANLIRound1, "anli_r1": anli.ANLIRound1,
"anli_r2": anli.ANLIRound2, "anli_r2": anli.ANLIRound2,
......
...@@ -218,7 +218,7 @@ class WordsInContext(HFTask): ...@@ -218,7 +218,7 @@ class WordsInContext(HFTask):
return simple_accuracy_metric(preds=preds, golds=golds) return simple_accuracy_metric(preds=preds, golds=golds)
class WinogradSchemaChallenge(HFTask): class SGWinogradSchemaChallenge(HFTask):
DATASET_PATH = "super_glue" DATASET_PATH = "super_glue"
DATASET_NAME = "wsc" DATASET_NAME = "wsc"
......
import json
import random
import os
from lm_eval.base import Dataset
from ..utils import sh
class WinogradSchemaChallenge273(Dataset):
def __init__(self):
super().__init__()
def download(self):
if not os.path.exists('data/wsc273'):
sh("""
mkdir -p data/wsc273
wget https://git.cse.msu.edu/bakerb15/nlp-final-project/raw/master/Winogard/reproduce/commonsense_test/wsc273.json -O data/wsc273/wsc273.json
""")
def has_training_docs(self):
return False
def has_validation_docs(self):
return False
def has_test_docs(self):
return True
def training_docs(self):
return []
def validation_docs(self):
return []
def test_docs(self):
myjson = json.load(open('data/wsc273/wsc273.json'))
return self.load_doc(myjson)
def fewshot_description(self):
# This format is ONLY for the purposes of deduplication. For the task evaluation, we'll need to find a new strategy,
# to meet the needs of this particular task.
return "Winograd schema sentence with correct continuation. True. Winograd schema sentence with incorrect continuation. False."
def load_doc(self, myjson):
docs = []
for i in range(0, 273 * 2, 2):
item1 = myjson[i]
item2 = myjson[i+1]
if item1['question_id'] != item2['question_id']:
raise ValueError("WSC273 has missing completion pair.")
question_id = item1['question_id']
if item1['correctness'] == True:
doc = {
'id': question_id,
'completions': {
'T': item1['substitution'],
'F': item2['substitution'],
},
}
if item2['correctness'] == True:
doc = {
'id': question_id,
'completions': {
'F': item1['substitution'],
'T': item2['substitution'],
},
}
docs.append(doc)
return docs
def doc_to_text(self, doc, include_target=True):
# WSC273 is currently only writing out full examples. Partial evaluation needs implementing.
text = doc['completions']['T'] + ' True. ' + doc['completions']['F'] + ' False.'
return text
def evaluate(self, docs, lm):
# TODO: Write evaluation function
raise NotImplementedError()
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment