Commit 05bd05e9 authored by Charles Foster
Browse files

Renamed WSC to make distinction between SuperGLUE Winograd Schemas...

Renamed WSC to make distinction between SuperGLUE Winograd Schemas (SGWinogradSchemaChallenge) and WSC273 (WinogradSchemaChallenge273) clearer. Also, added WSC273.
parent ca24b52f
...@@ -4,6 +4,7 @@ from . import arc ...@@ -4,6 +4,7 @@ from . import arc
from . import race from . import race
from . import webqs from . import webqs
from . import anli from . import anli
from . import wsc273
from . import winogrande from . import winogrande
from . import quac from . import quac
from . import hellaswag from . import hellaswag
...@@ -27,7 +28,7 @@ TASK_REGISTRY = { ...@@ -27,7 +28,7 @@ TASK_REGISTRY = {
"copa": superglue.Copa, "copa": superglue.Copa,
"multirc": superglue.MultiRC, "multirc": superglue.MultiRC,
"wic": superglue.WordsInContext, "wic": superglue.WordsInContext,
"wsc": superglue.WinogradSchemaChallenge, "wsc": superglue.SGWinogradSchemaChallenge,
# Order by benchmark/genre? # Order by benchmark/genre?
"arc_easy": arc.ARCEasy, "arc_easy": arc.ARCEasy,
"arc_challenge": arc.ARCChallenge, "arc_challenge": arc.ARCChallenge,
...@@ -37,6 +38,7 @@ TASK_REGISTRY = { ...@@ -37,6 +38,7 @@ TASK_REGISTRY = {
"squad": squad.SQuAD, "squad": squad.SQuAD,
"race": race.RACE, "race": race.RACE,
"webqs": webqs.WebQs, "webqs": webqs.WebQs,
"wsc273": wsc273.WinogradSchemaChallenge273,
"winogrande": winogrande.Winogrande, "winogrande": winogrande.Winogrande,
"anli_r1": anli.ANLIRound1, "anli_r1": anli.ANLIRound1,
"anli_r2": anli.ANLIRound2, "anli_r2": anli.ANLIRound2,
......
...@@ -218,7 +218,7 @@ class WordsInContext(HFTask): ...@@ -218,7 +218,7 @@ class WordsInContext(HFTask):
return simple_accuracy_metric(preds=preds, golds=golds) return simple_accuracy_metric(preds=preds, golds=golds)
class WinogradSchemaChallenge(HFTask): class SGWinogradSchemaChallenge(HFTask):
DATASET_PATH = "super_glue" DATASET_PATH = "super_glue"
DATASET_NAME = "wsc" DATASET_NAME = "wsc"
......
import json
import random
import os
from lm_eval.base import Dataset
from ..utils import sh
class WinogradSchemaChallenge273(Dataset):
    """WSC273 task: Winograd schema sentences with paired completions.

    The data is a JSON list in which consecutive entries form a pair that
    shares a ``question_id``. Each entry carries a ``substitution`` (the
    sentence with one candidate filled in) and a ``correctness`` flag.
    Only a test split is provided; evaluation is not implemented yet.
    """

    def __init__(self):
        super().__init__()

    def download(self):
        """Fetch ``wsc273.json`` into ``data/wsc273`` if that directory is absent.

        NOTE(review): only the directory is checked, so a directory left
        behind by an interrupted download is treated as complete.
        """
        if not os.path.exists('data/wsc273'):
            sh("""
                mkdir -p data/wsc273
                wget https://git.cse.msu.edu/bakerb15/nlp-final-project/raw/master/Winogard/reproduce/commonsense_test/wsc273.json -O data/wsc273/wsc273.json
                """)

    def has_training_docs(self):
        """Return False: no training split is exposed."""
        return False

    def has_validation_docs(self):
        """Return False: no validation split is exposed."""
        return False

    def has_test_docs(self):
        """Return True: the test split is the only one exposed."""
        return True

    def training_docs(self):
        """Return an empty list (there is no training split)."""
        return []

    def validation_docs(self):
        """Return an empty list (there is no validation split)."""
        return []

    def test_docs(self):
        """Load the downloaded JSON file and return the paired documents."""
        # Use a context manager so the file handle is closed deterministically
        # instead of being left to the garbage collector.
        with open('data/wsc273/wsc273.json') as f:
            myjson = json.load(f)
        return self.load_doc(myjson)

    def fewshot_description(self):
        # This format is ONLY for the purposes of deduplication. For the task evaluation, we'll need to find a new strategy,
        # to meet the needs of this particular task.
        return "Winograd schema sentence with correct continuation. True. Winograd schema sentence with incorrect continuation. False."

    def load_doc(self, myjson):
        """Group consecutive JSON entries into one document per question.

        Args:
            myjson: list of dicts, each with ``question_id``,
                ``correctness`` and ``substitution`` keys. Entries 2k and
                2k+1 must belong to the same question.

        Returns:
            A list of dicts of the form
            ``{'id': question_id, 'completions': {'T': ..., 'F': ...}}``
            where ``'T'`` is the substitution marked correct and ``'F'``
            the other one.

        Raises:
            ValueError: if the list has an odd number of entries, if two
                paired entries disagree on ``question_id``, or if a pair
                does not contain exactly one entry marked correct.
        """
        # Pair over the actual length rather than a hard-coded 273 * 2 so a
        # truncated file fails with a clear message instead of an IndexError.
        if len(myjson) % 2 != 0:
            raise ValueError("WSC273 has missing completion pair.")

        docs = []
        for i in range(0, len(myjson), 2):
            item1 = myjson[i]
            item2 = myjson[i + 1]
            if item1['question_id'] != item2['question_id']:
                raise ValueError("WSC273 has missing completion pair.")
            question_id = item1['question_id']

            # The `== True` comparison is kept from the original because the
            # exact JSON type of `correctness` is not visible from here.
            first_correct = item1['correctness'] == True  # noqa: E712
            second_correct = item2['correctness'] == True  # noqa: E712

            # Exactly one completion must be correct. Previously a pair with
            # no correct entry re-appended the previous doc (or hit an
            # unbound local on the first pair), and a pair with two correct
            # entries silently kept the second.
            if first_correct == second_correct:
                raise ValueError(
                    "WSC273 pair %r must have exactly one correct completion."
                    % (question_id,)
                )

            if first_correct:
                correct, incorrect = item1, item2
            else:
                correct, incorrect = item2, item1

            docs.append({
                'id': question_id,
                'completions': {
                    'T': correct['substitution'],
                    'F': incorrect['substitution'],
                },
            })
        return docs

    def doc_to_text(self, doc, include_target=True):
        """Render a document as ``<correct> True. <incorrect> False.``

        ``include_target`` is accepted but currently ignored.
        """
        # WSC273 is currently only writing out full examples. Partial evaluation needs implementing.
        text = doc['completions']['T'] + ' True. ' + doc['completions']['F'] + ' False.'
        return text

    def evaluate(self, docs, lm):
        """Not implemented yet; always raises NotImplementedError."""
        # TODO: Write evaluation function
        raise NotImplementedError()
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment