Merge pull request #45 from cfoster0/winogrande

Add Winogrande dataset

Merge pull request #45 from cfoster0/winogrande
Add Winogrande dataset
eb9a5224 · Stella Biderman · GitHub · 80f5fc3b · 0e0e37f4 · eb9a5224
Unverified Commit eb9a5224 authored Oct 23, 2020 by Stella Biderman Committed by GitHub Oct 23, 2020
Hide whitespace changes
Inline Side-by-side

Showing with 52 additions and 0 deletions

lm_eval/tasks/__init__.py lm_eval/tasks/__init__.py +2 -0

lm_eval/tasks/winogrande.py lm_eval/tasks/winogrande.py +50 -0

No files found.
--- a/lm_eval/tasks/__init__.py
+++ b/lm_eval/tasks/__init__.py
@@ -4,6 +4,7 @@ from . import arc
 from . import race
 from . import webqs
 from . import anli
+from . import winogrande
 from . import quac
 from . import hellaswag
 from . import openbookqa
@@ -36,6 +37,7 @@ TASK_REGISTRY = {
    "squad": squad.SQuAD,
    "race": race.RACE,
    "webqs": webqs.WebQs,
+    "winogrande": winogrande.Winogrande,
    "anli_r1": anli.ANLIRound1,
    "anli_r2": anli.ANLIRound2,
    "anli_r3": anli.ANLIRound3,

--- a/lm_eval/tasks/winogrande.py
+++ b/lm_eval/tasks/winogrande.py
+import numpy as np
+from scipy.stats import pearsonr, spearmanr
+from sklearn.metrics import f1_score, matthews_corrcoef
+from tqdm import auto as tqdm_lib
+from . common import HFTask, simple_accuracy_metric, yesno
+class Winogrande(HFTask):
+    """Winogrande fill-in-the-blank task built on ``HFTask``.
+
+    Documents are read through the keys ``sentence``, ``option1``,
+    ``option2`` and ``answer``. Splits are taken from ``self.data``, which
+    is presumably populated by ``HFTask`` from ``DATASET_PATH`` and
+    ``DATASET_NAME`` -- confirm against ``common.HFTask``.
+
+    Evaluation is not implemented yet; ``evaluate`` always raises.
+    """
+
+    DATASET_PATH = "winogrande"
+    DATASET_NAME = "winogrande_xl"
+
+    def has_training_docs(self):
+        """Report that a training split is available."""
+        return True
+
+    def has_validation_docs(self):
+        """Report that a validation split is available."""
+        return True
+
+    def has_test_docs(self):
+        """Report that a test split is available."""
+        return True
+
+    def training_docs(self):
+        """Return the ``train`` split, or ``None`` if it is flagged absent."""
+        if self.has_training_docs():
+            return self.data["train"]
+
+    def validation_docs(self):
+        """Return the ``validation`` split, or ``None`` if flagged absent."""
+        if self.has_validation_docs():
+            return self.data["validation"]
+
+    def test_docs(self):
+        """Return the ``test`` split, or ``None`` if it is flagged absent."""
+        if self.has_test_docs():
+            return self.data["test"]
+
+    def fewshot_description(self):
+        """Return the natural-language description used for few-shot prompts."""
+        return "Winograd schema sentence including either a ___ blank with a missing word, making the pronoun ambiguous, or the same with the word filled in."
+
+    def doc_to_text(self, doc, include_target=True):
+        """Render a document as text, optionally with the blank filled in.
+
+        Args:
+            doc: Mapping with a ``sentence`` key and, when the target is
+                included, ``answer`` (``'1'`` or ``'2'``), ``option1`` and
+                ``option2`` keys.
+            include_target: When true, every ``_`` in the sentence is
+                replaced by the option selected by ``answer``; otherwise
+                the sentence is returned untouched.
+
+        Returns:
+            The sentence, with or without the answer substituted.
+
+        Raises:
+            ValueError: If ``answer`` is neither ``'1'`` nor ``'2'``.
+        """
+        text = doc['sentence']
+        if not include_target:
+            return text
+
+        # NOTE(review): documents without a gold label (possibly the test
+        # split -- confirm) will hit the ValueError below.
+        option_keys = {'1': 'option1', '2': 'option2'}
+        answer_n = doc['answer']
+        if answer_n not in option_keys:
+            raise ValueError("Winogrande from HF datasets contained an invalid answer key")
+        return text.replace("_", doc[option_keys[answer_n]])
+
+    def evaluate(self, docs, lm, provide_description, num_fewshot):
+        """Evaluate ``lm`` on ``docs``; not implemented yet.
+
+        Raises:
+            NotImplementedError: Always.
+        """
+        # TODO: Write evaluation function
+        raise NotImplementedError()
\ No newline at end of file