Merge pull request #239 from bigscience-workshop/thomas/fix_wnli

FIX wnli

Merge pull request #239 from bigscience-workshop/thomas/fix_wnli
FIX wnli
e8e4873a · Stella Biderman · GitHub · df5d7cf0 · a48e3cf2 · e8e4873a
Unverified commit e8e4873a, authored Dec 23, 2021 by Stella Biderman; committed by GitHub on Dec 23, 2021.
Hide whitespace changes
Inline Side-by-side

Showing 1 changed file with 7 additions and 8 deletions

lm_eval/tasks/glue.py lm_eval/tasks/glue.py +7 -8

No files found.
--- a/lm_eval/tasks/glue.py
+++ b/lm_eval/tasks/glue.py
@@ -227,7 +227,7 @@ class QNLI(HFTask):
 class WNLI(HFTask):
-    VERSION = 0
+    VERSION = 1
    DATASET_PATH = "glue"
    DATASET_NAME = "wnli"
@@ -241,26 +241,25 @@ class WNLI(HFTask):
        return False
    def doc_to_text(self, doc):
-        return "{}\nQuestion: {} True, False or Neither?\nAnswer:".format(
+        return "{}\nQuestion: {} True or False?\nAnswer:".format(
            doc["sentence1"],
            doc["sentence2"],
        )
    def doc_to_target(self, doc):
        # True = entailment
-        # False = contradiction
+        # False = not_entailment
-        # Neither = neutral
+        return " {}".format({0: "False", 1: "True"}[doc["label"]])
-        return " {}".format({0: "True", 1: "Neither", 2: "False"}[doc["label"]])
    def construct_requests(self, doc, ctx):
        ll_true, _ = rf.loglikelihood(ctx, " True")
-        ll_neither, _ = rf.loglikelihood(ctx, " Neither")
        ll_false, _ = rf.loglikelihood(ctx, " False")
-        return ll_true, ll_neither, ll_false
+        return ll_true, ll_false
    def process_results(self, doc, results):
+        ll_true, ll_false = results
+        pred = ll_true > ll_false
        gold = doc["label"]
-        pred = np.argmax(results)
        return {
            "acc": pred == gold
        }