Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
lm-evaluation-harness
Commits
aa125d0a
Commit
aa125d0a
authored
Oct 05, 2020
by
Leo Gao
Browse files
Add ANLI
parent
988a400f
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
61 additions
and
0 deletions
+61
-0
lm_eval/tasks/__init__.py
lm_eval/tasks/__init__.py
+4
-0
lm_eval/tasks/anli.py
lm_eval/tasks/anli.py
+57
-0
No files found.
lm_eval/tasks/__init__.py
View file @
aa125d0a
...
@@ -3,6 +3,7 @@ from . import glue
...
@@ -3,6 +3,7 @@ from . import glue
from
.
import
arc
from
.
import
arc
from
.
import
race
from
.
import
race
from
.
import
webqs
from
.
import
webqs
from
.
import
anli
TASK_REGISTRY
=
{
TASK_REGISTRY
=
{
# GLUE
# GLUE
...
@@ -27,6 +28,9 @@ TASK_REGISTRY = {
...
@@ -27,6 +28,9 @@ TASK_REGISTRY = {
"arc_challenge"
:
arc
.
ARCChallenge
,
"arc_challenge"
:
arc
.
ARCChallenge
,
"race"
:
race
.
RACE
,
"race"
:
race
.
RACE
,
"webqs"
:
webqs
.
WebQs
,
"webqs"
:
webqs
.
WebQs
,
"anli_r1"
:
anli
.
ANLIRound1
,
"anli_r2"
:
anli
.
ANLIRound2
,
"anli_r3"
:
anli
.
ANLIRound3
,
}
}
...
...
lm_eval/tasks/anli.py
0 → 100644
View file @
aa125d0a
from
.
common
import
HFTask
class ANLIBase(HFTask):
    """Base task for the Adversarial NLI (ANLI) dataset.

    Loads the Hugging Face ``anli`` dataset. Subclasses select one of the
    three adversarial collection rounds by setting ``SPLIT`` to 1, 2, or 3;
    the dataset exposes per-round splits named ``train_r<N>``, ``dev_r<N>``
    and ``test_r<N>``.
    """

    DATASET_PATH = "anli"
    DATASET_NAME = None
    # Round number (1, 2, or 3); set by the concrete subclasses below.
    SPLIT = None

    def has_training_docs(self):
        return True

    def has_validation_docs(self):
        return True

    def has_test_docs(self):
        return True

    def training_docs(self):
        """Return (and cache) the training documents for this round."""
        if self.has_training_docs():
            # Cache as a list so repeated calls don't re-iterate the dataset.
            if self._training_docs is None:
                self._training_docs = list(self.data["train_r" + str(self.SPLIT)])
            return self._training_docs

    def validation_docs(self):
        """Return the dev-split documents for this round."""
        if self.has_validation_docs():
            return self.data["dev_r" + str(self.SPLIT)]

    def test_docs(self):
        """Return the test-split documents for this round."""
        if self.has_test_docs():
            return self.data["test_r" + str(self.SPLIT)]

    def fewshot_description(self):
        # TODO: figure out description
        return ""

    def doc_to_text(self, doc, include_target=True):
        """Format one ANLI example as a prompt string.

        Produces ``<premise>\\nQuestion: <hypothesis>\\nTrue, False, or
        Neither?`` and, when ``include_target`` is true, appends the gold
        answer mapped from the integer label (0 -> True, 1 -> Neither,
        2 -> False).
        """
        # NOTE: removed a leftover debug `print(doc)` that dumped every
        # document to stdout while building prompts.
        # OA does this a bit weirdly: they prepend "anli 1: anli 1: " to the beginning
        # of the prompt (yes, repeating it!). also, " True, False, or Neither?" is directly
        # appended onto the question, with no "Answer:" or even a newline. Do we *really*
        # want to do it exactly as OA did?
        q = doc['premise'] + '\nQuestion: ' + doc['hypothesis'] + '\n'
        a = "True, False, or Neither?" + ((" " + ["True", "Neither", "False"][doc['label']]) if include_target else '')
        return q + a

    def evaluate(self, docs, lm, provide_description, num_fewshot):
        # TODO: implement
        raise NotImplementedError()
class ANLIRound1(ANLIBase):
    """ANLI adversarial collection round 1 (splits train_r1/dev_r1/test_r1)."""
    SPLIT = 1
class ANLIRound2(ANLIBase):
    """ANLI adversarial collection round 2 (splits train_r2/dev_r2/test_r2)."""
    SPLIT = 2
class ANLIRound3(ANLIBase):
    """ANLI adversarial collection round 3 (splits train_r3/dev_r3/test_r3)."""
    SPLIT = 3
\ No newline at end of file
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment