Merge pull request #47 from cfoster0/squad

Add SQuAD v2 dataset

Merge pull request #47 from cfoster0/squad
Add SQuAD v2 dataset
c7e7a050 · Stella Biderman · GitHub · aa125d0a · 76b08133 · c7e7a050
Unverified commit c7e7a050, authored Oct 22, 2020 by Stella Biderman; committed by GitHub on Oct 22, 2020.
Hide whitespace changes
Inline Side-by-side

Showing with 46 additions and 0 deletions

lm_eval/tasks/__init__.py lm_eval/tasks/__init__.py +2 -0

lm_eval/tasks/squad.py lm_eval/tasks/squad.py +44 -0

No files found.
--- a/lm_eval/tasks/__init__.py
+++ b/lm_eval/tasks/__init__.py
@@ -4,6 +4,7 @@ from . import arc
 from . import race
 from . import webqs
 from . import anli
+from . import squad

 TASK_REGISTRY = {
    # GLUE
@@ -26,6 +27,7 @@ TASK_REGISTRY = {
    # Order by benchmark/genre?
    "arc_easy": arc.ARCEasy,
    "arc_challenge": arc.ARCChallenge,
+    "squad": squad.SQuAD,
    "race": race.RACE,
    "webqs": webqs.WebQs,
    "anli_r1": anli.ANLIRound1,

--- a/lm_eval/tasks/squad.py
+++ b/lm_eval/tasks/squad.py
+import numpy as np
+from scipy.stats import pearsonr, spearmanr
+from sklearn.metrics import f1_score, matthews_corrcoef
+from tqdm import auto as tqdm_lib
+from . common import HFTask, simple_accuracy_metric, yesno
+
+class SQuAD(HFTask):
+    """SQuAD v2 question-answering task.
+
+    Documents are rendered as a title / background / question / answer
+    prompt by ``doc_to_text``. Scoring is not implemented yet: ``evaluate``
+    always raises ``NotImplementedError``.
+    """
+
+    DATASET_PATH = "squad_v2"
+    DATASET_NAME = None
+
+    def has_training_docs(self):
+        """Report that a training split is available (always True)."""
+        return True
+
+    def has_validation_docs(self):
+        """Report that a validation split is available (always True)."""
+        return True
+
+    def has_test_docs(self):
+        """Report that no test split is exposed by this task (always False)."""
+        return False
+
+    def training_docs(self):
+        """Return ``self.data["train"]``, or None when training docs are disabled."""
+        if not self.has_training_docs():
+            return None
+        # NOTE(review): self.data is not assigned in this class; presumably
+        # populated by the HFTask base class -- confirm.
+        return self.data["train"]
+
+    def validation_docs(self):
+        """Return ``self.data["validation"]``, or None when validation docs are disabled."""
+        if not self.has_validation_docs():
+            return None
+        return self.data["validation"]
+
+    def fewshot_description(self):
+        """Return a template string showing the prompt layout used by ``doc_to_text``."""
+        return (
+            "Title: The_Title_of_It\n\n"
+            "Background: A text passage as background to answer the question with.\n\n"
+            "Q: Question about the passage.\n\n"
+            "A: Answer."
+        )
+
+    def doc_to_text(self, doc, include_target=True):
+        """Render one document as a prompt string.
+
+        Reads ``doc['title']``, ``doc['context']`` and ``doc['question']``;
+        when ``include_target`` is true it also reads ``doc['answers']['text']``
+        and appends the first listed answer, or the literal ``'unanswerable'``
+        when that list is empty. Without the target the prompt ends in ``'A: '``.
+        """
+        prompt = 'Title: ' + doc['title'] + '\n\n'
+        prompt += 'Background: ' + doc['context'] + '\n\n'
+        prompt += 'Q: ' + doc['question'] + '\n\n'
+        prompt += 'A: '
+        if not include_target:
+            return prompt
+        candidates = doc['answers']['text']
+        # An empty answer list is rendered with a fixed placeholder target.
+        target = candidates[0] if len(candidates) > 0 else 'unanswerable'
+        return prompt + target
+
+    def evaluate(self, docs, lm, provide_description, num_fewshot):
+        """Placeholder for task scoring; always raises ``NotImplementedError``."""
+        # TODO: implement the evaluation function for this task.
+        raise NotImplementedError()
\ No newline at end of file