Added decontamination support to more eval tasks and fixed a missing task-directory error

bb0eafbb · Quentin Anthony · 9b472bc9 · bb0eafbb · bb0eafbb · bb0eafbb
Commit bb0eafbb authored Jan 30, 2022 by Quentin Anthony
7 changed files
--- a/lm_eval/tasks/hellaswag.py
+++ b/lm_eval/tasks/hellaswag.py
@@ -42,3 +42,9 @@ class HellaSwag(HFTask, MultipleChoiceTask):

    def doc_to_text(self, doc):
        return doc["query"]
+
+    def should_decontaminate(self):
+        return True
+
+    def doc_to_decontamination_query(self, doc):
+        return doc["query"]
--- a/lm_eval/tasks/lambada.py
+++ b/lm_eval/tasks/lambada.py
@@ -45,6 +45,12 @@ class LAMBADA(Task):
    def doc_to_text(self, doc):
        return doc['text'].rsplit(' ', 1)[0]

+    def should_decontaminate(self):
+        return True
+
+    def doc_to_decontamination_query(self, doc):
+        return doc['text']
+
    def doc_to_target(self, doc):
        return " " + doc['text'].rsplit(' ', 1)[1]
    

--- a/lm_eval/tasks/mathqa.py
+++ b/lm_eval/tasks/mathqa.py
@@ -35,3 +35,9 @@ class MathQA(HFTask, MultipleChoiceTask):

    def doc_to_text(self, doc):
        return doc["query"]
+
+    def should_decontaminate(self):
+        return True
+
+    def doc_to_decontamination_query(self, doc):
+        return doc["query"]
--- a/lm_eval/tasks/piqa.py
+++ b/lm_eval/tasks/piqa.py
@@ -32,3 +32,9 @@ class PiQA(HFTask, MultipleChoiceTask):

    def doc_to_text(self, doc):
        return "Question: " + doc["goal"] + "\nAnswer:"
+
+    def should_decontaminate(self):
+        return True
+
+    def doc_to_decontamination_query(self, doc):
+        return doc["goal"]
--- a/lm_eval/tasks/pubmedqa.py
+++ b/lm_eval/tasks/pubmedqa.py
@@ -36,6 +36,12 @@ class Pubmed_QA(HFTask):
            doc["final_decision"]
        )

+    def should_decontaminate(self):
+        return True
+
+    def doc_to_decontamination_query(self, doc):
+        return doc["question"] + " " + "\n".join(doc["context"]["contexts"])
+
    def doc_to_target(self, doc):
        return " {}".format(doc["final_decision"])


--- a/lm_eval/tasks/winogrande.py
+++ b/lm_eval/tasks/winogrande.py
@@ -29,6 +29,12 @@ class Winogrande(HFTask):
    def doc_to_text(self, doc):
        return self.partial_context(doc, doc["option" + doc["answer"]])

+    def should_decontaminate(self):
+        return True
+
+    def doc_to_decontamination_query(self, doc):
+        return doc["sentence"]
+
    def fewshot_description(self):
        # TODO: redo description
        return "Winograd schema sentence including a either a ___ blank with a missing word, making the pronoun ambiguous, or the same with the word filled in."

--- a/scripts/clean_training_data/contamination.py
+++ b/scripts/clean_training_data/contamination.py
@@ -49,6 +49,8 @@ def get_train_overlap(docs_by_task_set, ngrams_path, ngrams_n_size, limit):
    sets_to_decontaminate = len(docs_by_task_set.keys())

    for (task_name, task_set), docs in docs_by_task_set.items():
+        if not os.path.exists(f"data/{task_name}"):
+            os.mkdir(f"data/{task_name}")
        # Check if we've decontaminated this set before
        overlaps_dump_path = get_overlaps_dump_path(task_name, task_set, ngrams_n_size, limit)
        if os.path.exists(overlaps_dump_path):