Added decontamination to remaining evals

dae7b868 · Quentin Gregory Anthony · 341663a9 · dae7b868 · dae7b868 · dae7b868
Commit dae7b868 authored Feb 05, 2022 by Quentin Gregory Anthony
13 changed files
--- a/lm_eval/tasks/quac.py
+++ b/lm_eval/tasks/quac.py
@@ -66,6 +66,12 @@ class QuAC(Task):
    def doc_to_text(self, doc):
        """Build the prompt: title line, paragraph line, the question, then an open 'A: ' slot."""
        title_line = 'TITLE: ' + doc['title'] + '\n'
        paragraph_line = 'PARAGRAPH: ' + doc['paragraph'] + '\n\n'
        question_line = 'Q: ' + doc['question'] + '\n\n'
        return title_line + paragraph_line + question_line + 'A: '
+    def should_decontaminate(self):
+        """Always returns True, flagging QuAC for decontamination (the consumer of this flag is outside this hunk)."""
+        return True
+    def doc_to_decontamination_query(self, doc):
+        return doc['paragraph']
    def doc_to_target(self, doc):
        """Return the value stored under the doc's 'answer' key, unchanged."""
        answer = doc['answer']
        return answer

--- a/lm_eval/tasks/race.py
+++ b/lm_eval/tasks/race.py
@@ -86,6 +86,12 @@ class RACE(HFTask):
        text += self.last_problem(doc)['question']
        return text
+    def should_decontaminate(self):
+        """Always returns True, flagging RACE for decontamination (the consumer of this flag is outside this hunk)."""
+        return True
+    def doc_to_decontamination_query(self, doc):
+        return doc['article']
    def doc_to_target(self, doc):
        """Return the answer option of the doc's last problem, prefixed with a space."""
        problem = self.last_problem(doc)
        answer_option = self.get_answer_option(problem)
        return " " + answer_option

--- a/lm_eval/tasks/sat.py
+++ b/lm_eval/tasks/sat.py
@@ -63,3 +63,10 @@ class SATAnalogies(MultipleChoiceTask):
    def doc_to_text(self, doc):
        """Render the analogy stem by unpacking doc['query'] into the two template slots."""
        template = "{} is to {} as"
        return template.format(*doc['query'])
+    def should_decontaminate(self):
+        """Always returns True, flagging SATAnalogies for decontamination (the consumer of this flag is outside this hunk)."""
+        return True
+    def doc_to_decontamination_query(self, doc):
+        return doc["source"] + " " + doc["query"]
--- a/lm_eval/tasks/squad.py
+++ b/lm_eval/tasks/squad.py
@@ -44,6 +44,12 @@ class SQuAD2(HFTask):
    def doc_to_text(self, doc):
        """Build the prompt: title, background and question sections, then an open 'Answer:' slot."""
        sections = [
            'Title: ' + doc['title'],
            'Background: ' + doc['context'],
            'Question: ' + doc['question'],
            'Answer:',
        ]
        # Sections are separated by one blank line.
        return '\n\n'.join(sections)
+    def should_decontaminate(self):
+        """Always returns True, flagging SQuAD2 for decontamination (the consumer of this flag is outside this hunk)."""
+        return True
+    def doc_to_decontamination_query(self, doc):
+        return doc['context']
    def doc_to_target(self, doc):
        answer_list = doc['answers']['text']
        if len(answer_list) > 0:

--- a/lm_eval/tasks/storycloze.py
+++ b/lm_eval/tasks/storycloze.py
@@ -36,6 +36,12 @@ class StoryCloze(Task):
    def doc_to_text(self, doc):
        """Join fields 1 through 4 of the row with single spaces to form the prompt."""
        story_sentences = doc[1:5]
        return ' '.join(story_sentences)
+    def should_decontaminate(self):
+        """Always returns True, flagging StoryCloze for decontamination (the consumer of this flag is outside this hunk)."""
+        return True
+    def doc_to_decontamination_query(self, doc):
+        return doc["context"]
    def doc_to_target(self, doc):
        """Return the selected ending, prefixed with a space.

        The last field of the row is parsed as an integer and offset by -4
        to obtain the index of the field to return.
        """
        ending_index = int(doc[-1]) - 4
        return " " + doc[ending_index]

--- a/lm_eval/tasks/superglue.py
+++ b/lm_eval/tasks/superglue.py
@@ -28,6 +28,12 @@ class BoolQ(HFTask):
    def doc_to_text(self, doc):
        """Build the prompt: the passage, the question with a '?' appended, then an open 'Answer:' slot."""
        passage = doc['passage']
        question = doc['question']
        return f"{passage}\nQuestion: {question}?\nAnswer:"
+    def should_decontaminate(self):
+        """Always returns True, flagging BoolQ for decontamination (the consumer of this flag is outside this hunk)."""
+        return True
+    def doc_to_decontamination_query(self, doc):
+        return doc['passage']
    def doc_to_target(self, doc):
        """Return yesno() applied to the doc's label, prefixed with a space."""
        verdict = yesno(doc['label'])
        return " " + verdict

--- a/lm_eval/tasks/translation.py
+++ b/lm_eval/tasks/translation.py
@@ -109,6 +109,12 @@ class GeneralTranslationTask(Task):
        tar_lang = code_to_language(language_codes[1])
        return f"{src_lang} phrase: " + doc["src"] + f"\n{tar_lang} phrase:"
+    def should_decontaminate(self):
+        """Always returns True, flagging translation tasks for decontamination (the consumer of this flag is outside this hunk)."""
+        return True
+    def doc_to_decontamination_query(self, doc):
+        return doc["ref"] + " " + doc["ref"]
    def doc_to_target(self, doc):
        """Return one reference translation, prefixed with a space.

        doc["ref"] is either a single string or an indexable collection of
        references; in the latter case the first entry is used.
        """
        # This shows a single target, though there may be multiple targets in a lang test
        ref = doc["ref"] if isinstance(doc["ref"], str) else doc["ref"][0]
        # Pick the reference first, then prepend the space, so both branches
        # get it: the prompt ends with "phrase:" and has no trailing space.
        return " " + ref

--- a/lm_eval/tasks/triviaqa.py
+++ b/lm_eval/tasks/triviaqa.py
@@ -39,6 +39,12 @@ class TriviaQA(Task):
    def doc_to_text(self, doc):
        """Build the prompt: the question line followed by an open 'Answer:' slot."""
        question = doc['Question']
        return f"Question: {question}\nAnswer:"
+    def should_decontaminate(self):
+        """Always returns True, flagging TriviaQA for decontamination (the consumer of this flag is outside this hunk)."""
+        return True
+    def doc_to_decontamination_query(self, doc):
+        return doc['Question'] + " " + doc['SearchResults']['Description']
    def doc_to_target(self, doc):
        """Return the doc's Answer -> Value entry, prefixed with a space."""
        answer_value = doc['Answer']['Value']
        return " " + answer_value

--- a/lm_eval/tasks/truthfulqa.py
+++ b/lm_eval/tasks/truthfulqa.py
@@ -82,6 +82,12 @@ class TruthfulQAMultipleChoice(Task):
    def doc_to_text(self, doc):
        """Build the prompt: QA_PROMPT, a blank line, the 'Q:' line, then an open 'A:' slot."""
        preamble = QA_PROMPT + "\n\nQ: "
        return preamble + doc['question'] + "\nA:"
+    def should_decontaminate(self):
+        """Always returns True, flagging this TruthfulQA task for decontamination (the consumer of this flag is outside this hunk)."""
+        return True
+    def doc_to_decontamination_query(self, doc):
+        return doc['question']
    def doc_to_target(self, doc):
        """Return a single space; the doc is not consulted."""
        return " "

--- a/lm_eval/tasks/unscramble.py
+++ b/lm_eval/tasks/unscramble.py
@@ -48,6 +48,12 @@ class WordUnscrambleTask(Task):
    def doc_to_text(self, doc):
        """Return the doc's 'context' entry unchanged as the prompt."""
        context = doc["context"]
        return context
+    def should_decontaminate(self):
+        """Always returns True, flagging word-unscramble tasks for decontamination (the consumer of this flag is outside this hunk)."""
+        return True
+    def doc_to_decontamination_query(self, doc):
+        return doc["context"]
    def doc_to_target(self, doc):
        """Return the doc's 'completion' entry unchanged."""
        completion = doc["completion"]
        return completion

--- a/lm_eval/tasks/webqs.py
+++ b/lm_eval/tasks/webqs.py
@@ -20,6 +20,12 @@ class WebQs(HFTask):
    def doc_to_text(self, doc):
        """Build the prompt: the question line followed by an open 'Answer:' slot."""
        question = doc['question']
        return "Question: " + question + '\nAnswer:'
+    def should_decontaminate(self):
+        """Always returns True, flagging WebQs for decontamination (the consumer of this flag is outside this hunk)."""
+        return True
+    def doc_to_decontamination_query(self, doc):
+        return doc['question']
    def doc_to_target(self, doc):
        # this picks one answer to be the "correct" one, despite sometimes 
        # multiple correct answers being possible.

--- a/lm_eval/tasks/wikitext.py
+++ b/lm_eval/tasks/wikitext.py
@@ -80,6 +80,12 @@ class WikiText(PerplexityTask):
    def doc_to_target(self, doc):
        """Return the result of running the whole doc through wikitext_detokenizer."""
        detokenized = wikitext_detokenizer(doc)
        return detokenized
+    def should_decontaminate(self):
+        """Always returns True, flagging WikiText for decontamination (the consumer of this flag is outside this hunk)."""
+        return True
+    def doc_to_decontamination_query(self, doc):
+        return doc["text"]
    def count_words(self, doc):
        # count number of words in *original doc before detokenization*

--- a/lm_eval/tasks/wsc273.py
+++ b/lm_eval/tasks/wsc273.py
@@ -65,6 +65,12 @@ class WinogradSchemaChallenge273(HFTask):
    def doc_to_text(self, doc):
        """Return partial_context() for the option selected by the doc's label."""
        options = doc["options"]
        labelled_option = options[doc["label"]]
        return self.partial_context(doc, labelled_option)
+    def should_decontaminate(self):
+        """Always returns True, flagging WSC273 for decontamination (the consumer of this flag is outside this hunk)."""
+        return True
+    def doc_to_decontamination_query(self, doc):
+        return doc["text"]
    @classmethod
    def partial_context(cls, doc, option):
        # Substitute the pronoun in the original text with the specified