Commit dae7b868 authored by Quentin Gregory Anthony
Browse files

Added decontamination to remaining evals

parent 341663a9
...@@ -66,6 +66,12 @@ class QuAC(Task): ...@@ -66,6 +66,12 @@ class QuAC(Task):
def doc_to_text(self, doc): def doc_to_text(self, doc):
return 'TITLE: ' + doc['title'] + '\n' + 'PARAGRAPH: ' + doc['paragraph'] + '\n\n' + 'Q: ' + doc['question'] + '\n\n' + 'A: ' return 'TITLE: ' + doc['title'] + '\n' + 'PARAGRAPH: ' + doc['paragraph'] + '\n\n' + 'Q: ' + doc['question'] + '\n\n' + 'A: '
def should_decontaminate(self):
    """Report that QuAC documents take part in decontamination.

    Returns:
        bool: always ``True``.
    """
    return True
def doc_to_decontamination_query(self, doc):
    """Return the decontamination query for a QuAC document.

    The query is the background paragraph, i.e. the same text that
    ``doc_to_text`` emits after the ``PARAGRAPH:`` label.

    Args:
        doc: mapping with a ``'paragraph'`` entry.

    Returns:
        The value of ``doc['paragraph']``, unchanged.
    """
    return doc['paragraph']
def doc_to_target(self, doc): def doc_to_target(self, doc):
return doc['answer'] return doc['answer']
......
...@@ -86,6 +86,12 @@ class RACE(HFTask): ...@@ -86,6 +86,12 @@ class RACE(HFTask):
text += self.last_problem(doc)['question'] text += self.last_problem(doc)['question']
return text return text
def should_decontaminate(self):
    """Report that RACE documents take part in decontamination.

    Returns:
        bool: always ``True``.
    """
    return True
def doc_to_decontamination_query(self, doc):
    """Return the decontamination query for a RACE document.

    Args:
        doc: mapping with an ``'article'`` entry.

    Returns:
        The value of ``doc['article']``, unchanged.
    """
    return doc['article']
def doc_to_target(self, doc): def doc_to_target(self, doc):
return " " + self.get_answer_option(self.last_problem(doc)) return " " + self.get_answer_option(self.last_problem(doc))
......
...@@ -63,3 +63,10 @@ class SATAnalogies(MultipleChoiceTask): ...@@ -63,3 +63,10 @@ class SATAnalogies(MultipleChoiceTask):
def doc_to_text(self, doc): def doc_to_text(self, doc):
return "{} is to {} as".format(*doc['query']) return "{} is to {} as".format(*doc['query'])
def should_decontaminate(self):
    """Report that SAT analogy documents take part in decontamination.

    Returns:
        bool: always ``True``.
    """
    return True
def doc_to_decontamination_query(self, doc):
    """Return the decontamination query for a SAT analogy document.

    ``doc_to_text`` star-unpacks ``doc['query']`` into two format slots,
    so it is a pair of words rather than a single string. Concatenating
    that pair directly onto a ``str`` raises ``TypeError``, so the words
    are joined with a space first.

    Args:
        doc: mapping with a ``'source'`` string and a ``'query'`` entry
            (a sequence of words, or an already-joined string).

    Returns:
        ``doc['source']``, a space, then the query words separated by
        single spaces.
    """
    query = doc["query"]
    if not isinstance(query, str):
        # Sequence of stem words: flatten to text before concatenating.
        query = " ".join(query)
    return doc["source"] + " " + query
...@@ -44,6 +44,12 @@ class SQuAD2(HFTask): ...@@ -44,6 +44,12 @@ class SQuAD2(HFTask):
def doc_to_text(self, doc): def doc_to_text(self, doc):
return 'Title: ' + doc['title'] + '\n\n' + 'Background: ' + doc['context'] + '\n\n' + 'Question: ' + doc['question'] + '\n\n' + 'Answer:' return 'Title: ' + doc['title'] + '\n\n' + 'Background: ' + doc['context'] + '\n\n' + 'Question: ' + doc['question'] + '\n\n' + 'Answer:'
def should_decontaminate(self):
    """Report that SQuAD2 documents take part in decontamination.

    Returns:
        bool: always ``True``.
    """
    return True
def doc_to_decontamination_query(self, doc):
    """Return the decontamination query for a SQuAD2 document.

    The query is the background passage, i.e. the same text that
    ``doc_to_text`` emits after the ``Background:`` label.

    Args:
        doc: mapping with a ``'context'`` entry.

    Returns:
        The value of ``doc['context']``, unchanged.
    """
    return doc['context']
def doc_to_target(self, doc): def doc_to_target(self, doc):
answer_list = doc['answers']['text'] answer_list = doc['answers']['text']
if len(answer_list) > 0: if len(answer_list) > 0:
......
...@@ -36,6 +36,12 @@ class StoryCloze(Task): ...@@ -36,6 +36,12 @@ class StoryCloze(Task):
def doc_to_text(self, doc): def doc_to_text(self, doc):
return ' '.join([*doc[1:5]]) return ' '.join([*doc[1:5]])
def should_decontaminate(self):
    """Report that StoryCloze documents take part in decontamination.

    Returns:
        bool: always ``True``.
    """
    return True
def doc_to_decontamination_query(self, doc):
    """Return the decontamination query for a StoryCloze document.

    StoryCloze documents are positional rows: ``doc_to_text`` slices
    ``doc[1:5]`` and ``doc_to_target`` reads ``doc[-1]``. Indexing such a
    row with the string key ``"context"`` raises ``TypeError``, so the
    query is built from the same four fields that ``doc_to_text`` uses.

    Args:
        doc: sequence whose items 1 through 4 are the story sentences.

    Returns:
        Items 1-4 of ``doc`` joined with single spaces.
    """
    return ' '.join(doc[1:5])
def doc_to_target(self, doc): def doc_to_target(self, doc):
return " " + doc[int(doc[-1]) - 4] return " " + doc[int(doc[-1]) - 4]
......
...@@ -28,6 +28,12 @@ class BoolQ(HFTask): ...@@ -28,6 +28,12 @@ class BoolQ(HFTask):
def doc_to_text(self, doc): def doc_to_text(self, doc):
return f"{doc['passage']}\nQuestion: {doc['question']}?\nAnswer:" return f"{doc['passage']}\nQuestion: {doc['question']}?\nAnswer:"
def should_decontaminate(self):
    """Report that BoolQ documents take part in decontamination.

    Returns:
        bool: always ``True``.
    """
    return True
def doc_to_decontamination_query(self, doc):
    """Return the decontamination query for a BoolQ document.

    The query is the passage that ``doc_to_text`` places ahead of the
    question.

    Args:
        doc: mapping with a ``'passage'`` entry.

    Returns:
        The value of ``doc['passage']``, unchanged.
    """
    return doc['passage']
def doc_to_target(self, doc): def doc_to_target(self, doc):
return " " + yesno(doc['label']) return " " + yesno(doc['label'])
......
...@@ -109,6 +109,12 @@ class GeneralTranslationTask(Task): ...@@ -109,6 +109,12 @@ class GeneralTranslationTask(Task):
tar_lang = code_to_language(language_codes[1]) tar_lang = code_to_language(language_codes[1])
return f"{src_lang} phrase: " + doc["src"] + f"\n{tar_lang} phrase:" return f"{src_lang} phrase: " + doc["src"] + f"\n{tar_lang} phrase:"
def should_decontaminate(self):
    """Report that translation documents take part in decontamination.

    Returns:
        bool: always ``True``.
    """
    return True
def doc_to_decontamination_query(self, doc):
    """Return the decontamination query for a translation document.

    The query pairs the source phrase with its reference translation.
    The earlier version concatenated ``doc["ref"]`` with itself and never
    used ``doc["src"]``. It also failed with ``TypeError`` whenever
    ``doc["ref"]`` was a list, a case ``doc_to_target`` explicitly
    handles.

    Args:
        doc: mapping with a ``"src"`` string and a ``"ref"`` entry that is
            either one string or a sequence of reference strings.

    Returns:
        ``doc["src"]``, a space, then the reference text. Multiple
        references are joined with single spaces.
    """
    reference = doc["ref"]
    if not isinstance(reference, str):
        # Several references: include all of them in the query text.
        reference = " ".join(reference)
    return doc["src"] + " " + reference
def doc_to_target(self, doc): def doc_to_target(self, doc):
# This shows a single target, though there may be multiple targets in a lang test # This shows a single target, though there may be multiple targets in a lang test
return " " + doc["ref"] if isinstance(doc["ref"], str) else doc["ref"][0] return " " + doc["ref"] if isinstance(doc["ref"], str) else doc["ref"][0]
......
...@@ -39,6 +39,12 @@ class TriviaQA(Task): ...@@ -39,6 +39,12 @@ class TriviaQA(Task):
def doc_to_text(self, doc): def doc_to_text(self, doc):
return f"Question: {doc['Question']}\nAnswer:" return f"Question: {doc['Question']}\nAnswer:"
def should_decontaminate(self):
    """Report that TriviaQA documents take part in decontamination.

    Returns:
        bool: always ``True``.
    """
    return True
def doc_to_decontamination_query(self, doc):
    """Return the decontamination query for a TriviaQA document.

    The query is the question followed by the search-result description
    text.

    NOTE(review): the shape of ``doc['SearchResults']`` is not visible
    here. A single mapping is handled exactly as before; a list of result
    mappings (which string-key indexing would reject with ``TypeError``)
    is also accepted. Confirm against the dataset loader.

    Args:
        doc: mapping with a ``'Question'`` string and a
            ``'SearchResults'`` entry that is one mapping, or a sequence
            of mappings, each carrying a ``'Description'`` string.

    Returns:
        The question and every description joined with single spaces.
    """
    results = doc['SearchResults']
    if isinstance(results, dict):
        results = [results]
    pieces = [doc['Question']]
    pieces.extend(entry['Description'] for entry in results)
    return " ".join(pieces)
def doc_to_target(self, doc): def doc_to_target(self, doc):
return " " + doc['Answer']['Value'] return " " + doc['Answer']['Value']
......
...@@ -82,6 +82,12 @@ class TruthfulQAMultipleChoice(Task): ...@@ -82,6 +82,12 @@ class TruthfulQAMultipleChoice(Task):
def doc_to_text(self, doc): def doc_to_text(self, doc):
return QA_PROMPT + "\n\nQ: " + doc['question'] + "\nA:" return QA_PROMPT + "\n\nQ: " + doc['question'] + "\nA:"
def should_decontaminate(self):
    """Report that TruthfulQA multiple-choice documents take part in decontamination.

    Returns:
        bool: always ``True``.
    """
    return True
def doc_to_decontamination_query(self, doc):
    """Return the decontamination query for a TruthfulQA document.

    Only the question is used; the fixed prompt prefix that
    ``doc_to_text`` prepends is not part of the query.

    Args:
        doc: mapping with a ``'question'`` entry.

    Returns:
        The value of ``doc['question']``, unchanged.
    """
    return doc['question']
def doc_to_target(self, doc): def doc_to_target(self, doc):
return " " return " "
......
...@@ -48,6 +48,12 @@ class WordUnscrambleTask(Task): ...@@ -48,6 +48,12 @@ class WordUnscrambleTask(Task):
def doc_to_text(self, doc): def doc_to_text(self, doc):
return doc["context"] return doc["context"]
def should_decontaminate(self):
    """Report that word-unscramble documents take part in decontamination.

    Returns:
        bool: always ``True``.
    """
    return True
def doc_to_decontamination_query(self, doc):
    """Return the decontamination query for a word-unscramble document.

    The query is the same ``"context"`` field that ``doc_to_text``
    returns as the prompt.

    Args:
        doc: mapping with a ``"context"`` entry.

    Returns:
        The value of ``doc["context"]``, unchanged.
    """
    return doc["context"]
def doc_to_target(self, doc): def doc_to_target(self, doc):
return doc["completion"] return doc["completion"]
......
...@@ -20,6 +20,12 @@ class WebQs(HFTask): ...@@ -20,6 +20,12 @@ class WebQs(HFTask):
def doc_to_text(self, doc): def doc_to_text(self, doc):
return "Question: " + doc['question'] + '\nAnswer:' return "Question: " + doc['question'] + '\nAnswer:'
def should_decontaminate(self):
    """Report that WebQs documents take part in decontamination.

    Returns:
        bool: always ``True``.
    """
    return True
def doc_to_decontamination_query(self, doc):
    """Return the decontamination query for a WebQs document.

    Only the raw question is used, without the ``Question:`` /
    ``Answer:`` framing that ``doc_to_text`` adds.

    Args:
        doc: mapping with a ``'question'`` entry.

    Returns:
        The value of ``doc['question']``, unchanged.
    """
    return doc['question']
def doc_to_target(self, doc): def doc_to_target(self, doc):
# this picks one answer to be the "correct" one, despite sometimes # this picks one answer to be the "correct" one, despite sometimes
# multiple correct answers being possible. # multiple correct answers being possible.
......
...@@ -80,6 +80,12 @@ class WikiText(PerplexityTask): ...@@ -80,6 +80,12 @@ class WikiText(PerplexityTask):
def doc_to_target(self, doc): def doc_to_target(self, doc):
return wikitext_detokenizer(doc) return wikitext_detokenizer(doc)
def should_decontaminate(self):
    """Report that WikiText documents take part in decontamination.

    Returns:
        bool: always ``True``.
    """
    return True
def doc_to_decontamination_query(self, doc):
    """Return the decontamination query for a WikiText document.

    ``doc_to_target`` passes ``doc`` whole to ``wikitext_detokenizer``,
    which suggests the document is the raw page text itself. Indexing a
    string with ``"text"`` raises ``TypeError``, so a string document is
    returned as-is; a mapping is still read through its ``"text"`` entry.

    NOTE(review): confirm the document type against the dataset loader.

    Args:
        doc: the page text as a string, or a mapping with a ``"text"``
            entry.

    Returns:
        The page text.
    """
    if isinstance(doc, str):
        return doc
    return doc["text"]
def count_words(self, doc): def count_words(self, doc):
# count number of words in *original doc before detokenization* # count number of words in *original doc before detokenization*
......
...@@ -65,6 +65,12 @@ class WinogradSchemaChallenge273(HFTask): ...@@ -65,6 +65,12 @@ class WinogradSchemaChallenge273(HFTask):
def doc_to_text(self, doc): def doc_to_text(self, doc):
return self.partial_context(doc, doc["options"][doc["label"]]) return self.partial_context(doc, doc["options"][doc["label"]])
def should_decontaminate(self):
    """Report that Winograd Schema Challenge documents take part in decontamination.

    Returns:
        bool: always ``True``.
    """
    return True
def doc_to_decontamination_query(self, doc):
    """Return the decontamination query for a Winograd schema document.

    Args:
        doc: mapping with a ``"text"`` entry.

    Returns:
        The value of ``doc["text"]``, unchanged.
    """
    return doc["text"]
@classmethod @classmethod
def partial_context(cls, doc, option): def partial_context(cls, doc, option):
# Substitute the pronoun in the original text with the specified # Substitute the pronoun in the original text with the specified
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment