Commit dae7b868 authored by Quentin Gregory Anthony's avatar Quentin Gregory Anthony
Browse files

Added decontamination to remaining evals

parent 341663a9
......@@ -40,6 +40,12 @@ class ANLIBase(HFTask):
# want to do it exactly as OA did?
return doc['premise'] + '\nQuestion: ' + doc['hypothesis'] + ' True, False, or Neither?\nAnswer:'
def should_decontaminate(self):
    """Overlap checking against the training set is enabled for ANLI."""
    return True

def doc_to_decontamination_query(self, doc):
    """The premise is the text compared against the training corpus."""
    premise = doc["premise"]
    return premise
def doc_to_target(self, doc):
# True = entailment
# False = contradiction
......
......@@ -32,6 +32,12 @@ class ARCEasy(HFTask, MultipleChoiceTask):
def doc_to_text(self, doc):
return doc["query"]
def should_decontaminate(self):
return True
def doc_to_decontamination_query(self, doc):
return doc["query"]
class ARCChallenge(ARCEasy):
DATASET_PATH = "ai2_arc"
......
......@@ -55,6 +55,12 @@ class Arithmetic(Task):
def doc_to_text(self, doc):
    """Prompt text: the arithmetic problem's context attribute."""
    return doc.context

def should_decontaminate(self):
    """Overlap checking is supported for this task."""
    return True

def doc_to_decontamination_query(self, doc):
    """Decontaminate on the same context string used as the prompt."""
    return doc.context
def doc_to_target(self, doc):
return doc.completion
......
......@@ -93,6 +93,12 @@ class Asdiv(Task):
# TODO: add solution-type
return doc['body'] + '\n' + 'Question:' + doc['question'] + '\n' + 'Answer:'
def should_decontaminate(self):
    """Overlap checking is supported for ASDiv."""
    return True

def doc_to_decontamination_query(self, doc):
    """Body and question joined by a single space for overlap checking."""
    return f"{doc['body']} {doc['question']}"
def doc_to_target(self, doc):
# TODO: add formula
......
......@@ -47,6 +47,12 @@ class BlimpTask(HFTask):
# this method is invoked by tests only
return ""
def should_decontaminate(self):
    """Overlap checking is supported for BLiMP."""
    return True

def doc_to_decontamination_query(self, doc):
    """Both the grammatical and ungrammatical sentence, space-joined."""
    return " ".join([doc["sentence_good"], doc["sentence_bad"]])
def doc_to_target(self, doc):
# this method is invoked by tests only
return ""
......
......@@ -38,6 +38,13 @@ class CBTBase(HFTask):
text = "Passage: " + passage + "\nQuestion: " + doc["question"]
return self.detokenize(text)
def should_decontaminate(self):
    """Overlap checking is supported for CBT."""
    return True

def doc_to_decontamination_query(self, doc):
    """All passage sentences joined into one string for overlap checking."""
    return " ".join(doc["sentences"])
def doc_to_target(self, doc):
return ""
......
......@@ -47,6 +47,12 @@ class CoQA(Task):
doc_text += question + answer
return doc_text
def should_decontaminate(self):
    """Overlap checking is supported for CoQA."""
    return True

def doc_to_decontamination_query(self, doc):
    """Return the overlap-check text: the story plus every question turn.

    NOTE(review): the original returned ``doc["story"] + " " + doc["questions"]``,
    which raises TypeError — ``doc["questions"]`` is not a string but a
    container of question turns (it is iterated turn-by-turn when building
    the prompt). Per the CoQA schema the question strings live under
    ``doc["questions"]["input_text"]``; confirm against the dataset loader.
    """
    return doc["story"] + " " + "\n".join(doc["questions"]["input_text"])
@classmethod
def get_answers(cls, doc, turn_id):
# Returns unique answers and valid alternatives (Some questions in CoQA have multiple valid answers).
......
......@@ -87,6 +87,12 @@ class DROP(Task):
def doc_to_text(self, doc):
    """Prompt: passage followed by the question and an answer cue."""
    return "Passage: {}\nQuestion: {}\nAnswer:".format(doc["passage"], doc["question"])

def should_decontaminate(self):
    """Overlap checking is supported for DROP."""
    return True

def doc_to_decontamination_query(self, doc):
    """Passage and question joined by a space, without the prompt scaffolding."""
    return f"{doc['passage']} {doc['question']}"
def doc_to_target(self, doc):
return " " + ", ".join(doc["answers"][0])
......
......@@ -24,6 +24,12 @@ class CoLA(HFTask):
def doc_to_text(self, doc):
    """Prompt: the sentence plus an acceptability yes/no question."""
    return f"{doc['sentence']}\nQuestion: Does this sentence make sense?\nAnswer:"

def should_decontaminate(self):
    """Overlap checking is supported for CoLA."""
    return True

def doc_to_decontamination_query(self, doc):
    """Only the raw sentence is checked for training-set overlap."""
    return doc["sentence"]

def doc_to_target(self, doc):
    """Gold completion: ' yes' for label 1, ' no' for label 0."""
    label_words = {0: "no", 1: "yes"}
    return " " + label_words[doc["label"]]
......
......@@ -27,6 +27,12 @@ class HeadQABase(HFTask, MultipleChoiceTask):
def doc_to_text(self, doc):
    """Prompt shown to the model: the preformatted query string."""
    return doc["query"]

def should_decontaminate(self):
    """Overlap checking is supported for HEAD-QA."""
    return True

def doc_to_decontamination_query(self, doc):
    """Decontaminate on the same query text used as the prompt."""
    query = doc["query"]
    return query
class HeadQAEn(HeadQABase):
DATASET_NAME = "en"
......@@ -39,4 +45,4 @@ class HeadQAEsDeprecated(HeadQABase):
def __init__(self):
super().__init__()
print("WARNING: headqa is deprecated. Please use headqa_es or headqa_en instead. See https://github.com/EleutherAI/lm-evaluation-harness/pull/240 for more info.")
\ No newline at end of file
print("WARNING: headqa is deprecated. Please use headqa_es or headqa_en instead. See https://github.com/EleutherAI/lm-evaluation-harness/pull/240 for more info.")
......@@ -98,6 +98,12 @@ class EthicsCM(Ethics):
def doc_to_text(self, doc):
    """Prompt: the scenario text (column 1) plus a wrongness question."""
    return f"{doc[1]}\nQuestion: Is this wrong?\nAnswer:"

def should_decontaminate(self):
    """Overlap checking is supported for ETHICS commonsense morality."""
    return True

def doc_to_decontamination_query(self, doc):
    """Only the raw scenario text is checked for training-set overlap."""
    return doc[1]

def doc_to_target(self, doc):
    """Gold completion: yes/no derived from the integer label in column 0."""
    return f" {yesno(int(doc[0]))}"
......@@ -138,6 +144,12 @@ class EthicsDeontology(Ethics):
prompt = " ".join([doc[1], doc[2]])
return "Question: Would most people believe this reasonable or unreasonable to say? \"{}\"\nAnswer:".format(prompt)
def should_decontaminate(self):
    """Overlap checking is supported for ETHICS deontology."""
    return True

def doc_to_decontamination_query(self, doc):
    """Scenario (column 1) and excuse (column 2) joined by a space."""
    return doc[1] + " " + doc[2]

def doc_to_target(self, doc):
    """Gold completion: ' unreasonable' for label 0, ' reasonable' for 1."""
    return " " + ("unreasonable", "reasonable")[int(doc[0])]
......@@ -187,6 +199,12 @@ class EthicsJustice(Ethics):
def doc_to_text(self, doc):
    """Prompt: the statement (column 1) wrapped in a reasonableness question."""
    statement = doc[1]
    return "Question: Would most people believe this reasonable or unreasonable to say? \"{}\"\nAnswer:".format(statement)

def should_decontaminate(self):
    """Overlap checking is supported for ETHICS justice."""
    return True

def doc_to_decontamination_query(self, doc):
    """Only the raw statement text is checked for training-set overlap."""
    return doc[1]

def doc_to_target(self, doc):
    """Gold completion: ' unreasonable' for label 0, ' reasonable' for 1."""
    return " " + ("unreasonable", "reasonable")[int(doc[0])]
......@@ -253,6 +271,12 @@ class EthicsUtilitarianismOriginal(Ethics):
def doc_to_text(self, doc):
    """Prompt: the activity in quotes followed by a rating cue."""
    return f'Activity: "{doc["activity"]}"\nRating:'

def should_decontaminate(self):
    """Overlap checking is supported for ETHICS utilitarianism (original)."""
    return True

def doc_to_decontamination_query(self, doc):
    """Only the raw activity text is checked for training-set overlap."""
    return doc["activity"]

def doc_to_target(self, doc):
    """Gold completion: a space followed by the rating string."""
    return f" {doc['rating']}"
......
......@@ -58,6 +58,12 @@ class Math(Task):
def doc_to_text(self, doc):
    """Prompt: the problem statement with a 'Problem:'/'Answer:' frame."""
    return f"Problem: {doc['problem']}\nAnswer:"

def should_decontaminate(self):
    """Overlap checking is supported for MATH."""
    return True

def doc_to_decontamination_query(self, doc):
    """Only the raw problem text is checked for training-set overlap."""
    return doc["problem"]

def doc_to_target(self, doc):
    """Gold completion: a space followed by the answer string."""
    return f" {doc['answer']}"
......
......@@ -116,3 +116,10 @@ class GeneralHendrycksTest(MultipleChoiceTask):
def doc_to_text(self, doc):
return doc["query"]
def should_decontaminate(self):
return True
def doc_to_decontamination_query(self, doc):
return doc["query"]
......@@ -11,5 +11,11 @@ class LAMBADA_cloze(LAMBADA):
def doc_to_text(self, doc):
    """Cloze prompt: the text minus its final word, plus a blank marker."""
    pieces = doc["text"].rsplit(" ", 1)
    return pieces[0] + " ____. ->"

def should_decontaminate(self):
    """Overlap checking is supported for LAMBADA (cloze form)."""
    return True

def doc_to_decontamination_query(self, doc):
    """The full passage, including the held-out final word."""
    return doc["text"]

def doc_to_target(self, doc):
    """Gold completion: a space followed by the passage's final word."""
    return " " + doc["text"].rsplit(" ", 1)[1]
......@@ -43,6 +43,12 @@ class MCTACO(HFTask):
return f"{doc['sentence']}\nQuestion: {doc['question']}\n"\
f"Answer: {doc['answer']}\nPlausible:"
def should_decontaminate(self):
    """Overlap checking is supported for MC-TACO."""
    return True

def doc_to_decontamination_query(self, doc):
    """Question followed by the sentence, joined by a space."""
    return f"{doc['question']} {doc['sentence']}"

def doc_to_target(self, doc):
    """Gold completion: ' no' for label 0, ' yes' for label 1."""
    plausibility = ["no", "yes"][doc["label"]]
    return " " + plausibility
......
......@@ -73,6 +73,12 @@ class MuTualBase(Task):
def doc_to_text(self, doc):
    """Prompt: the dialogue article, detokenized for display."""
    article = doc["article"]
    return self.detokenize(article)

def should_decontaminate(self):
    """Overlap checking is supported for MuTual."""
    return True

def doc_to_decontamination_query(self, doc):
    """The raw (non-detokenized) article is checked for overlap."""
    return doc["article"]

def doc_to_target(self, doc):
    """Gold completion: the detokenized option matching the answer letter."""
    answer_idx = self.CHOICES.index(doc["answers"])
    option = doc["options"][answer_idx]
    return " " + self.detokenize(option)
......
......@@ -36,6 +36,12 @@ class NaturalQs(HFTask):
def doc_to_text(self, doc):
    """Prompt: the question text framed as 'Q: ... A:'."""
    return f"Q: {doc['question']['text']}\n\nA:"

def should_decontaminate(self):
    """Overlap checking is supported for Natural Questions."""
    return True

def doc_to_decontamination_query(self, doc):
    """Only the raw question text is checked for training-set overlap."""
    return doc["question"]["text"]
def doc_to_target(self, doc):
# There's a short answer and a long answer. Based on the paper, I'm using the long answer.
short_answer = doc['annotations']['short_answers'][0]['text']
......
......@@ -27,3 +27,10 @@ class OpenBookQA(HFTask, MultipleChoiceTask):
def doc_to_text(self, doc):
    """Prompt shown to the model: the preformatted query string."""
    return doc["query"]

def should_decontaminate(self):
    """Overlap checking is supported for OpenBookQA."""
    return True

def doc_to_decontamination_query(self, doc):
    """Decontaminate on the same query text used as the prompt."""
    query = doc["query"]
    return query
......@@ -55,3 +55,10 @@ class PROST(HFTask, MultipleChoiceTask):
def doc_to_text(self, doc):
return doc["query"]
def should_decontaminate(self):
return True
def doc_to_decontamination_query(self, doc):
return doc["query"]
......@@ -73,6 +73,13 @@ class QA4MRE(MultipleChoiceTask):
def doc_to_text(self, doc):
    """Prompt: source passage, then the question and an answer cue."""
    return f"{doc['source']}\nQuestion: {doc['query']}\nAnswer:"

def should_decontaminate(self):
    """Overlap checking is supported for QA4MRE."""
    return True

def doc_to_decontamination_query(self, doc):
    """Source passage and query joined by a space, without prompt framing."""
    return " ".join([doc["source"], doc["query"]])
class QA4MRE_2011(QA4MRE):
YEAR = 2011
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment