Unverified commit 0e4139b8 authored by Leo Gao, committed by GitHub

Merge pull request #86 from jon-tow/doc-to-text-refactor

Move `doc_to_text` target code into `doc_to_target`
parents e5d0229f d77241eb
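In short: each task's old `doc_to_text(doc, include_target=True)` built the prompt and, when `include_target` was set, appended the gold answer to it; after this change `doc_to_text(doc)` returns only the prompt and the new `doc_to_target(doc)` returns only the answer continuation. A minimal sketch of the pattern (the task name and `doc` fields below are illustrative placeholders, not taken from any file in this diff):

```python
class ExampleTask:
    # Before: one method produced the prompt and, optionally, the gold answer.
    def doc_to_text_old(self, doc, include_target=True):
        text = "Question: " + doc["question"] + "\nAnswer:"
        if include_target:
            text += " " + doc["answer"]
        return text

    # After: the prompt and the target are produced by separate methods.
    def doc_to_text(self, doc):
        return "Question: " + doc["question"] + "\nAnswer:"

    def doc_to_target(self, doc):
        return " " + doc["answer"]
```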
@@ -34,16 +34,16 @@ class ANLIBase(HFTask):
# TODO: figure out description
return ""
def doc_to_text(self, doc, include_target=True):
def doc_to_text(self, doc):
print(doc)
# OA does this a bit weirdly: they prepend "anli 1: anli 1: " to the beginning
# of the prompt (yes, repeating it!). also, " True, False, or Neither?" is directly
# appended onto the question, with no "Answer:" or even a newline. Do we *really*
# want to do it exactly as OA did?
q = doc['premise'] + '\nQuestion: ' + doc['hypothesis'] + '\n'
return doc['premise'] + '\nQuestion: ' + doc['hypothesis'] + '\nTrue, False, or Neither?'
a = "True, False, or Neither?" + ((" " + ["True", "Neither", "False"][doc['label']]) if include_target else '')
return q + a
def doc_to_target(self, doc):
return " " + ["True", "Neither", "False"][doc['label']]
# TODO: Implement evaluation code
......
@@ -19,10 +19,11 @@ class ARCEasy(HFTask):
# TODO: figure out description
return ""
def doc_to_text(self, doc, include_target=True):
q = "Question: " + doc['question'] + '\n'
a = "Answer:" + ((" " + doc['choices']['text'][doc['choices']['label'].index(doc['answerKey'])]) if include_target else "")
return q + a
def doc_to_text(self, doc):
return "Question: " + doc['question'] + '\nAnswer:'
def doc_to_target(self, doc):
return " " + doc['choices']['text'][doc['choices']['label'].index(doc['answerKey'])]
def evaluate(self, docs, lm, provide_description, num_fewshot):
# TODO: implement
......
@@ -39,11 +39,11 @@ class CoLA(HFTask):
def fewshot_description(self):
return "Does this sentence make sense?:\tTrue or False?"
def doc_to_text(self, doc, include_target=True):
text = "Sentence: {}\nAnswer:".format(doc["sentence"])
if include_target:
text += " {}".format({1: "True", 0: "False"}[doc["label"]])
return text
def doc_to_text(self, doc):
return "Sentence: {}\nAnswer:".format(doc["sentence"])
def doc_to_target(self, doc):
return " {}".format({1: "True", 0: "False"}[doc["label"]])
def evaluate(self, docs, lm, provide_description, num_fewshot):
@@ -92,17 +92,17 @@ class MNLI(HFTask):
if self.has_test_docs():
return self.data["test_matched"]
def doc_to_text(self, doc, include_target=True):
text = "{}\nquestion:\t{}\tTrue, False or Neither?\nanswer:".format(
def doc_to_text(self, doc):
return "{}\nquestion:\t{}\tTrue, False or Neither?\nanswer:".format(
doc["premise"],
doc["hypothesis"],
)
if include_target:
# True = entailment
# False = contradiction
# Neither = neutral
text += " {}".format({0: "True", 1: "Neither", 2: "False"}[doc["label"]])
return text
def doc_to_target(self, doc):
# True = entailment
# False = contradiction
# Neither = neutral
return " {}".format({0: "True", 1: "Neither", 2: "False"}[doc["label"]])
def evaluate(self, docs, lm, provide_description, num_fewshot):
# TODO: Implement evaluation code using new framework
@@ -154,14 +154,14 @@ class MRPC(HFTask):
def fewshot_description(self):
return "Indicate if both sentences mean the same thing."
def doc_to_text(self, doc, include_target=True):
text = "sentence 1:\t{}\nsentence 2:\t{}\nanswer:".format(
def doc_to_text(self, doc):
return "sentence 1:\t{}\nsentence 2:\t{}\nanswer:".format(
doc["sentence1"],
doc["sentence2"],
)
if include_target:
text += " {}".format(yesno(doc["label"]))
return text
def doc_to_target(self, doc):
return " {}".format(yesno(doc["label"]))
def evaluate(self, docs, lm, provide_description, num_fewshot):
# TODO: Implement evaluation code using new framework
@@ -194,16 +194,16 @@ class RTE(HFTask):
def has_test_docs(self):
return True
def doc_to_text(self, doc, include_target=True):
text = "{}\nquestion:\t{}\tTrue or False?\nanswer:".format(
def doc_to_text(self, doc):
return "{}\nquestion:\t{}\tTrue or False?\nanswer:".format(
doc["sentence1"],
doc["sentence2"],
)
if include_target:
# 0 = entailment
# 1 = not_entailment
text += " {}".format({0: "True", 1: "False"}[doc["label"]])
return text
def doc_to_target(self, doc):
# 0 = entailment
# 1 = not_entailment
return " {}".format({0: "True", 1: "False"}[doc["label"]])
def evaluate(self, docs, lm, provide_description, num_fewshot):
# TODO: Implement evaluation code using new framework
@@ -236,16 +236,16 @@ class QNLI(HFTask):
def has_test_docs(self):
return True
def doc_to_text(self, doc, include_target=True):
text = "question:\t{}\nresponse:\t{}\nDoes this answer the question, Yes or No?:".format(
def doc_to_text(self, doc):
return "question:\t{}\nresponse:\t{}\nDoes this answer the question, Yes or No?:".format(
doc["question"],
doc["sentence"],
)
if include_target:
# True = entailment
# False = not entailment
text += " {}".format({0: "Yes", 1: "No"}[doc["label"]])
return text
def doc_to_target(self, doc):
# True = entailment
# False = not entailment
return " {}".format({0: "Yes", 1: "No"}[doc["label"]])
def evaluate(self, docs, lm, provide_description, num_fewshot):
# TODO: Implement evaluation code using new framework
@@ -281,14 +281,14 @@ class QQP(HFTask):
def fewshot_description(self):
return "Indicate if both questions ask the same thing."
def doc_to_text(self, doc, include_target=True):
text = "question 1:\t{}\nquestion 2:\t{}\nanswer:".format(
def doc_to_text(self, doc):
return "question 1:\t{}\nquestion 2:\t{}\nanswer:".format(
doc["question1"],
doc["question2"],
)
if include_target:
text += " {}".format(yesno(doc["label"]))
return text
def doc_to_target(self, doc):
return " {}".format(yesno(doc["label"]))
def evaluate(self, docs, lm, provide_description, num_fewshot):
# TODO: Implement evaluation code using new framework
@@ -325,14 +325,14 @@ class STSB(HFTask):
return "Indicate if both sentences mean the same thing from a scale of 0-5, " \
"where 5 means identical and 0 means unrelated."
def doc_to_text(self, doc, include_target=True):
text = "sentence 1:\t{}\nsentence 2:\t{}\nanswer:".format(
def doc_to_text(self, doc):
return "sentence 1:\t{}\nsentence 2:\t{}\nanswer:".format(
doc["sentence1"],
doc["sentence2"],
)
if include_target:
text += " {}".format(doc["label"])
return text
def doc_to_target(self, doc):
return " {}".format(doc["label"])
def evaluate(self, docs, lm, provide_description, num_fewshot):
# TODO: Implement evaluation code using new framework
@@ -386,13 +386,13 @@ class SST(HFTask):
def fewshot_description(self):
return "Indicate if each sentence is Positive or Negative."
def doc_to_text(self, doc, include_target=True):
text = "sentence:\t{}\t\nanswer:".format(
def doc_to_text(self, doc):
return "sentence:\t{}\t\nanswer:".format(
doc["sentence"],
)
if include_target:
text += " {}".format({1: "Positive", 0: "Negative"}[doc["label"]])
return text
def doc_to_target(self, doc):
return " {}".format({1: "Positive", 0: "Negative"}[doc["label"]])
def evaluate(self, docs, lm, provide_description, num_fewshot):
# TODO: Implement evaluation code using new framework
@@ -425,17 +425,17 @@ class WNLI(HFTask):
def has_test_docs(self):
return True
def doc_to_text(self, doc, include_target=True):
text = "{}\nquestion:\t{}\tTrue, False or Neither?\nanswer:".format(
def doc_to_text(self, doc):
return "{}\nquestion:\t{}\tTrue, False or Neither?\nanswer:".format(
doc["sentence1"],
doc["sentence2"],
)
if include_target:
# True = entailment
# False = contradiction
# Neither = neutral
text += " {}".format({0: "True", 1: "Neither", 2: "False"}[doc["label"]])
return text
def doc_to_target(self, doc):
# True = entailment
# False = contradiction
# Neither = neutral
return " {}".format({0: "True", 1: "Neither", 2: "False"}[doc["label"]])
def evaluate(self, docs, lm, provide_description, num_fewshot):
# TODO: Implement evaluation code using new framework
......
@@ -34,22 +34,22 @@ class HellaSwag(HFTask):
def fewshot_description(self):
return "Label for the relevant action: Sentences describing the context, with an incomplete sentence trailing\nanswer that plausibly completes the situation."
def doc_to_text(self, doc, include_target=True):
text = doc['activity_label'] + ': ' + doc['ctx'] + '\n'
if include_target:
letter_answer = doc['label']
if letter_answer == '0':
index = 0
elif letter_answer == '1':
index = 1
elif letter_answer == '2':
index = 2
elif letter_answer == '3':
index = 3
else:
raise ValueError("HellaSwag from HF datasets contained an invalid answer key")
text += doc['endings'][index]
return text
def doc_to_text(self, doc):
return doc['activity_label'] + ': ' + doc['ctx'] + '\n'
def doc_to_target(self, doc):
letter_answer = doc['label']
if letter_answer == '0':
index = 0
elif letter_answer == '1':
index = 1
elif letter_answer == '2':
index = 2
elif letter_answer == '3':
index = 3
else:
raise ValueError("HellaSwag from HF datasets contained an invalid answer key")
return doc['endings'][index]
# TODO: Implement evaluation code
......
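Aside on the HellaSwag hunk above: since the HF label is the digit of the correct ending, the `if`/`elif` ladder in `doc_to_target` is equivalent to a single `int()` conversion with a validity check. A sketch of that equivalent form (an observation only, not a change made by this commit):

```python
def label_to_index(label: str) -> int:
    # The HF HellaSwag label is one of '0'..'3'; anything else is invalid.
    if label not in {'0', '1', '2', '3'}:
        raise ValueError("HellaSwag from HF datasets contained an invalid answer key")
    return int(label)
```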
@@ -32,23 +32,19 @@ class NaturalQs(HFTask):
return random.sample(self._traindocs, k)
def doc_to_text(self, doc, include_target=True):
question = doc['question']['text']
text = 'Q: ' + question + '\n\n' + 'A: '
if include_target:
# There's a short answer and a long answer. Based on the paper, I'm using the long answer.
short_answer = doc['annotations']['short_answers'][0]['text']
long_answer_start = doc['annotations']['long_answer'][0]['start_token']
long_answer_end = doc['annotations']['long_answer'][0]['end_token']
long_answer_span = doc['document']['tokens']['token'][long_answer_start:long_answer_end]
long_answer_is_html = doc['document']['tokens']['is_html'][long_answer_start:long_answer_end]
long_answer_chars = [tok for (tok, is_html) in zip(long_answer_span, long_answer_is_html) if not is_html]
long_answer = " ".join(long_answer_chars)
text += long_answer # Replace with short_answer[0] for short answer
return text
def doc_to_text(self, doc):
return 'Q: ' + doc['question']['text'] + '\n\n' + 'A: '
def doc_to_target(self, doc):
# There's a short answer and a long answer. Based on the paper, I'm using the long answer.
short_answer = doc['annotations']['short_answers'][0]['text']
long_answer_start = doc['annotations']['long_answer'][0]['start_token']
long_answer_end = doc['annotations']['long_answer'][0]['end_token']
long_answer_span = doc['document']['tokens']['token'][long_answer_start:long_answer_end]
long_answer_is_html = doc['document']['tokens']['is_html'][long_answer_start:long_answer_end]
long_answer_chars = [tok for (tok, is_html) in zip(long_answer_span, long_answer_is_html) if not is_html]
long_answer = " ".join(long_answer_chars)
return long_answer # Replace with short_answer[0] for short answer
# TODO: Implement evaluation code
......
@@ -36,22 +36,22 @@ class OpenBookQA(HFTask):
def fewshot_description(self):
return "Text of the question prompt\nText of the answer completion"
def doc_to_text(self, doc, include_target=True):
text = doc['question_stem'] + '\n'
if include_target:
letter_answer = doc['answerKey']
if letter_answer == 'A':
index = 0
elif letter_answer == 'B':
index = 1
elif letter_answer == 'C':
index = 2
elif letter_answer == 'D':
index = 3
else:
raise ValueError("OpenBookQA from HF datasets contained an invalid answer key")
text += doc['choices']['text'][index] + '.'
return text
def doc_to_text(self, doc):
return doc['question_stem'] + '\n'
def doc_to_target(self, doc):
letter_answer = doc['answerKey']
if letter_answer == 'A':
index = 0
elif letter_answer == 'B':
index = 1
elif letter_answer == 'C':
index = 2
elif letter_answer == 'D':
index = 3
else:
raise ValueError("OpenBookQA from HF datasets contained an invalid answer key")
return doc['choices']['text'][index] + '.'
# TODO: Implement evaluation code
......
@@ -47,13 +47,14 @@ class PiQA(Dataset):
def fewshot_description(self):
pass
def doc_to_text(self, doc, include_target=True):
if include_target:
rightanswer = int(doc[1][0])+1
return ''.join([doc[0]['goal'],' ',doc[0]['sol'+str(rightanswer)]])
def doc_to_text(self, doc):
#TODO: check if oa uses newline
return doc['goal'] + ' '
def doc_to_target(self, doc):
rightanswer = int(doc[1][0]) + 1
return ''.join([doc[0]['goal'],' ',doc[0]['sol'+str(rightanswer)]])
# TODO: Implement evaluation code
# ***IMPORTANT***: this evaluation function needs to be written for the new framework.
......
@@ -55,11 +55,11 @@ class QuAC(Dataset):
docs.append(doc)
return docs
def doc_to_text(self, doc, include_target=True):
text = 'TITLE: ' + doc['title'] + '\n' + 'PARAGRAPH: ' + doc['paragraph'] + '\n\n' + 'Q: ' + doc['question'] + '\n\n' + 'A: '
if include_target:
text += doc['answer']
return text
def doc_to_text(self, doc):
return 'TITLE: ' + doc['title'] + '\n' + 'PARAGRAPH: ' + doc['paragraph'] + '\n\n' + 'Q: ' + doc['question'] + '\n\n' + 'A: '
def doc_to_target(self, doc):
return doc['answer']
# TODO: Implement evaluation code
......
@@ -31,16 +31,16 @@ class SQuAD(HFTask):
# TODO: redo description
return "Title: The_Title_of_It\n\nBackground: A text passage as background to answer the question with.\n\nQ: Question about the passage.\n\nA: Answer."
def doc_to_text(self, doc, include_target=True):
text = 'Title: ' + doc['title'] + '\n\n' + 'Background: ' + doc['context'] + '\n\n' + 'Q: ' + doc['question'] + '\n\n' + 'A: '
if include_target:
answer_list = doc['answers']['text']
if len(answer_list) > 0:
answer = answer_list[0]
else:
answer = 'unanswerable'
text += answer
return text
def doc_to_text(self, doc):
return 'Title: ' + doc['title'] + '\n\n' + 'Background: ' + doc['context'] + '\n\n' + 'Q: ' + doc['question'] + '\n\n' + 'A: '
def doc_to_target(self, doc):
answer_list = doc['answers']['text']
if len(answer_list) > 0:
answer = answer_list[0]
else:
answer = 'unanswerable'
return answer
# TODO: Implement evaluation code
......
@@ -41,11 +41,11 @@ class StoryCloze(Dataset):
def fewshot_description(self):
pass
def doc_to_text(self, doc, include_target=True):
if include_target:
return ' '.join([*doc[1:5],doc[int(doc[-1])-4]])
else:
return ' '.join([*doc[1:5]])
def doc_to_text(self, doc):
return ' '.join([*doc[1:5]])
def doc_to_target(self, doc):
return " " + doc[int(doc[-1]) - 4]
# TODO: Implement evaluation code
......
@@ -69,17 +69,17 @@ class CommitmentBank(HFTask):
def has_test_docs(self):
return True
def doc_to_text(self, doc, include_target=True):
text = "{}\nquestion:\t{}\ttrue, false or neither?\nanswer:".format(
def doc_to_text(self, doc):
return "{}\nquestion:\t{}\ttrue, false or neither?\nanswer:".format(
doc["premise"],
doc["hypothesis"],
)
if include_target:
# True = entailment
# False = contradiction
# Neither = neutral
text += " {}".format({0: "true", 1: "neither", 2: "false"}[doc["label"]])
return text
def doc_to_target(self, doc):
# True = entailment
# False = contradiction
# Neither = neutral
return " {}".format({0: "true", 1: "neither", 2: "false"}[doc["label"]])
def evaluate(self, docs, lm, provide_description, num_fewshot):
# TODO: Implement evaluation code using new framework
@@ -117,18 +117,18 @@ class Copa(HFTask):
def has_test_docs(self):
return True
def doc_to_text(self, doc, include_target=True):
def doc_to_text(self, doc):
# Drop the period
connector = {
"cause": "because",
"effect": "therefore",
}[doc["question"]]
text = doc["premise"].strip()[:-1] + f" {connector} "
if include_target:
correct_choice = doc["choice1"] if doc["label"] == 0 else doc["choice2"]
# Connect the sentences
text += self.convert_choice(correct_choice)
return text
return doc["premise"].strip()[:-1] + f" {connector} "
def doc_to_target(self, doc):
correct_choice = doc["choice1"] if doc["label"] == 0 else doc["choice2"]
# Connect the sentences
return self.convert_choice(correct_choice)
def evaluate(self, docs, lm, provide_description, num_fewshot):
# TODO: Implement evaluation code using new framework
@@ -170,10 +170,11 @@ class MultiRC(HFTask):
def fewshot_description(self):
return "READING COMPREHENSION ANSWER KEY"
def doc_to_text(self, doc, include_target=True):
return f"{doc['paragraph']}\n\n{doc['question']}\n" \
+ (self.format_answer(answer=doc["answer"], label=doc["label"])
if include_target else "")
def doc_to_text(self, doc):
return f"{doc['paragraph']}\n\n{doc['question']}\n"
def doc_to_target(self, doc):
return self.format_answer(answer=doc["answer"], label=doc["label"])
@staticmethod
def format_answer(answer, label):
@@ -229,16 +230,16 @@ class WordsInContext(HFTask):
def has_test_docs(self):
return True
def doc_to_text(self, doc, include_target=True):
text = "{}\n{}\nquestion\tIs the word '{}' used in the same way in the" \
def doc_to_text(self, doc):
return "{}\n{}\nquestion\tIs the word '{}' used in the same way in the" \
" two sentences above?\nanswer:".format(
doc["sentence1"],
doc["sentence2"],
doc["sentence1"][doc["start1"]:doc["end1"]],
)
if include_target:
text += " {}".format({0: "no", 1: "yes"}[doc["label"]])
return text
def doc_to_target(self, doc):
return " {}".format({0: "no", 1: "yes"}[doc["label"]])
def evaluate(self, docs, lm, provide_description, num_fewshot):
# TODO: Implement evaluation code using new framework
@@ -288,7 +289,7 @@ class SGWinogradSchemaChallenge(HFTask):
"For each passage, you must identify which noun the pronoun marked in *bold*" \
" refers to.\n====="
def doc_to_text(self, doc, include_target=True):
def doc_to_text(self, doc):
raw_passage = doc["text"]
passage = (
raw_passage[:doc["span2_index"]]
@@ -301,10 +302,11 @@ class SGWinogradSchemaChallenge(HFTask):
+ f"Question: In the passage above, what does the pronoun \"*{pronoun}*\" refer to?\n"
+ "Answer:"
)
if include_target:
text += " {}".format(doc["span1_text"])
return text
def doc_to_target(self, doc):
return " {}".format(doc["span1_text"])
def evaluate(self, docs, lm, provide_description, num_fewshot):
# TODO: Implement evaluation code using new framework
@@ -336,16 +338,12 @@ class RTE(HFTask):
#TODO: implement
pass
def doc_to_text(self, doc, include_target=True):
if include_target:
if doc['label'] == 0:
answer = 'True'
else:
answer = 'False'
return ''.join([doc['premise'], '\nquestion: ',doc['hypothesis'], ' True or False?\nanswer: ', answer])
else:
return ''.join([doc['premise'], '\nquestion: ',doc['hypothesis'], ' True or False?\nanswer: '])
def doc_to_text(self, doc):
return ''.join([doc['premise'], '\nquestion: ',doc['hypothesis'], ' True or False?\nanswer: '])
def doc_to_target(self, doc):
return 'True' if doc['label'] == 0 else 'False'
# TODO: Implement evaluation code
# ***IMPORTANT***: this evaluation function needs to be written for the new framework.
......
@@ -39,11 +39,11 @@ class TriviaQA(Dataset):
def fewshot_description(self):
pass
def doc_to_text(self, doc, include_target=True):
if include_target:
return ''.join(['Q: ', doc['Question'], '\n\n','A: ', doc['Answer']['Aliases'][0]])
else:
return ''.join(['Q: ', doc['Question'], '\n\n','A: '])
def doc_to_text(self, doc):
return ''.join(['Q: ', doc['Question'], '\n\n','A: '])
def doc_to_target(self, doc):
return doc['Answer']['Aliases'][0]
# TODO: Implement evaluation code
......
@@ -19,15 +19,15 @@ class WebQs(HFTask):
# TODO: figure out description
return ""
def doc_to_text(self, doc, include_target=True):
def doc_to_text(self, doc):
print(doc)
q = "Q: " + doc['question'] + '\n'
return "Q: " + doc['question'] + '\nA:'
def doc_to_target(self, doc):
# this picks one answer to be the "correct" one, despite sometimes
# multiple correct answers being possible.
# TODO: make sure we're actually handling multi-answer correctly
a = "A:" + ((" " + doc['answers'][0]) if include_target else '')
return q + a
return " " + doc['answers'][0]
# TODO: Implement evaluation code
......
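The multi-answer TODO in the WebQs hunk above could eventually be resolved by comparing against all gold answers rather than only `answers[0]`. One possible sketch (not something this commit implements):

```python
def webqs_is_correct(prediction, doc):
    # Count the prediction as correct if it matches any gold answer,
    # not just the first one returned by doc_to_target.
    gold = [answer.strip().lower() for answer in doc['answers']]
    return prediction.strip().lower() in gold
```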
@@ -34,18 +34,19 @@ class Winogrande(HFTask):
def fewshot_description(self):
return "Winograd schema sentence including a either a ___ blank with a missing word, making the pronoun ambiguous, or the same with the word filled in."
def doc_to_text(self, doc, include_target=True):
def doc_to_text(self, doc):
return doc['sentence']
def doc_to_target(self, doc):
text = doc['sentence']
if include_target:
answer_n = doc['answer']
if answer_n == '1':
answer = doc['option1']
elif answer_n == '2':
answer = doc['option2']
else:
raise ValueError("Winogrande from HF datasets contained an invalid answer key")
text = text.replace("_", answer)
return text
answer_n = doc['answer']
if answer_n == '1':
answer = doc['option1']
elif answer_n == '2':
answer = doc['option2']
else:
raise ValueError("Winogrande from HF datasets contained an invalid answer key")
return text.replace("_", answer)
# TODO: Implement evaluation code
......
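Taken together, the split lets downstream code treat the target as a continuation of the prompt. A sketch of how the two methods could be consumed once the evaluation code (still marked TODO throughout this diff) is written; `task`, `fewshot_docs`, and `eval_doc` are hypothetical placeholders:

```python
def build_fewshot_prompt(task, fewshot_docs, eval_doc):
    context = ""
    for d in fewshot_docs:
        # Few-shot examples show both the prompt and its gold target.
        context += task.doc_to_text(d) + task.doc_to_target(d) + "\n\n"
    # The evaluated document contributes only the prompt; the model is then
    # scored on producing task.doc_to_target(eval_doc) as the continuation.
    return context + task.doc_to_text(eval_doc)
```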