"092/benchmark_serving.py" did not exist on "77765e10b1637256b4ae6d4badeb22d2b5c51c26"
Commit c3f724cf authored by Leo Gao

Change glue and superglue prompts

parent 1050109b
lm_eval/tasks/glue.py
@@ -3,7 +3,7 @@ from lm_eval.base import rf, mean, f1_score, matthews_corrcoef
 from scipy.stats import pearsonr, spearmanr
 from tqdm import auto as tqdm_lib
 from . common import HFTask, yesno
+from ..utils import general_detokenize
 
 
 # Single-Sentence Tasks
@@ -22,10 +22,10 @@ class CoLA(HFTask):
         return True
 
     def fewshot_description(self):
-        return "Does this sentence make sense?:\tTrue or False?"
+        return "Does this sentence make sense? (True or False)"
 
     def doc_to_text(self, doc):
-        return "Sentence: {}\nAnswer:".format(doc["sentence"])
+        return "{}\nQuestion: Does this sentence make sense?\nAnswer:".format(doc["sentence"])
 
     def doc_to_target(self, doc):
         return " {}".format({1: "True", 0: "False"}[doc["label"]])
@@ -71,8 +71,8 @@ class SST(HFTask):
         return "Indicate if each sentence is Positive or Negative."
 
     def doc_to_text(self, doc):
-        return "sentence:\t{}\t\nanswer:".format(
-            doc["sentence"],
+        return "{}\nQuestion: Is this sentence Positive or Negative?\nAnswer:".format(
+            general_detokenize(doc["sentence"]),
         )
 
     def doc_to_target(self, doc):
@@ -127,9 +127,9 @@ class MNLI(HFTask):
             return self.data["test_matched"]
 
     def doc_to_text(self, doc):
-        return "{}\nquestion:\t{}\tTrue, False or Neither?\nanswer:".format(
+        return "{}\nQuestion: {} True, False or Neither?\nAnswer:".format(
             doc["premise"],
-            doc["hypothesis"],
+            doc["hypothesis"] + ('' if doc["hypothesis"].endswith('.') else '.'),
         )
 
     def doc_to_target(self, doc):
@@ -187,7 +187,7 @@ class QNLI(HFTask):
         return True
 
     def doc_to_text(self, doc):
-        return "question:\t{}\nresponse:\t{}\nDoes this answer the question, Yes or No?:".format(
+        return "{}\n{}\nQuestion: Does this response answer the question?\nAnswer:".format(
             doc["question"],
             doc["sentence"],
         )
@@ -235,7 +235,7 @@ class WNLI(HFTask):
         return True
 
     def doc_to_text(self, doc):
-        return "{}\nquestion:\t{}\tTrue, False or Neither?\nanswer:".format(
+        return "{}\nQuestion: {} True, False or Neither?\nAnswer:".format(
             doc["sentence1"],
             doc["sentence2"],
         )
@@ -284,7 +284,7 @@ class RTE(HFTask):
         return True
 
     def doc_to_text(self, doc):
-        return "{}\nquestion:\t{}\tTrue or False?\nanswer:".format(
+        return "{}\nQuestion: {} True or False?\nAnswer:".format(
             doc["sentence1"],
             doc["sentence2"],
         )
@@ -338,17 +338,17 @@ class MRPC(HFTask):
         return "Indicate if both sentences mean the same thing."
 
     def doc_to_text(self, doc):
-        return "sentence 1:\t{}\nsentence 2:\t{}\nanswer:".format(
-            doc["sentence1"],
-            doc["sentence2"],
+        return "Sentence 1: {}\nSentence 2: {}\nQuestion: Do both sentences mean the same thing?\nAnswer:".format(
+            general_detokenize(doc["sentence1"]),
+            general_detokenize(doc["sentence2"]),
         )
 
     def doc_to_target(self, doc):
         return " {}".format(yesno(doc["label"]))
 
     def construct_requests(self, doc, ctx):
-        ll_yes, _ = rf.loglikelihood(ctx, " yes")
-        ll_no, _ = rf.loglikelihood(ctx, " no")
+        ll_yes, _ = rf.loglikelihood(ctx, " Yes")
+        ll_no, _ = rf.loglikelihood(ctx, " No")
         return ll_yes, ll_no
 
     def process_results(self, doc, results):
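
A minimal sketch of how the two returned loglikelihoods are typically consumed downstream (illustrative only; the actual process_results body is outside this hunk):

    ll_yes, ll_no = results
    pred = ll_yes > ll_no            # model prefers " Yes" over " No"
    acc = float(pred == bool(doc["label"]))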
@@ -390,7 +390,7 @@ class QQP(HFTask):
         return "Indicate if both questions ask the same thing."
 
     def doc_to_text(self, doc):
-        return "question 1:\t{}\nquestion 2:\t{}\nanswer:".format(
+        return "Question 1: {}\nQuestion 2: {}\nQuestion: Do both questions ask the same thing?\nAnswer:".format(
             doc["question1"],
             doc["question2"],
         )
@@ -443,7 +443,7 @@ class STSB(HFTask):
                "where 5 means identical and 0 means unrelated."
 
     def doc_to_text(self, doc):
-        return "sentence 1:\t{}\nsentence 2:\t{}\nanswer:".format(
+        return "sentence 1: {}\nsentence 2: {}\nAnswer:".format(
             doc["sentence1"],
             doc["sentence2"],
         )
lm_eval/tasks/superglue.py
@@ -28,7 +28,7 @@ class BoolQ(HFTask):
         return "Read the following passages and answer each question with a yes or a no."
 
     def doc_to_text(self, doc):
-        return f"{doc['passage']}\nquestion: {doc['question']}\nanswer:"
+        return f"{doc['passage']}\nQuestion: {doc['question']}\nAnswer:"
 
     def doc_to_target(self, doc):
         return " " + yesno(doc['label'])
@@ -80,7 +80,7 @@ class CommitmentBank(HFTask):
                "to the truth of the hypothesis. The three possible labels are true, false or neither."
 
     def doc_to_text(self, doc):
-        return "{}\nquestion: {} true, false or neither?\nanswer:".format(
+        return "{}\nQuestion: {}. True, False or Neither?\nAnswer:".format(
             doc["premise"],
             doc["hypothesis"],
         )
@@ -89,12 +89,12 @@ class CommitmentBank(HFTask):
         # True = entailment
         # False = contradiction
         # Neither = neutral
-        return " {}".format({0: "true", 1: "neither", 2: "false"}[doc["label"]])
+        return " {}".format({0: "True", 1: "Neither", 2: "False"}[doc["label"]])
 
     def construct_requests(self, doc, ctx):
-        ll_true, _ = rf.loglikelihood(ctx, ' true')
-        ll_neither, _ = rf.loglikelihood(ctx, ' neither')
-        ll_false, _ = rf.loglikelihood(ctx, ' false')
+        ll_true, _ = rf.loglikelihood(ctx, ' True')
+        ll_neither, _ = rf.loglikelihood(ctx, ' Neither')
+        ll_false, _ = rf.loglikelihood(ctx, ' False')
         return ll_true, ll_neither, ll_false
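
The tuple order lines up with the label encoding above, so prediction reduces to a three-way argmax (sketch; the real process_results lives outside this hunk):

    # results = (ll_true, ll_neither, ll_false); labels: 0=True, 1=Neither, 2=False
    pred = max(range(3), key=lambda i: results[i])
    acc = float(pred == doc["label"])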
@@ -214,15 +214,15 @@ class MultiRC(HFTask):
         return "READING COMPREHENSION ANSWER KEY"
 
     def doc_to_text(self, doc):
-        return f"{doc['paragraph']}\n\n{doc['question']}\n"
+        return f"{doc['paragraph']}\nQuestion: {doc['question']}\nAnswer:"
 
     def doc_to_target(self, doc):
         return self.format_answer(answer=doc["answer"], label=doc["label"])
 
     @staticmethod
     def format_answer(answer, label):
-        label_str = "True" if label else "False"
-        return f"[{label_str}] {answer}"
+        label_str = "Yes" if label else "No"
+        return f"{label_str}, {answer}"
 
     def construct_requests(self, doc, ctx):
         true_choice = self.format_answer(answer=doc["answer"], label=True)
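
With the new format_answer, the two continuations scored per answer candidate read as natural text rather than bracketed tags; for a hypothetical answer "the dog":

    format_answer("the dog", True)   # "Yes, the dog"  (was "[True] the dog")
    format_answer("the dog", False)  # "No, the dog"   (was "[False] the dog")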
@@ -364,8 +364,8 @@ class WordsInContext(HFTask):
         return ""
 
     def doc_to_text(self, doc):
-        return "{}\n{}\nQuestion: Is the word '{}' used in the same way in the" \
-               " two sentences above?\nanswer:".format(
+        return "Sentence 1: {}\nSentence 2: {}\nQuestion: Is the word '{}' used in the same way in the" \
+               " two sentences above?\nAnswer:".format(
             doc["sentence1"],
             doc["sentence2"],
             doc["sentence1"][doc["start1"]:doc["end1"]],
@@ -438,7 +438,7 @@ class SGWinogradSchemaChallenge(HFTask):
         # NOTE: HuggingFace span indices are word-based not character-based.
         pre = " ".join(raw_passage.split()[:doc["span2_index"]])
         post = raw_passage[len(pre) + len(doc["span2_text"]) + 1:]
-        passage = pre + " *{}*".format(doc['span2_text']) + post
+        passage = general_detokenize(pre + " *{}*".format(doc['span2_text']) + post)
         noun = doc["span1_text"]
         pronoun = doc["span2_text"]
         text = (
lm_eval/utils.py
 import os
+import re
 
 
 class ExitCodeError(Exception):
@@ -39,4 +40,13 @@ def chunks(iter, n):
             yield arr
             arr = []
 
-    if arr: yield arr
\ No newline at end of file
+    if arr: yield arr
+
+def general_detokenize(string):
+    string = string.replace(" n't", "n't")
+    string = string.replace(" )", ")")
+    string = string.replace("( ", "(")
+    string = string.replace("\" ", "\"")
+    string = string.replace(" \"", "\"")
+    string = re.sub(r" (['.,])", r"\1", string)
+    return string
\ No newline at end of file
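
A quick sanity check of the new helper (output assumes the three-argument re.sub call above):

    >>> general_detokenize("He does n't like it ( at all ) .")
    "He doesn't like it (at all)."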
main.py
@@ -16,7 +16,7 @@ def parse_args():
     parser.add_argument('--model_args', default="")
     parser.add_argument('--tasks', default="all_tasks")
     parser.add_argument('--provide_description', action="store_true")
-    parser.add_argument('--num_fewshot', type=int, default=1)
+    parser.add_argument('--num_fewshot', type=int, default=0)
     parser.add_argument('--seed', type=int, default=1234)
     parser.add_argument('--output_path', default=None)
     parser.add_argument('--limit', type=int, default=None)
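
With the new default, runs are zero-shot unless few-shot examples are requested explicitly; e.g. (task name illustrative, model flags omitted):

    python main.py --tasks cola --num_fewshot 3

restores the old few-shot behavior, while omitting --num_fewshot now evaluates zero-shot.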