"test/git@developer.sourcefind.cn:gaoqiong/migraphx.git" did not exist on "123f7a01d8f63d18d0a3a5433b6eed3d0749f07e"
Commit c93093b6 authored by cjlovering

Removed the default option for an acc task

parent 4ae2ab37
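
The removals in the first hunk below are safe only if `PromptSourceTask` already supplies the same `acc` defaults that `BoolQ` was restating. A minimal sketch of what such base-class defaults would look like (these method bodies are an assumption for illustration, not code from this commit):

```python
from statistics import mean


class PromptSourceTask:
    """Sketch of the assumed base-class defaults (hypothetical)."""

    def higher_is_better(self):
        # Assumed default: a single accuracy metric where larger is better.
        return {"acc": True}

    def aggregation(self):
        # Assumed default: average the per-example accuracies.
        return {"acc": mean}
```
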
@@ -54,16 +54,6 @@ class BoolQ(PromptSourceTask):
     def validation_docs(self):
         return self.dataset["validation"]
 
-    def higher_is_better(self):
-        return {
-            "acc": True
-        }
-
-    def aggregation(self):
-        return {
-            "acc": mean
-        }
-
 
 class CommitmentBank(PromptSourceTask):
     VERSION = 1
@@ -90,18 +80,12 @@ class CommitmentBank(PromptSourceTask):
     def process_results(self, doc, results):
         gold = doc["label"]
         pred = np.argmax(results)
-        acc = 1. if pred == gold else 0.
-
-        return {
-            "acc": acc,
-            "f1": (pred, gold)
-        }
+        acc = 1.0 if pred == gold else 0.0
+        return {"acc": acc, "f1": (pred, gold)}
 
     def higher_is_better(self):
-        return {
-            "acc": True,
-            "f1": True
-        }
+        return {"acc": True, "f1": True}
 
     @classmethod
     def cb_multi_fi(cls, items):
@@ -113,7 +97,7 @@ class CommitmentBank(PromptSourceTask):
         f13 = sklearn.metrics.f1_score(y_true=golds == 2, y_pred=preds == 2)
         avg_f1 = mean([f11, f12, f13])
         return avg_f1
 
     def aggregation(self):
         return {
             "acc": mean,
@@ -146,21 +130,15 @@ class Copa(PromptSourceTask):
     def process_results(self, doc, results):
         gold = doc["label"]
         pred = np.argmax(results)
-        acc = 1. if pred == gold else 0.
-
-        return {
-            "acc": acc
-        }
+        acc = 1.0 if pred == gold else 0.0
+        return {"acc": acc}
 
     def higher_is_better(self):
-        return {
-            "acc": True
-        }
+        return {"acc": True}
 
     def aggregation(self):
-        return {
-            "acc": mean
-        }
+        return {"acc": mean}
 
     @staticmethod
     def convert_choice(choice):
@@ -192,19 +170,13 @@ class MultiRC(PromptSourceTask):
     def process_results(self, doc, results):
         ll_true_choice, ll_false_choice = results
         pred = ll_true_choice > ll_false_choice
-        return {
-            "acc": (pred, doc)
-        }
+        return {"acc": (pred, doc)}
 
     def higher_is_better(self):
-        return {
-            "acc": True
-        }
+        return {"acc": True}
 
     def aggregation(self):
-        return {
-            "acc": acc_all
-        }
+        return {"acc": acc_all}
 
 class ReCoRD(PromptSourceTask):
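
MultiRC's `process_results` hands back the raw `(pred, doc)` pair because its accuracy is computed per question rather than per answer option: a question only counts as correct when every candidate answer for it is judged correctly, which is why the aggregation is `acc_all` instead of `mean`. A plausible sketch of such an aggregator, assuming each doc carries SuperGLUE's `idx` fields (the grouping keys here are an assumption):

```python
from collections import defaultdict


def acc_all(items):
    # Group per-answer predictions under their parent question; a question
    # scores 1 only if all of its candidate answers were predicted correctly.
    by_question = defaultdict(list)
    for pred, doc in items:
        key = (doc["idx"]["paragraph"], doc["idx"]["question"])
        by_question[key].append(int(pred) == doc["label"])
    correct = [all(answers) for answers in by_question.values()]
    return sum(correct) / len(correct)
```
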
@@ -255,8 +227,12 @@ class ReCoRD(PromptSourceTask):
         prediction = doc["entities"][max_idx]
         gold_label_set = doc["answers"]
-        f1 = metric_max_over_ground_truths(squad_metrics.compute_f1, prediction, gold_label_set)
-        em = metric_max_over_ground_truths(squad_metrics.compute_exact, prediction, gold_label_set)
+        f1 = metric_max_over_ground_truths(
+            squad_metrics.compute_f1, prediction, gold_label_set
+        )
+        em = metric_max_over_ground_truths(
+            squad_metrics.compute_exact, prediction, gold_label_set
+        )
 
         return {
             "f1": f1,
@@ -299,14 +275,10 @@ class WordsInContext(PromptSourceTask):
         return self.dataset["validation"]
 
     def higher_is_better(self):
-        return {
-            "acc": True
-        }
+        return {"acc": True}
 
     def aggregation(self):
-        return {
-            "acc": mean
-        }
+        return {"acc": mean}
 
 class SGWinogradSchemaChallenge(PromptSourceTask):
@@ -330,9 +302,7 @@ class SGWinogradSchemaChallenge(PromptSourceTask):
         if self._training_docs is None:
             # GPT-3 Paper's format only uses positive examples for fewshot "training"
             self._training_docs = [
-                doc for doc in
-                self.dataset["train"]
-                if doc["label"]
+                doc for doc in self.dataset["train"] if doc["label"]
             ]
         return self._training_docs
@@ -340,11 +310,7 @@ class SGWinogradSchemaChallenge(PromptSourceTask):
         return self.dataset["validation"]
 
     def higher_is_better(self):
-        return {
-            "acc": True
-        }
+        return {"acc": True}
 
     def aggregation(self):
-        return {
-            "acc": mean
-        }
+        return {"acc": mean}
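
Downstream, the evaluation loop is what gives these one-line dicts their meaning: it buckets each task's `process_results` outputs by metric name, then applies the matching aggregation function from `aggregation()`, typically consulting `higher_is_better()` only when reporting or comparing scores. A paraphrased sketch of that consumption pattern (not code from this repository):

```python
def aggregate_task(task, per_example_results):
    # per_example_results: list of dicts returned by task.process_results,
    # e.g. [{"acc": 1.0}, {"acc": 0.0}, ...].
    aggregated = {}
    for metric, agg_fn in task.aggregation().items():
        items = [result[metric] for result in per_example_results]
        aggregated[metric] = agg_fn(items)
    return aggregated
```
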