Commit 4604f458 authored by kabbi159
Browse files

Remove f1 metric

parent f60f1753
...@@ -21,7 +21,7 @@ Homepage: https://huggingface.co/datasets/skt/kobest_v1 ...@@ -21,7 +21,7 @@ Homepage: https://huggingface.co/datasets/skt/kobest_v1
import numpy as np import numpy as np
from lm_eval.base import MultipleChoiceTask, rf, Task from lm_eval.base import MultipleChoiceTask, rf, Task
from lm_eval.metrics import f1_score, macro_f1_score, mean from lm_eval.metrics import macro_f1_score, mean
class BoolQ(Task): class BoolQ(Task):
...@@ -65,21 +65,18 @@ class BoolQ(Task): ...@@ -65,21 +65,18 @@ class BoolQ(Task):
pred = np.argmax(results) pred = np.argmax(results)
gold = doc["label"] gold = doc["label"]
return { return {
"f1": (gold, pred),
"acc": pred == gold, "acc": pred == gold,
"macro_f1": (gold, pred) "macro_f1": (gold, pred)
} }
def higher_is_better(self): def higher_is_better(self):
return { return {
"f1": True,
"acc": True, "acc": True,
"macro_f1": True "macro_f1": True
} }
def aggregation(self): def aggregation(self):
return { return {
"f1": f1_score,
"acc": mean, "acc": mean,
"macro_f1": macro_f1_score "macro_f1": macro_f1_score
} }
...@@ -137,14 +134,12 @@ class COPA(Task): ...@@ -137,14 +134,12 @@ class COPA(Task):
pred = np.argmax(results) pred = np.argmax(results)
gold = doc["label"] gold = doc["label"]
return { return {
"f1": (gold, pred),
"acc": pred == gold, "acc": pred == gold,
"macro_f1": (gold, pred) "macro_f1": (gold, pred)
} }
def higher_is_better(self): def higher_is_better(self):
return { return {
"f1": True,
"acc": True, "acc": True,
"macro_f1": True "macro_f1": True
} }
...@@ -152,7 +147,6 @@ class COPA(Task): ...@@ -152,7 +147,6 @@ class COPA(Task):
def aggregation(self): def aggregation(self):
return { return {
"f1": f1_score,
"acc": mean, "acc": mean,
"macro_f1": macro_f1_score "macro_f1": macro_f1_score
} }
...@@ -198,21 +192,18 @@ class WiC(Task): ...@@ -198,21 +192,18 @@ class WiC(Task):
pred = np.argmax(results) pred = np.argmax(results)
gold = doc["label"] gold = doc["label"]
return { return {
"f1": (gold, pred),
"acc": pred == gold, "acc": pred == gold,
"macro_f1": (gold, pred) "macro_f1": (gold, pred)
} }
def higher_is_better(self): def higher_is_better(self):
return { return {
"f1": True,
"acc": True, "acc": True,
"macro_f1": True "macro_f1": True
} }
def aggregation(self): def aggregation(self):
return { return {
"f1": f1_score,
"acc": mean, "acc": mean,
"macro_f1": macro_f1_score "macro_f1": macro_f1_score
} }
...@@ -316,21 +307,18 @@ class SentiNeg(Task): ...@@ -316,21 +307,18 @@ class SentiNeg(Task):
pred = np.argmax(results) pred = np.argmax(results)
gold = doc["label"] gold = doc["label"]
return { return {
"f1": (gold, pred),
"acc": pred == gold, "acc": pred == gold,
"macro_f1": (gold, pred) "macro_f1": (gold, pred)
} }
def higher_is_better(self): def higher_is_better(self):
return { return {
"f1": True,
"acc": True, "acc": True,
"macro_f1": True "macro_f1": True
} }
def aggregation(self): def aggregation(self):
return { return {
"f1": f1_score,
"acc": mean, "acc": mean,
"macro_f1": macro_f1_score "macro_f1": macro_f1_score
} }
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment