Commit 5165bd38 authored by thefazzer's avatar thefazzer
Browse files

Added F1_score metric

parent 68a8790c
import abc import abc
import random import random
import collections import collections
from sklearn.metrics import precision_recall_fscore_support as score
class LM(abc.ABC): class LM(abc.ABC):
@abc.abstractmethod @abc.abstractmethod
...@@ -180,6 +180,13 @@ class Dataset(abc.ABC): ...@@ -180,6 +180,13 @@ class Dataset(abc.ABC):
def mean(arr): def mean(arr):
return sum(arr) / len(arr) return sum(arr) / len(arr)
def f1_score(items):
    """Aggregate per-document (gold, pred) pairs into one F-score.

    `items` is an iterable of pairs; position 0 of every pair is read as the
    gold label and position 1 as the predicted label. The two label columns
    are handed to `score` (sklearn's `precision_recall_fscore_support`), and
    the largest entry of the returned F-score component is the result.

    NOTE(review): with sklearn's default `average=None` the F-scores are
    presumably one value per label, so this returns the best single-label
    F-score rather than a macro/micro average -- confirm that is intended.
    NOTE(review): per-label F-score is symmetric in gold/pred, so callers
    that emit pairs in (pred, gold) order should get the same value.

    Raises IndexError when `items` is empty (there are no columns to index).
    """
    # Transpose the list of pairs into one tuple per pair position.
    columns = list(zip(*items))
    golds, preds = columns[0], columns[1]

    # Only the F-score component of the 4-tuple is needed here.
    _precision, _recall, per_label_f, _support = score(golds, preds)
    return max(per_label_f)
def median(arr): def median(arr):
return arr[len(arr) // 2] return arr[len(arr) // 2]
......
...@@ -3,7 +3,7 @@ ...@@ -3,7 +3,7 @@
import numpy as np import numpy as np
from tqdm import auto as tqdm_lib from tqdm import auto as tqdm_lib
from . common import HFTask, simple_accuracy_metric, yesno, trueneitherfalse from . common import HFTask, simple_accuracy_metric, yesno, trueneitherfalse
from lm_eval.base import rf, mean from lm_eval.base import rf, mean, f1_score
class BoolQ(HFTask): class BoolQ(HFTask):
DATASET_PATH = "super_glue" DATASET_PATH = "super_glue"
...@@ -96,10 +96,12 @@ class CommitmentBank(HFTask): ...@@ -96,10 +96,12 @@ class CommitmentBank(HFTask):
def process_results(self, doc, results): def process_results(self, doc, results):
gold = doc["label"] gold = doc["label"]
acc = 1. if (np.argmax(results)) == gold else 0. pred = np.argmax(results)
acc = 1. if pred == gold else 0.
return { return {
"acc": acc "acc": acc,
"f1": (pred, gold)
} }
def higher_is_better(self): def higher_is_better(self):
...@@ -109,7 +111,8 @@ class CommitmentBank(HFTask): ...@@ -109,7 +111,8 @@ class CommitmentBank(HFTask):
def aggregation(self): def aggregation(self):
return { return {
"acc": mean "acc": mean,
"f1": f1_score
} }
class Copa(HFTask): class Copa(HFTask):
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment