Commit 99ce4eff authored by lintangsutawika
Browse files

process hf evaluate metrics

parent 150f11f6
...@@ -159,21 +159,6 @@ def acc_mutual_info_fn(items): ...@@ -159,21 +159,6 @@ def acc_mutual_info_fn(items):
return mean(items) return mean(items)
class HFEvaluateAdaptor:
    """Wrap a Hugging Face ``evaluate`` metric as a callable over items.

    The metric is loaded once at construction time; calling the adaptor
    splits the collected items into references and predictions and passes
    them to the metric.
    """

    def __init__(self, *metric_args, **kwargs):
        """Load the metric and pre-bind its keyword arguments.

        Args:
            *metric_args: Positional arguments forwarded to ``evaluate.load``.
            **kwargs: Keyword arguments bound to every metric computation.
        """
        metric_object = evaluate.load(*metric_args)
        # The loaded module is evaluated through its ``compute`` method;
        # binding the module object itself would fail when called.
        self.hf_evaluate_fn = partial(metric_object.compute, **kwargs)

    def __call__(self, items):
        """Compute the metric over ``items``.

        Args:
            items: Iterable of sequences whose element 0 is used as the
                reference and element 1 as the prediction.

        Returns:
            Whatever the bound metric function returns for these
            references and predictions.

        Raises:
            IndexError: If ``items`` is empty or its entries have fewer
                than two elements.
        """
        # Transpose once so one-shot iterables (generators) are supported
        # and the work is not repeated for each column.
        columns = list(zip(*items))
        refs, preds = columns[0], columns[1]
        return self.hf_evaluate_fn(references=refs, predictions=preds)
exact_match = evaluate.load("exact_match") exact_match = evaluate.load("exact_match")
@register_metric( @register_metric(
......
import os import os
import logging
import evaluate import evaluate
from functools import partial
from lm_eval.api.model import LM from lm_eval.api.model import LM
from lm_eval.api.metrics import HFEvaluateAdaptor
import logging
eval_logger = logging.getLogger("lm-eval") eval_logger = logging.getLogger("lm-eval")
MODEL_REGISTRY = {} MODEL_REGISTRY = {}
class HFEvaluateAdaptor:
    """Wrap a Hugging Face ``evaluate`` metric as a callable over items.

    The metric is loaded by name once at construction time; calling the
    adaptor splits the collected items into references and predictions,
    runs the metric's ``compute`` and returns the entry stored under the
    metric's name.
    """

    def __init__(self, name, **kwargs):
        """Load the metric ``name`` and pre-bind ``kwargs`` to its ``compute``.

        Args:
            name: Metric identifier passed to ``evaluate.load``; also used
                as the key looked up in the computed result.
            **kwargs: Keyword arguments bound to every ``compute`` call.
        """
        self.name = name
        metric_object = evaluate.load(name)
        self.hf_evaluate_fn = partial(metric_object.compute, **kwargs)

    def __call__(self, items):
        """Compute the metric over ``items`` and return its named score.

        Args:
            items: Iterable of sequences whose element 0 is used as the
                reference and element 1 as the prediction.

        Returns:
            The value stored under ``self.name`` in the computed result.

        Raises:
            IndexError: If ``items`` is empty or its entries have fewer
                than two elements.
        """
        # Transpose once so one-shot iterables (generators) are supported
        # and the work is not repeated for each column.
        columns = list(zip(*items))
        refs, preds = columns[0], columns[1]
        result = self.hf_evaluate_fn(references=refs, predictions=preds)
        # NOTE(review): assumes the result has a key equal to the metric
        # name; confirm for metrics that report under different keys.
        return result[self.name]
def register_model(*names): def register_model(*names):
# either pass a list or a single alias. # either pass a list or a single alias.
...@@ -126,7 +142,7 @@ def get_metric(name, hf_evaluate_metric=False, **kwargs): ...@@ -126,7 +142,7 @@ def get_metric(name, hf_evaluate_metric=False, **kwargs):
) )
try: try:
from lm_eval.metrics import HFEvaluateAdaptor # from lm_eval.metrics import HFEvaluateAdaptor
return HFEvaluateAdaptor(name, **kwargs) return HFEvaluateAdaptor(name, **kwargs)
except Exception: except Exception:
eval_logger.error( eval_logger.error(
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment