metric.py 1.6 KB
Newer Older
Baber's avatar
Baber committed
1
2
3
from __future__ import annotations

from collections.abc import Callable
Baber's avatar
Baber committed
4
5
from dataclasses import dataclass
from functools import cached_property
Baber's avatar
Baber committed
6
from typing import Any
Baber's avatar
Baber committed
7
8
9
10
11
12
13


@dataclass
class MetricConfig:
    """Encapsulates information about a single metric.

    Attributes:
        name: Metric name. Also used as the lookup key in
            ``lm_eval.api.registry`` when the aggregation function or the
            "higher is better" direction is not set explicitly.
        fn: Callable that computes the metric, or ``None`` if not provided.
        kwargs: Default keyword arguments forwarded to ``fn`` on every
            ``compute_metric`` call. ``None`` means "no defaults".
        aggregation_fn: Callable applied to the collected metric values, or
            ``None`` to let ``aggregation`` fall back to the registry.
        higher_is_better: Whether larger metric values are better. ``None``
            lets ``_higher_is_better`` fall back to the registry.
        hf_evaluate: Not read by any method of this class.
            NOTE(review): presumably marks metrics backed by the HF
            ``evaluate`` library -- confirm against callers.
        is_elementwise: Not read by any method of this class.
            NOTE(review): presumably marks per-sample metrics -- confirm
            against callers.
    """

    name: str
    fn: Callable | None = None
    kwargs: dict | None = None
    aggregation_fn: Callable | None = None
    # Annotated as optional because `_higher_is_better` explicitly handles
    # None; the default itself is unchanged.
    higher_is_better: bool | None = True
    hf_evaluate: bool = False
    is_elementwise: bool = True

    @cached_property
    def metric_name(self) -> str:
        """Return the metric's name (alias of ``name``)."""
        return self.name

    @cached_property
    def aggregation(self) -> Callable:
        """Return the aggregation callable for this metric.

        Uses ``aggregation_fn`` when it is set; otherwise looks the
        aggregation up in the registry under ``name``.
        """
        if self.aggregation_fn is not None:
            return self.aggregation_fn

        # Imported lazily, and only on the fallback path, so a fully
        # configured metric never needs to touch the registry module.
        from lm_eval.api.registry import get_aggregation

        return get_aggregation(self.name)

    @cached_property
    def _higher_is_better(self) -> bool:
        """Return whether larger values of this metric are better.

        Uses ``higher_is_better`` when it is not ``None``; otherwise asks
        the registry using ``name``.
        """
        if self.higher_is_better is not None:
            return self.higher_is_better

        # Lazy import on the fallback path only (see `aggregation`).
        from lm_eval.api.registry import is_higher_better

        return is_higher_better(self.name)

    def compute_metric(self, *args, **kwargs) -> Any:
        """Calculate the metric using the configured function.

        Args:
            *args: Positional arguments forwarded to ``fn``.
            **kwargs: Keyword arguments forwarded to ``fn``. These override
                same-named entries in the configured ``kwargs``.

        Returns:
            Whatever ``fn`` returns.

        Raises:
            ValueError: If ``fn`` is ``None``.
        """
        if self.fn is None:
            raise ValueError(f"Metric function for {self.name} is not defined.")
        # `self.kwargs` defaults to None, which cannot be unpacked with `**`;
        # treat it as an empty mapping. A fresh dict is built per call, so
        # the stored defaults are never mutated.
        merged_kwargs = {**(self.kwargs or {}), **kwargs}
        return self.fn(*args, **merged_kwargs)

    def compute_aggregation(self, values: list[Any]) -> Any:
        """Compute the aggregation of the metric values.

        Unlike the ``aggregation`` property, this method does not fall back
        to the registry: it requires ``aggregation_fn`` to be set.

        Args:
            values: Metric values to aggregate.

        Returns:
            Whatever ``aggregation_fn`` returns for ``values``.

        Raises:
            ValueError: If ``aggregation_fn`` is ``None``.
        """
        if self.aggregation_fn is None:
            raise ValueError(f"Aggregation function for {self.name} is not defined.")
        return self.aggregation_fn(values)