metric.py 1.67 KB
Newer Older
Baber's avatar
Baber committed
1
2
from __future__ import annotations

Baber's avatar
Baber committed
3
from collections.abc import Callable, Mapping
Baber's avatar
Baber committed
4
from dataclasses import dataclass, field
Baber's avatar
Baber committed
5
from functools import cached_property
Baber's avatar
Baber committed
6
from typing import Any
Baber's avatar
Baber committed
7
8
9
10
11
12
13


@dataclass
class MetricConfig:
    """Encapsulates information about a single metric.

    Bundles a metric's name, the callable that scores it, default keyword
    arguments for that callable, and an optional aggregation callable.
    When ``aggregation_fn`` or ``higher_is_better`` is ``None``, the
    ``aggregation`` / ``_higher_is_better`` properties look the value up in
    ``lm_eval.api.registry`` using ``name``.
    """

    # Identifier of the metric; also the key used for registry lookups.
    name: str
    # Scoring callable. ``compute_metric`` raises if this is None.
    fn: Callable[..., Any] | None
    # Default keyword arguments forwarded to ``fn`` on every call.
    kwargs: Mapping[str, Any] = field(default_factory=dict)
    # Explicit aggregation callable; None defers to the registry in
    # ``aggregation``.
    aggregation_fn: Callable | None = None
    # Explicit direction flag; None defers to the registry in
    # ``_higher_is_better``.
    higher_is_better: bool | None = True
    # NOTE(review): not read anywhere in this class; presumably flags metrics
    # backed by the HF `evaluate` library -- confirm against callers.
    hf_evaluate: bool = False
    # NOTE(review): not read anywhere in this class; presumably marks metrics
    # computed per sample rather than over the whole set -- confirm.
    is_elementwise: bool = True

    @cached_property
    def metric_name(self) -> str:
        """Return the metric's name (cached after the first access)."""
        return self.name

    @cached_property
    def aggregation(self) -> Callable[..., Any] | None:
        """Return the aggregation callable for this metric.

        Uses ``aggregation_fn`` when it is set; otherwise falls back to
        ``get_aggregation(self.name)`` from the registry. The result is
        cached after the first access.
        """
        if self.aggregation_fn is not None:
            return self.aggregation_fn

        # Imported lazily, and only on the fallback path, so that metrics
        # with an explicit aggregation never need the registry module.
        from lm_eval.api.registry import get_aggregation

        return get_aggregation(self.name)

    @cached_property
    def _higher_is_better(self) -> bool | None:
        """Return whether larger metric values are better.

        Uses ``higher_is_better`` when it is set; otherwise falls back to
        ``is_higher_better(self.name)`` from the registry. The result is
        cached after the first access.
        """
        if self.higher_is_better is not None:
            return self.higher_is_better

        # Imported lazily, and only on the fallback path (see `aggregation`).
        from lm_eval.api.registry import is_higher_better

        return is_higher_better(self.name)

    def compute_metric(self, *args, **kwargs) -> Any:
        """Calculates the metric using the provided function and arguments.

        Args:
            *args: Positional arguments forwarded to ``fn``.
            **kwargs: Keyword arguments forwarded to ``fn``; these override
                same-named entries in the configured ``self.kwargs``.

        Returns:
            Whatever ``fn`` returns.

        Raises:
            ValueError: If ``fn`` is None.
        """
        if self.fn is None:
            raise ValueError(f"Metric function for {self.name} is not defined.")
        # `or {}` tolerates kwargs having been explicitly set to None.
        call_kwargs = {**(self.kwargs or {}), **kwargs}
        return self.fn(*args, **call_kwargs)

    def compute_aggregation(self, *args, **kwargs) -> Any:
        """Computes the aggregation of the metric values.

        Only the explicitly configured ``aggregation_fn`` is used here; the
        registry fallback exposed by ``aggregation`` is not consulted.

        Args:
            *args: Positional arguments forwarded to ``aggregation_fn``.
            **kwargs: Keyword arguments forwarded to ``aggregation_fn``.

        Returns:
            Whatever ``aggregation_fn`` returns.

        Raises:
            ValueError: If ``aggregation_fn`` is None.
        """
        if self.aggregation_fn is None:
            raise ValueError(f"Aggregation function for {self.name} is not defined.")
        return self.aggregation_fn(*args, **kwargs)