refactor: improve default behavior for metric aggregation and higher-better checks

897fbb37 · Baber · 5c3badbe · 897fbb37 · 897fbb37 · 897fbb37
Commit 897fbb37 authored Jul 21, 2025 by Baber
Hide whitespace changes
Inline Side-by-side

Showing 3 changed files with 11 additions and 7 deletions

lm_eval/api/registry.py +8 -4

lm_eval/config/task.py +1 -1

lm_eval/config/template.py +2 -2

No files found.
--- a/lm_eval/api/registry.py
+++ b/lm_eval/api/registry.py
@@ -167,20 +167,24 @@ def get_aggregation(name: str) -> Callable[..., Any] | None:
        eval_logger.warning(f"{name} not a registered aggregation metric!")


-def get_metric_aggregation(name: str) -> Callable[[], dict[str, Callable]] | None:
+def get_metric_aggregation(name: str) -> Callable[[], dict[str, Callable]]:
    try:
        return METRIC_AGGREGATION_REGISTRY[name]
    except KeyError:
-        eval_logger.warning(f"{name} metric is not assigned a default aggregation!")
+        eval_logger.warning(
+            f"{name} metric is not assigned a default aggregation!. Using default aggregation mean"
+        )
+        return AGGREGATION_REGISTRY["mean"]


-def is_higher_better(metric_name: str) -> bool | None:
+def is_higher_better(metric_name: str) -> bool:
    try:
        return HIGHER_IS_BETTER_REGISTRY[metric_name]
    except KeyError:
        eval_logger.warning(
-            f"higher_is_better not specified for metric '{metric_name}'!"
+            f"higher_is_better not specified for metric '{metric_name}'!. Will default to True."
        )
+        return True


 def register_filter(name: str):

--- a/lm_eval/config/task.py
+++ b/lm_eval/config/task.py
@@ -240,7 +240,7 @@ class TaskConfig(dict):
                    name=metric_name,
                    fn=get_metric(metric_name),
                    aggregation_fn=get_metric_aggregation(metric_name),
-                    higher_is_better=is_higher_better(metric_name),
+                    higher_is_better=is_higher_better(metric_name),  # defaults to True
                )
                for metric_name in _metric_list
            )

--- a/lm_eval/config/template.py
+++ b/lm_eval/config/template.py
@@ -31,7 +31,7 @@ class TemplateConfig:


 @dataclass
-class MCQTemplateConfig:
+class MCQTemplateConfig(TemplateConfig):
    """Encapsulates information about a template.
    Would return a sample with the following format:
    Question: <doc_to_text(doc)>
@@ -58,7 +58,7 @@ class MCQTemplateConfig:


 @dataclass
-class ClozeTemplateConfig:
+class ClozeTemplateConfig(TemplateConfig):
    """Encapsulates information about a template.
    Would return a sample with the following format:
    Question:  <doc_to_text(doc)>