Commit cb8dfe63 authored by Baber

add `sample_metric` and `is_elementwise` to MetricConfig

parent 108674ed
@@ -63,6 +63,8 @@ class MetricConfig:
     aggregation_fn: Optional[Callable] = None
     higher_is_better: bool = True
     hf_evaluate: bool = False
+    sample_metric: bool = True
+    is_elementwise: bool = True
 
     @cached_property
     def metric_names(self) -> str:
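
The two new flags are not documented in this commit, so the sketch below only illustrates how the extended MetricConfig might be instantiated. The class is trimmed to the fields visible in the hunk, and the reading of sample_metric (scored per sample) and is_elementwise (one score per prediction) is an assumption:

from dataclasses import dataclass
from statistics import mean
from typing import Callable, Optional


@dataclass
class MetricConfig:
    # Trimmed to the fields visible in this hunk; the real class has more.
    aggregation_fn: Optional[Callable] = None
    higher_is_better: bool = True
    hf_evaluate: bool = False
    sample_metric: bool = True    # assumption: metric is computed per sample
    is_elementwise: bool = True   # assumption: one score per (prediction, reference) pair


# Hypothetical usage: a per-sample accuracy-style metric aggregated by mean.
acc = MetricConfig(aggregation_fn=mean, sample_metric=True, is_elementwise=True)
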
@@ -81,6 +83,15 @@ class MetricConfig:
         return self.higher_is_better
 
 
+@dataclass
+class RepeatConfig:
+    """Encapsulates information about a single repeat."""
+
+    repeats: int = 1
+    metric_fn: Optional[Callable] = None
+    kwargs: Optional[dict] = None
+
+
 @dataclass
 class FilterConfig:
     """Encapsulates information about a single filter."""
@@ -94,6 +105,7 @@ class FilterConfig:
 class FewshotConfig:
     sampler: str
     samples: list[dict]
+    process_docs: Optional[Callable] = None
 
 
 @dataclass
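
process_docs mirrors the hook of the same name used elsewhere in the harness for dataset preprocessing; the assumption here is that it lets a task post-process the few-shot pool before sampling. A minimal sketch, with illustrative field values:

from dataclasses import dataclass
from typing import Callable, Optional


@dataclass
class FewshotConfig:
    sampler: str
    samples: list[dict]
    process_docs: Optional[Callable] = None


def strip_fields(docs: list[dict]) -> list[dict]:
    # Hypothetical hook: tidy whitespace in every string field of the few-shot docs.
    return [
        {k: v.strip() if isinstance(v, str) else v for k, v in doc.items()}
        for doc in docs
    ]


fewshot = FewshotConfig(
    sampler="first_n",  # sampler name is illustrative only
    samples=[{"question": " 2 + 2 = ? ", "answer": "4"}],
    process_docs=strip_fields,
)
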
@@ -948,32 +960,6 @@ class ConfigurableTask(Task):
         self._filters = self.config.get_filters()
-        # if self.config.filter_list is not None:
-        #     self._filters = []
-        #     if isinstance(self.config.filter_list, dict):
-        #         for filter_config in self.config.filter_list:
-        #             self._filters.append(
-        #                 build_filter_ensemble(
-        #                     filter_config["name"],
-        #                     [
-        #                         [
-        #                             {
-        #                                 key: function[key]
-        #                                 for key in function
-        #                                 if key != "function"
-        #                             }
-        #                         ]
-        #                         for function in filter_config["filter"]
-        #                     ],
-        #                 )
-        #             )
-        # else:
-        #     # TODO: handle repeats in a more general way rather than just discarding
-        #     eval_logger.debug(
-        #         "No custom filters defined. Using default 'take_first' filter for handling repeats."
-        #     )
-        #     self._filters = [build_filter_ensemble("none", [["take_first", None]])]
 
         if self.config.use_prompt is not None:
             eval_logger.info(f"loading prompt {self.config.use_prompt}")
             self.prompt = get_prompt(
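
The deleted block was already commented out; its active replacement, self.config.get_filters(), presumably preserves the same default "take_first" handling of repeats that the old debug message describes. As a standalone, assumed illustration of that behaviour (not the harness API):

def take_first(responses_per_doc: list[list[str]]) -> list[str]:
    # Keep only the first of the repeated responses generated for each doc,
    # discarding the rest (the default named in the old debug message).
    return [responses[0] for responses in responses_per_doc]


assert take_first([["a", "b", "c"], ["x", "y"]]) == ["a", "x"]
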