Commit abd17276 authored by Baber
Browse files

Merge branch 'smolrefact' into tasklist

# Conflicts:
#	lm_eval/__main__.py
#	lm_eval/api/group.py
#	lm_eval/api/task.py
#	lm_eval/evaluator_utils.py
#	lm_eval/tasks/__init__.py
#	lm_eval/utils.py
#	pyproject.toml
parents 00afd536 70314843
from .evaluate_config import EvaluatorConfig
# Public names of this package: only these are exported by a star-import.
__all__ = [
"EvaluatorConfig",
]
This diff is collapsed.
from __future__ import annotations
from collections.abc import Callable, Mapping
from dataclasses import dataclass, field
from functools import cached_property
from typing import Any
@dataclass
class MetricConfig:
    """Bundle a metric's scoring callable with its aggregation settings.

    Attributes are plain dataclass fields; ``kwargs`` holds default keyword
    arguments that are forwarded to ``fn`` on every ``compute`` call.
    """

    name: str
    fn: Callable
    kwargs: Mapping[str, Any] = field(default_factory=dict)
    aggregation_fn: Callable | None = None
    higher_is_better: bool = True
    hf_evaluate: bool = False
    is_elementwise: bool = True

    @cached_property
    def metric_name(self) -> str:
        """Return the metric's name (same value as ``name``)."""
        return self.name

    @cached_property
    def aggregation(self) -> Callable[..., Any] | None:
        """Return ``aggregation_fn``, or the registry entry for ``name`` if unset."""
        # Imported here rather than at module level, as in the original.
        from lm_eval.api.registry import get_aggregation

        explicit = self.aggregation_fn
        return explicit if explicit is not None else get_aggregation(self.name)

    @cached_property
    def _higher_is_better(self) -> bool | None:
        """Return ``higher_is_better``, or the registry's answer for ``name`` if None."""
        from lm_eval.api.registry import is_higher_better

        flag = self.higher_is_better
        return flag if flag is not None else is_higher_better(self.name)

    def compute(self, *args, **kwargs) -> Any:
        """Call ``fn`` with ``args`` and the stored kwargs overlaid by ``kwargs``.

        Raises:
            ValueError: if ``fn`` is None.
        """
        scorer = self.fn
        if scorer is None:
            raise ValueError(f"Metric function for {self.name} is not defined.")
        # Call-site keyword arguments win over the configured defaults.
        merged = dict(self.kwargs or {})
        merged.update(kwargs)
        return scorer(*args, **merged)

    def compute_aggregation(self, *args, **kwargs) -> Any:
        """Call ``aggregation_fn`` with the given arguments.

        Only the explicitly configured ``aggregation_fn`` is used here; the
        registry fallback of the ``aggregation`` property is not consulted.

        Raises:
            ValueError: if ``aggregation_fn`` is None.
        """
        reducer = self.aggregation_fn
        if reducer is None:
            raise ValueError(f"Aggregation function for {self.name} is not defined.")
        return reducer(*args, **kwargs)
This diff is collapsed.
from __future__ import annotations
from abc import ABC, abstractmethod
from dataclasses import dataclass, field
from typing import TYPE_CHECKING, Callable
from lm_eval.config.utils import create_mc_choices
if TYPE_CHECKING:
from lm_eval.config.metric import MetricConfig
@dataclass
class TemplateConfig(ABC):
    """Abstract base describing how a document is rendered into a prompt.

    Only ``_doc_to_text`` is marked abstract; ``_doc_to_choice`` and
    ``_doc_to_target`` are ordinary methods that raise ``NotImplementedError``
    until a subclass overrides them.
    """

    # Template kind and owning task name.
    template: str
    task: str

    # Per-document accessors: a literal value, or a callable taking the doc.
    doc_to_text: str | Callable[[dict], str] | list[str]
    doc_to_choice: str | list | Callable[[dict], list]
    doc_to_target: int | Callable[[dict], int]

    # Prompt layout pieces. NOTE(review): exact placement of each piece is
    # decided by subclasses -- confirm against the concrete templates.
    description: str
    context_prefix: str
    prefix_delimiter: str
    context_delimiter: str
    answer_suffix: str
    target_delimiter: str
    choice_format: str | None
    choice_delimiter: str | None
    fewshot_delimiter: str

    # A fresh list per instance, so the default is never shared.
    metric_list: list[str] | list[MetricConfig] | None = field(
        default_factory=lambda: ["acc", "acc_norm"]
    )

    @abstractmethod
    def _doc_to_text(self, doc: dict) -> str:
        """Render ``doc`` as the prompt text; subclasses must implement this."""
        raise NotImplementedError

    def _doc_to_choice(self, doc: dict) -> str:
        """Render the answer choices for ``doc``; not implemented in the base."""
        raise NotImplementedError

    def _doc_to_target(self, doc: dict) -> int | str:
        """Return the target for ``doc``; not implemented in the base."""
        raise NotImplementedError
@dataclass
class MCQTemplateConfig:
    """Prompt template for multiple-choice questions.

    Intended sample layout (the choice rendering itself is delegated to
    ``create_mc_choices``)::

        Question: <doc_to_text(doc)>
        A. <doc_to_choice(doc)[0]>
        B. <doc_to_choice(doc)[1]>
        C. <doc_to_choice(doc)[2]>
        D. <doc_to_choice(doc)[3]>
        Answer:

    ``doc_to_choice`` and ``doc_to_target`` may each be a literal value, a key
    into the document, or a callable taking the document.
    """

    doc_to_text: str | Callable[[dict], str]
    doc_to_choice: str | list[str] | Callable[[dict], list[str]]
    doc_to_target: int | str | Callable[[dict], int]
    # Deliberately un-annotated: a class-level tag, not a dataclass field.
    template = "mcq"
    context_prefix: str = "Question:"
    prefix_delimiter: str = " "
    context_delimiter: str = "\n"
    answer_suffix: str = "Answer:"
    target_delimiter: str = "\n"
    choice_format: str | None = "letters"
    choice_delimiter: str = "\n"
    fewshot_delimiter: str = "\n\n"
    metric_list: list[str] | list[MetricConfig] | None = field(
        default_factory=lambda: ["acc"]
    )

    def _resolve_choices(self, doc: dict) -> list[str]:
        """Return the raw choices for ``doc``.

        A callable ``doc_to_choice`` is called with ``doc``, a string is used
        as a key into ``doc``, and anything else is returned unchanged.
        """
        spec = self.doc_to_choice
        if callable(spec):
            return spec(doc)
        if isinstance(spec, str):
            return doc[spec]
        return spec

    def _doc_to_text(self, doc: dict) -> str:
        """Build the prompt: prefix, question, rendered choices, answer suffix."""
        question: str = (
            self.doc_to_text
            if isinstance(self.doc_to_text, str)
            else self.doc_to_text(doc)
        )
        # Resolve the choices per document first, so a doc-key or callable
        # ``doc_to_choice`` is rendered the same way as in ``_doc_to_choice``
        # instead of being handed to the formatter unresolved.
        rendered_choices = create_mc_choices(
            self._resolve_choices(doc), choice_delimiter=self.choice_delimiter
        )
        return (
            self.context_prefix
            + self.prefix_delimiter
            + question
            + self.context_delimiter
            + rendered_choices
            + self.answer_suffix
        )

    def _doc_to_choice(self, doc: dict) -> str:
        """Return the choices for ``doc`` formatted by ``create_mc_choices``."""
        return create_mc_choices(
            self._resolve_choices(doc), choice_delimiter=self.choice_delimiter
        )

    def _doc_to_target(self, doc: dict) -> int:
        """Return the target for ``doc``.

        A callable ``doc_to_target`` is called with ``doc``, a string is used
        as a key into ``doc``, and anything else is returned unchanged.
        """
        spec = self.doc_to_target
        if callable(spec):
            return spec(doc)
        if isinstance(spec, str):
            return doc[spec]
        return spec
@dataclass
class ClozeTemplateConfig(TemplateConfig):
    """Prompt template for cloze-style (free completion) questions.

    Intended sample layout::

        Question: <doc_to_text(doc)>
        Answer: <doc_to_target(doc)>

    ``doc_to_choice`` and ``doc_to_target`` may each be a literal value, a key
    into the document, or a callable taking the document.
    """

    doc_to_text: str | Callable[[dict], str]
    doc_to_choice: str | list[str] | Callable[[dict], list[str]]
    doc_to_target: int | str | Callable[[dict], int]
    # An overridden field keeps its base-class position, so a plain default
    # here would sit ahead of the required ``task`` field and make
    # ``@dataclass`` raise "non-default argument 'task' follows default
    # argument". Excluding it from ``__init__`` avoids that and fixes the tag.
    template: str = field(default="cloze", init=False)
    description: str = ""
    context_prefix: str = "Question:"
    prefix_delimiter: str = " "
    context_delimiter: str = "\n"
    answer_suffix: str = "Answer:"
    target_delimiter: str = " "
    choice_format: str | None = None
    choice_delimiter: str = ""
    fewshot_delimiter: str = "\n\n"
    metric_list: list[str] | list[MetricConfig] | None = field(
        default_factory=lambda: ["acc", "acc_norm"]
    )

    def _doc_to_text(self, doc: dict) -> str:
        """Build the prompt: prefix, question text, then the answer suffix."""
        question: str = (
            self.doc_to_text
            if isinstance(self.doc_to_text, str)
            else self.doc_to_text(doc)
        )
        return (
            self.context_prefix
            + self.prefix_delimiter
            + question
            + self.context_delimiter
            + self.answer_suffix
        )

    def _doc_to_choice(self, doc: dict) -> str:
        """Return the choices for ``doc`` formatted by ``create_mc_choices``.

        A callable ``doc_to_choice`` is called with ``doc``, a string is used
        as a key into ``doc``, and anything else is passed through unchanged.
        """
        spec = self.doc_to_choice
        if callable(spec):
            choices = spec(doc)
        elif isinstance(spec, str):
            choices = doc[spec]
        else:
            choices = spec
        return create_mc_choices(choices, choice_delimiter=self.choice_delimiter)

    def _doc_to_target(self, doc: dict) -> int:
        """Return the target for ``doc``.

        A callable ``doc_to_target`` is called with ``doc``, a string is used
        as a key into ``doc``, and anything else is returned unchanged.
        """
        spec = self.doc_to_target
        if callable(spec):
            return spec(doc)
        if isinstance(spec, str):
            return doc[spec]
        return spec
This diff is collapsed.
This diff is collapsed.
...@@ -5,8 +5,9 @@ import traceback ...@@ -5,8 +5,9 @@ import traceback
from typing import Iterator, List, Sequence, Tuple, TypeVar from typing import Iterator, List, Sequence, Tuple, TypeVar
# This is a cpp module. Compile janitor_util.cpp with: # This is a cpp module.
# c++ -O3 -Wall -shared -std=c++11 -fPIC $(python3 -m pybind11 --includes) janitor_util.cpp -o janitor_util$(python3-config --extension-suffix) -undefined dynamic_lookup # See scripts/clean_training_data/README.md for instructions to compile janitor_util.cpp
try: try:
import janitor_util import janitor_util
......
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
...@@ -27,7 +27,6 @@ class TakeFirstFilter(Filter): ...@@ -27,7 +27,6 @@ class TakeFirstFilter(Filter):
class TakeKFilter(Filter): class TakeKFilter(Filter):
def __init__(self, **kwargs) -> None: def __init__(self, **kwargs) -> None:
self.k = kwargs.pop("k") self.k = kwargs.pop("k")
super().__init__(**kwargs) super().__init__(**kwargs)
def apply(self, resps, docs): def apply(self, resps, docs):
......
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment