template.py 5.29 KB
Newer Older
Baber's avatar
Baber committed
1
2
from __future__ import annotations

3
from abc import ABC, abstractmethod
Baber's avatar
Baber committed
4
from dataclasses import dataclass, field
Baber's avatar
Baber committed
5
from typing import TYPE_CHECKING, Callable
Baber's avatar
Baber committed
6

7
8
from lm_eval.config.utils import create_mc_choices

Baber's avatar
Baber committed
9
10
11
12
13
14

if TYPE_CHECKING:
    from lm_eval.config.metric import MetricConfig


@dataclass
class TemplateConfig(ABC):
    """Abstract base describing how a document is rendered into a prompt.

    Concrete templates must implement ``_doc_to_text``; ``_doc_to_choice`` and
    ``_doc_to_target`` are optional hooks that raise ``NotImplementedError``
    unless overridden.

    Note for subclass authors: dataclass fields keep the order in which they
    are first declared, so a subclass that re-declares a field with a default
    must make sure no later field that is still an ``__init__`` parameter
    lacks one (otherwise ``@dataclass`` raises ``TypeError``).
    """

    # Identifier of the template kind (the concrete templates in this module
    # use values such as "mcq" and "cloze").
    template: str
    # Name of the task this template belongs to.
    task: str
    # Prompt text: a literal string, a callable producing it from a document,
    # or a list of strings.
    doc_to_text: str | Callable[[dict], str] | list[str]
    # Answer choices: a document key, a literal list, or a callable.
    doc_to_choice: str | list | Callable[[dict], list]
    # Gold target: a literal value, a document key, or a callable.
    doc_to_target: int | str | Callable[[dict], int | str]
    # Not consumed in this module; presumably a task description that is
    # prepended to the prompt -- TODO confirm against the prompt builder.
    description: str
    # The concrete templates build the context as
    # context_prefix + prefix_delimiter + <text> + context_delimiter + ... + answer_suffix.
    context_prefix: str
    prefix_delimiter: str
    context_delimiter: str
    answer_suffix: str
    # Not consumed in this module; presumably separates the context from the
    # target continuation -- TODO confirm.
    target_delimiter: str
    # Not consumed in this module; presumably selects how choices are
    # labelled (e.g. "letters") -- TODO confirm.
    choice_format: str | None
    # Passed to ``create_mc_choices`` by the concrete templates.
    choice_delimiter: str | None
    # Not consumed in this module; presumably separates few-shot examples.
    fewshot_delimiter: str
    # A factory is used so that every instance gets its own list.
    metric_list: list[str] | list[MetricConfig] | None = field(
        default_factory=lambda: ["acc", "acc_norm"]
    )

    @abstractmethod
    def _doc_to_text(self, doc: dict) -> str:
        """Convert a document to text.

        Args:
            doc: The document (one dataset row) to render.

        Returns:
            The prompt text for ``doc``.
        """
        raise NotImplementedError

    def _doc_to_choice(self, doc: dict) -> str:
        """Convert a document to choices.

        Optional hook (deliberately not abstract).

        Raises:
            NotImplementedError: Always, unless overridden by a subclass.
        """
        raise NotImplementedError

    def _doc_to_target(self, doc: dict) -> int | str:
        """Convert a document to target.

        Optional hook (deliberately not abstract).

        Raises:
            NotImplementedError: Always, unless overridden by a subclass.
        """
        raise NotImplementedError

Baber's avatar
Baber committed
50
51

@dataclass
class MCQTemplateConfig:
    """Encapsulates information about a multiple-choice template.

    Would return a sample with the following format:
    Question: <doc_to_text(doc)>
    A. <doc_to_choice(doc)[0]>
    B. <doc_to_choice(doc)[1]>
    C. <doc_to_choice(doc)[2]>
    D. <doc_to_choice(doc)[3]>
    Answer: `doc_to_choice(doc)` for each choice.
    """

    # Question text: a literal string, or a callable producing it from a doc.
    doc_to_text: str | Callable[[dict], str]
    # Choices: a literal list, a document key, or a callable returning a list.
    doc_to_choice: list[str] | str | Callable[[dict], list[str]]
    # Gold target: a literal value, a document key, or a callable.
    doc_to_target: int | str | Callable[[dict], int]
    # Plain class attribute (no annotation), so it is not an __init__ parameter.
    template = "mcq"
    context_prefix: str = "Question:"
    prefix_delimiter: str = " "
    context_delimiter: str = "\n"
    answer_suffix: str = "Answer:"
    target_delimiter: str = "\n"
    choice_format: str | None = "letters"
    choice_delimiter: str = "\n"
    fewshot_delimiter: str = "\n\n"
    # A factory is used so that every instance gets its own list.
    metric_list: list[str] | list[MetricConfig] | None = field(
        default_factory=lambda: ["acc"]
    )

    def _doc_to_text(self, doc: dict) -> str:
        """Convert a document to text.

        Builds ``context_prefix + prefix_delimiter + <question> +
        context_delimiter + <choices> + answer_suffix``.

        Args:
            doc: The document (one dataset row) to render.

        Returns:
            The assembled prompt text.
        """
        question: str = (
            self.doc_to_text
            if isinstance(self.doc_to_text, str)
            else self.doc_to_text(doc)
        )
        return (
            self.context_prefix
            + self.prefix_delimiter
            + question
            + self.context_delimiter
            # Go through _doc_to_choice so callable / column-name choices are
            # resolved before formatting; for a literal list this is the same
            # call the method made inline before.
            + self._doc_to_choice(doc)
            + self.answer_suffix
        )

    def _doc_to_choice(self, doc: dict) -> str:
        """Convert a document to its formatted choices.

        ``doc_to_choice`` is resolved as: callable -> called with ``doc``;
        string -> looked up as a key in ``doc``; anything else -> used as is.
        The result is passed to ``create_mc_choices`` together with
        ``choice_delimiter``.
        """
        if callable(self.doc_to_choice):
            choices = self.doc_to_choice(doc)
        elif isinstance(self.doc_to_choice, str):
            choices = doc[self.doc_to_choice]
        else:
            choices = self.doc_to_choice
        return create_mc_choices(choices, choice_delimiter=self.choice_delimiter)

    def _doc_to_target(self, doc: dict) -> int:
        """Convert a document to target.

        ``doc_to_target`` is resolved as: callable -> called with ``doc``;
        string -> looked up as a key in ``doc`` (raises ``KeyError`` if
        missing); anything else -> returned unchanged.
        """
        if callable(self.doc_to_target):
            return self.doc_to_target(doc)
        if isinstance(self.doc_to_target, str):
            return doc[self.doc_to_target]
        return self.doc_to_target

Baber's avatar
Baber committed
113
114

@dataclass
class ClozeTemplateConfig(TemplateConfig):
    """Encapsulates information about a cloze-style template.

    Would return a sample with the following format:
    Question: <doc_to_text(doc)>
    Answer: <doc_to_target(doc)>
    """

    # Question text: a literal string, or a callable producing it from a doc.
    doc_to_text: str | Callable[[dict], str]
    # Choices: a literal list, a document key, or a callable returning a list.
    doc_to_choice: list[str] | str | Callable[[dict], list[str]]
    # Gold target: a literal value, a document key, or a callable.
    doc_to_target: int | str | Callable[[dict], int]
    # `template` is the first field inherited from TemplateConfig and the
    # inherited `task` field after it has no default. Giving `template` a
    # plain default would make @dataclass raise
    # "non-default argument 'task' follows default argument", so it is
    # excluded from __init__ and fixed to "cloze" instead.
    template: str = field(default="cloze", init=False)
    description: str = ""
    context_prefix: str = "Question:"
    prefix_delimiter: str = " "
    context_delimiter: str = "\n"
    answer_suffix: str = "Answer:"
    target_delimiter: str = " "
    choice_format: str | None = None
    choice_delimiter: str = ""
    fewshot_delimiter: str = "\n\n"
    # A factory is used so that every instance gets its own list.
    metric_list: list[str] | list[MetricConfig] | None = field(
        default_factory=lambda: ["acc", "acc_norm"]
    )

    def _doc_to_text(self, doc: dict) -> str:
        """Convert a document to text.

        Builds ``context_prefix + prefix_delimiter + <question> +
        context_delimiter + answer_suffix``.

        Args:
            doc: The document (one dataset row) to render.

        Returns:
            The assembled prompt text.
        """
        question: str = (
            self.doc_to_text
            if isinstance(self.doc_to_text, str)
            else self.doc_to_text(doc)
        )
        return (
            self.context_prefix
            + self.prefix_delimiter
            + question
            + self.context_delimiter
            + self.answer_suffix
        )

    def _doc_to_choice(self, doc: dict) -> str:
        """Convert a document to its formatted choices.

        ``doc_to_choice`` is resolved as: callable -> called with ``doc``;
        string -> looked up as a key in ``doc``; anything else -> used as is.
        The result is passed to ``create_mc_choices`` together with
        ``choice_delimiter``.
        """
        if callable(self.doc_to_choice):
            choices = self.doc_to_choice(doc)
        elif isinstance(self.doc_to_choice, str):
            choices = doc[self.doc_to_choice]
        else:
            choices = self.doc_to_choice
        return create_mc_choices(choices, choice_delimiter=self.choice_delimiter)

    def _doc_to_target(self, doc: dict) -> int:
        """Convert a document to target.

        ``doc_to_target`` is resolved as: callable -> called with ``doc``;
        string -> looked up as a key in ``doc`` (raises ``KeyError`` if
        missing); anything else -> returned unchanged.
        """
        if callable(self.doc_to_target):
            return self.doc_to_target(doc)
        if isinstance(self.doc_to_target, str):
            return doc[self.doc_to_target]
        return self.doc_to_target