Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
lm-evaluation-harness
Commits
3b4d0af1
"examples/vscode:/vscode.git/clone" did not exist on "2c7d0a5b8b33d9a90ede19a0ee227393982ac340"
Commit
3b4d0af1
authored
Jul 08, 2025
by
Baber
Browse files
refactor: update type hints and improve filter ensemble construction
parent
c81c03ee
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
23 additions
and
16 deletions
+23
-16
lm_eval/api/model.py
lm_eval/api/model.py
+2
-2
lm_eval/api/task.py
lm_eval/api/task.py
+3
-3
lm_eval/config/task.py
lm_eval/config/task.py
+16
-9
lm_eval/filters/__init__.py
lm_eval/filters/__init__.py
+2
-2
No files found.
lm_eval/api/model.py
View file @
3b4d0af1
...
...
@@ -101,7 +101,7 @@ class LM(abc.ABC):
# TODO: Add an optional max length
@
abc
.
abstractmethod
def
generate_until
(
self
,
requests
:
list
[
Instance
])
->
list
[
str
]:
def
generate_until
(
self
,
requests
:
list
[
"
Instance
"
])
->
list
[
str
]:
"""Generate greedily until a stopping sequence
:param requests: list[Instance]
...
...
@@ -432,7 +432,7 @@ class TemplateLM(LM):
@
abc
.
abstractmethod
def
generate_until
(
self
,
requests
,
disable_tqdm
:
bool
=
False
self
,
requests
:
list
[
"Instance"
]
,
disable_tqdm
:
bool
=
False
)
->
list
[
str
]:
"""Generate until a stopping sequence.
...
...
lm_eval/api/task.py
View file @
3b4d0af1
...
...
@@ -102,9 +102,9 @@ class Task(abc.ABC):
self
.
_fewshot_docs
:
Optional
[
list
]
=
None
self
.
_instances
:
Optional
[
List
[
Instance
]]
=
None
self
.
_config
:
TaskConfig
=
TaskConfig
({
**
config
})
if
config
else
TaskConfig
()
self
.
_config
:
TaskConfig
=
TaskConfig
.
from_yaml
({
**
config
})
self
.
_filters
=
[
build_filter_ensemble
(
"none"
,
[
[
"take_first"
,
None
]
])]
self
.
_filters
=
[
build_filter_ensemble
(
"none"
,
[
(
"take_first"
,
None
)
])]
self
.
fewshot_rnd
:
Optional
[
random
.
Random
]
=
(
None
# purposely induce errors in case of improper usage
)
...
...
@@ -655,7 +655,7 @@ class ConfigurableTask(Task):
else
:
self
.
prompt
=
None
if
self
.
config
.
fewshot_cfg
.
num
>
0
and
self
.
fewshot_docs
()
is
not
None
:
if
self
.
config
.
fewshot_cfg
.
num
()
>
0
and
self
.
fewshot_docs
()
is
not
None
:
self
.
fewshot_rnd
=
random
.
Random
()
self
.
sampler
=
self
.
config
.
fewshot_cfg
.
init_sampler
(
list
(
self
.
fewshot_docs
()),
self
,
rnd
=
self
.
fewshot_rnd
...
...
lm_eval/config/task.py
View file @
3b4d0af1
...
...
@@ -2,7 +2,6 @@ import logging
from
dataclasses
import
asdict
,
dataclass
,
field
from
typing
import
TYPE_CHECKING
,
Callable
,
Iterable
,
Optional
,
Union
from
lm_eval.api.filter
import
FilterEnsemble
from
lm_eval.api.instance
import
OutputType
from
lm_eval.config.metric
import
MetricConfig
from
lm_eval.config.utils
import
maybe_serialize
...
...
@@ -10,7 +9,8 @@ from lm_eval.config.utils import maybe_serialize
if
TYPE_CHECKING
:
from
lm_eval.api.samplers
import
ContextSampler
from
lm_eval.api.task
import
Task
,
eval_logger
from
lm_eval.api.task
import
Task
from
lm_eval.filters
import
FilterEnsemble
eval_logger
=
logging
.
getLogger
(
__name__
)
...
...
@@ -35,7 +35,9 @@ class FilterConfig:
@
dataclass
class
FewshotConfig
:
num
:
int
=
0
# hack: this returns task.config.num_fewshot
# to keep in sync as it is runtime-modified
num_fewshot
:
Callable
[[],
int
]
split
:
Optional
[
str
]
=
None
sampler
:
Union
[
str
,
Callable
]
=
"default"
samples
:
Union
[
Callable
[[],
list
[
dict
]],
list
[
dict
],
None
]
=
None
...
...
@@ -162,10 +164,10 @@ class TaskConfig(dict):
fewshot_config
:
Optional
[
dict
]
=
None
# runtime configuration options
num_fewshot
:
Optional
[
int
]
=
0
generation_kwargs
:
Optional
[
dict
]
=
None
# scoring options
metric_list
:
Optional
[
list
]
=
None
output_type
:
OutputType
=
"generate_until"
generation_kwargs
:
Optional
[
dict
]
=
None
repeats
:
int
=
1
filter_list
:
Optional
[
list
[
dict
]]
=
None
should_decontaminate
:
bool
=
False
...
...
@@ -224,6 +226,7 @@ class TaskConfig(dict):
# ---setup fewshot config--- #
_fewshot_cfg
=
self
.
fewshot_config
if
self
.
fewshot_config
is
not
None
else
{}
self
.
fewshot_cfg
=
FewshotConfig
(
num_fewshot
=
lambda
:
self
.
num_fewshot
or
_fewshot_cfg
[
"num_fewshot"
],
split
=
self
.
fewshot_split
,
sampler
=
_fewshot_cfg
.
get
(
"sampler"
,
"default"
),
samples
=
_fewshot_cfg
.
get
(
"samples"
,
None
),
...
...
@@ -331,26 +334,30 @@ class TaskConfig(dict):
eval_logger
.
debug
(
"No custom filters defined; falling back to 'take_first' for handling repeats."
)
return
[
build_filter_ensemble
(
"none"
,
[
[
"take_first"
,
None
]
])]
return
[
build_filter_ensemble
(
"none"
,
[
(
"take_first"
,
None
)
])]
else
:
def
_strip_fn
(
d
:
dict
)
->
dict
:
return
{
k
:
v
for
k
,
v
in
d
.
items
()
if
k
!=
"function"
}
def
_strip_fn
(
d
:
dict
)
->
tuple
[
str
,
dict
]
:
return
d
[
"function"
],
{
k
:
v
for
k
,
v
in
d
.
items
()
if
k
!=
"function"
}
configs
=
(
self
.
filter_list
.
values
()
if
isinstance
(
self
.
filter_list
,
dict
)
else
self
.
filter_list
)
return
[
build_filter_ensemble
(
filter_name
=
cfg
[
"name"
],
components
=
[
[
_strip_fn
(
f
)
for
f
in
cfg
[
"filter"
]]
]
,
components
=
[
_strip_fn
(
f
)
for
f
in
cfg
[
"filter"
]],
)
for
cfg
in
configs
]
@
classmethod
def
from_yaml
(
cls
,
data
:
dict
)
->
"TaskConfig"
:
"""Create a TaskConfig instance from a YAML-like dictionary."""
return
cls
(
**
data
)
def
__getitem__
(
self
,
item
):
return
getattr
(
self
,
item
)
...
...
lm_eval/filters/__init__.py
View file @
3b4d0af1
from
functools
import
partial
from
typing
import
List
,
Union
from
typing
import
Iterable
,
List
,
Optional
,
Union
from
lm_eval.api.filter
import
FilterEnsemble
from
lm_eval.api.registry
import
get_filter
...
...
@@ -8,7 +8,7 @@ from . import custom, extraction, selection, transformation
def
build_filter_ensemble
(
filter_name
:
str
,
components
:
list
[
Union
[
list
[
dict
],
list
[
str
]]]
filter_name
:
str
,
components
:
list
[
tuple
[
str
,
Optional
[
dict
]]]
)
->
FilterEnsemble
:
"""
Create a filtering pipeline.
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment