Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
lm-evaluation-harness
Commits
e72ec96c
"macapp/package.json" did not exist on "df5fdd6647e17a546e4bc66d8730541408cdf8a5"
Commit
e72ec96c
authored
Jul 25, 2025
by
Baber
Browse files
fix
parent
d762e2aa
Changes
3
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
20 additions
and
8 deletions
+20
-8
.pre-commit-config.yaml
.pre-commit-config.yaml
+2
-2
lm_eval/api/task.py
lm_eval/api/task.py
+16
-4
lm_eval/config/task.py
lm_eval/config/task.py
+2
-2
No files found.
.pre-commit-config.yaml
View file @
e72ec96c
...
@@ -29,11 +29,11 @@ repos:
...
@@ -29,11 +29,11 @@ repos:
-
id
:
mixed-line-ending
-
id
:
mixed-line-ending
args
:
[
--fix=lf
]
args
:
[
--fix=lf
]
-
repo
:
https://github.com/astral-sh/ruff-pre-commit
-
repo
:
https://github.com/astral-sh/ruff-pre-commit
rev
:
v0.12.
2
rev
:
v0.12.
5
hooks
:
hooks
:
# Run the linter.
# Run the linter.
-
id
:
ruff-check
-
id
:
ruff-check
args
:
[
--fix
]
args
:
[
--fix
]
-
id
:
ruff-format
-
id
:
ruff-format
-
repo
:
https://github.com/codespell-project/codespell
-
repo
:
https://github.com/codespell-project/codespell
rev
:
v2.4.1
rev
:
v2.4.1
...
...
lm_eval/api/task.py
View file @
e72ec96c
...
@@ -8,7 +8,6 @@ import re
...
@@ -8,7 +8,6 @@ import re
from
collections.abc
import
Callable
,
Iterable
,
Iterator
,
Mapping
from
collections.abc
import
Callable
,
Iterable
,
Iterator
,
Mapping
from
copy
import
deepcopy
from
copy
import
deepcopy
from
functools
import
cached_property
from
functools
import
cached_property
from
types
import
MethodType
from
typing
import
TYPE_CHECKING
,
Any
,
Literal
,
overload
from
typing
import
TYPE_CHECKING
,
Any
,
Literal
,
overload
import
datasets
import
datasets
...
@@ -656,9 +655,16 @@ class ConfigurableTask(Task):
...
@@ -656,9 +655,16 @@ class ConfigurableTask(Task):
)
)
self
.
task_docs
=
self
.
eval_docs
self
.
task_docs
=
self
.
eval_docs
for
_method
,
fn
in
self
.
config
.
_fn
.
items
():
# for name, fn in self.config._fn.items():
if
hasattr
(
self
,
_method
):
# if hasattr(self, name):
setattr
(
self
,
_method
,
MethodType
(
fn
,
self
))
# setattr(
# self,
# name,
# types.MethodType(
# lambda self, *args, _fn=fn, **kwargs: _fn(*args, **kwargs),
# self,
# ),
# )
self
.
runtime_checks
(
self
.
task_docs
[
0
])
self
.
runtime_checks
(
self
.
task_docs
[
0
])
...
@@ -974,6 +980,8 @@ class ConfigurableTask(Task):
...
@@ -974,6 +980,8 @@ class ConfigurableTask(Task):
# if self.prompt is not None:
# if self.prompt is not None:
# doc_to_text = self.prompt
# doc_to_text = self.prompt
doc_to_text
=
doc_to_text
or
self
.
config
.
doc_to_text
doc_to_text
=
doc_to_text
or
self
.
config
.
doc_to_text
if
callable
(
doc_to_text
):
return
doc_to_text
(
doc
)
if
doc_to_text
in
doc
:
if
doc_to_text
in
doc
:
return
doc
[
doc_to_text
]
return
doc
[
doc_to_text
]
elif
isinstance
(
doc_to_text
,
str
):
elif
isinstance
(
doc_to_text
,
str
):
...
@@ -1019,6 +1027,8 @@ class ConfigurableTask(Task):
...
@@ -1019,6 +1027,8 @@ class ConfigurableTask(Task):
# if self.prompt is not None:
# if self.prompt is not None:
# doc_to_target = self.prompt
# doc_to_target = self.prompt
doc_to_target
=
doc_to_target
or
self
.
config
.
doc_to_target
doc_to_target
=
doc_to_target
or
self
.
config
.
doc_to_target
if
callable
(
doc_to_target
):
doc_to_target
(
doc
)
if
doc_to_target
in
doc
:
if
doc_to_target
in
doc
:
return
doc
[
doc_to_target
]
return
doc
[
doc_to_target
]
elif
isinstance
(
doc_to_target
,
str
):
elif
isinstance
(
doc_to_target
,
str
):
...
@@ -1280,6 +1290,8 @@ class ConfigurableTask(Task):
...
@@ -1280,6 +1290,8 @@ class ConfigurableTask(Task):
)
)
def
process_results
(
self
,
doc
:
dict
,
results
:
list
)
->
dict
[
str
,
Any
]:
def
process_results
(
self
,
doc
:
dict
,
results
:
list
)
->
dict
[
str
,
Any
]:
if
callable
(
self
.
config
.
process_results
):
return
self
.
config
.
process_results
(
doc
,
results
)
result_dict
=
{}
result_dict
=
{}
use_metric
=
list
(
m
.
metric_name
for
m
in
self
.
config
.
_metric_list
)
use_metric
=
list
(
m
.
metric_name
for
m
in
self
.
config
.
_metric_list
)
if
self
.
OUTPUT_TYPE
==
"loglikelihood"
:
if
self
.
OUTPUT_TYPE
==
"loglikelihood"
:
...
...
lm_eval/config/task.py
View file @
e72ec96c
...
@@ -10,7 +10,7 @@ import datasets
...
@@ -10,7 +10,7 @@ import datasets
from
lm_eval.api.filter
import
FilterEnsemble
from
lm_eval.api.filter
import
FilterEnsemble
from
lm_eval.api.instance
import
OutputType
from
lm_eval.api.instance
import
OutputType
from
lm_eval.config.metric
import
MetricConfig
from
lm_eval.config.metric
import
MetricConfig
from
lm_eval.config.utils
import
doc_to_closure
,
maybe_serialize
from
lm_eval.config.utils
import
maybe_serialize
if
TYPE_CHECKING
:
if
TYPE_CHECKING
:
...
@@ -364,7 +364,7 @@ class TaskConfig:
...
@@ -364,7 +364,7 @@ class TaskConfig:
@
classmethod
@
classmethod
def
from_yaml
(
cls
,
data
:
dict
[
str
,
Any
])
->
TaskConfig
:
def
from_yaml
(
cls
,
data
:
dict
[
str
,
Any
])
->
TaskConfig
:
"""Create a TaskConfig instance from a YAML-like dictionary."""
"""Create a TaskConfig instance from a YAML-like dictionary."""
fn
=
{
k
:
doc_to_closure
(
v
)
for
k
,
v
in
data
.
items
()
if
callable
(
v
)}
fn
=
{
k
:
v
for
k
,
v
in
data
.
items
()
if
callable
(
v
)}
return
cls
(
**
data
,
_fn
=
fn
)
return
cls
(
**
data
,
_fn
=
fn
)
@
classmethod
@
classmethod
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment