Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
lm-evaluation-harness
Commits
69e95b87
Commit
69e95b87
authored
May 23, 2025
by
Baber
Browse files
fix filters and metrics
parent
72f5a5df
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
13 additions
and
4 deletions
+13
-4
lm_eval/api/filter.py
lm_eval/api/filter.py
+9
-1
lm_eval/api/task.py
lm_eval/api/task.py
+4
-3
No files found.
lm_eval/api/filter.py
View file @
69e95b87
...
@@ -3,6 +3,7 @@ from dataclasses import dataclass
...
@@ -3,6 +3,7 @@ from dataclasses import dataclass
from
typing
import
Callable
,
Iterable
,
List
,
Union
from
typing
import
Callable
,
Iterable
,
List
,
Union
from
lm_eval.api.instance
import
Instance
from
lm_eval.api.instance
import
Instance
from
lm_eval.api.schemas
import
GenerateOutput
class
Filter
(
ABC
):
class
Filter
(
ABC
):
...
@@ -45,7 +46,14 @@ class FilterEnsemble:
...
@@ -45,7 +46,14 @@ class FilterEnsemble:
def
apply
(
self
,
instances
:
List
[
Instance
])
->
None
:
def
apply
(
self
,
instances
:
List
[
Instance
])
->
None
:
resps
,
docs
=
zip
(
*
((
inst
.
resps
,
inst
.
doc
)
for
inst
in
instances
))
resps
,
docs
=
zip
(
*
((
inst
.
resps
,
inst
.
doc
)
for
inst
in
instances
))
# TODO: add backward
# TODO: add backward
resps
,
docs
=
list
([
r
.
text
]
for
y
in
resps
for
r
in
y
),
list
(
docs
)
# unwrap responses from GenerateOutput as the filters expect strings
resps
=
tuple
(
[
item
.
text
if
isinstance
(
item
,
GenerateOutput
)
else
str
(
item
)
for
item
in
sublist
]
for
sublist
in
resps
)
for
f
in
self
.
filters
:
for
f
in
self
.
filters
:
# apply filters in sequence
# apply filters in sequence
...
...
lm_eval/api/task.py
View file @
69e95b87
...
@@ -1769,7 +1769,7 @@ class ConfigurableTask(Task):
...
@@ -1769,7 +1769,7 @@ class ConfigurableTask(Task):
def
calculate_metrics
(
def
calculate_metrics
(
self
,
instances_by_doc_id
,
filter_key
,
samples
,
rank
,
limit
,
world_size
self
,
instances_by_doc_id
,
filter_key
,
samples
,
rank
,
limit
,
world_size
):
)
->
list
[
list
[
dict
]]
:
"""Calculate metrics for all datapoints in the task.
"""Calculate metrics for all datapoints in the task.
Args:
Args:
...
@@ -1797,13 +1797,14 @@ class ConfigurableTask(Task):
...
@@ -1797,13 +1797,14 @@ class ConfigurableTask(Task):
# doc_id_true = indices[doc_id] if indices else doc_id
# doc_id_true = indices[doc_id] if indices else doc_id
requests
=
instances_by_doc_id
[
doc_id
]
requests
=
instances_by_doc_id
[
doc_id
]
metrics
=
[
metrics
:
list
[
list
[
dict
]]
=
[
self
.
process_results
(
doc
,
response
)
self
.
process_results
(
doc
,
response
)
for
req
in
requests
for
req
in
requests
for
response
in
req
.
filtered_resps
[
filter_key
]
for
response
in
req
.
filtered_resps
[
filter_key
]
]
]
all_metrics
.
extend
(
metrics
)
# TODO: This turns metrics into a list of lists of dicts rather than flat list.
all_metrics
.
append
(
metrics
)
return
all_metrics
return
all_metrics
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment