Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
lm-evaluation-harness
Commits
e2bfdf3b
Commit
e2bfdf3b
authored
Jan 23, 2024
by
lintangsutawika
Browse files
add weight_by_size config
parent
45a8f709
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
7 additions
and
7 deletions
+7
-7
lm_eval/api/task.py
lm_eval/api/task.py
+1
-0
lm_eval/evaluator.py
lm_eval/evaluator.py
+6
-7
No files found.
lm_eval/api/task.py
View file @
e2bfdf3b
...
@@ -80,6 +80,7 @@ class TaskConfig(dict):
...
@@ -80,6 +80,7 @@ class TaskConfig(dict):
filter_list
:
Union
[
str
,
list
]
=
None
filter_list
:
Union
[
str
,
list
]
=
None
should_decontaminate
:
bool
=
False
should_decontaminate
:
bool
=
False
doc_to_decontamination_query
:
str
=
None
doc_to_decontamination_query
:
str
=
None
weight_by_size
:
bool
=
False
metadata
:
Union
[
metadata
:
Union
[
str
,
list
str
,
list
...
...
lm_eval/evaluator.py
View file @
e2bfdf3b
...
@@ -123,7 +123,7 @@ def simple_evaluate(
...
@@ -123,7 +123,7 @@ def simple_evaluate(
for
task_name
in
task_dict
.
keys
():
for
task_name
in
task_dict
.
keys
():
task_obj
=
task_dict
[
task_name
]
task_obj
=
task_dict
[
task_name
]
if
type
(
task_obj
)
==
tuple
:
if
type
(
task_obj
)
==
tuple
:
group
,
task_obj
=
task_obj
_
,
task_obj
=
task_obj
if
task_obj
is
None
:
if
task_obj
is
None
:
continue
continue
...
@@ -484,12 +484,11 @@ def evaluate(
...
@@ -484,12 +484,11 @@ def evaluate(
if
"alias"
in
metrics
:
if
"alias"
in
metrics
:
metrics
.
pop
(
"alias"
)
metrics
.
pop
(
"alias"
)
current_size
=
metrics
.
pop
(
"samples"
)
if
configs
[
task
][
"weight_by_size"
]:
# TODO: There should be a way for users
current_size
=
metrics
.
pop
(
"samples"
)
# to toggle between weighted and
else
:
# unweighted averaging
metrics
.
pop
(
"samples"
)
# For unweighted averaging, use:
current_size
=
1
# current_size = 1
all_stderr
=
[]
all_stderr
=
[]
for
metric
in
[
for
metric
in
[
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment