Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
lm-evaluation-harness
Commits
9ef853ac
Unverified
Commit
9ef853ac
authored
Dec 13, 2023
by
Lintang Sutawika
Committed by
GitHub
Dec 13, 2023
Browse files
Revert "Simplified `evaluator.py`" (#1116)
parent
72e583d5
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
51 additions
and
23 deletions
+51
-23
lm_eval/evaluator.py
lm_eval/evaluator.py
+51
-23
No files found.
lm_eval/evaluator.py
View file @
9ef853ac
...
...
@@ -234,7 +234,8 @@ def evaluate(
padding_requests
=
collections
.
defaultdict
(
int
)
# store the hierarchy to do proper ordering
task_hierarchy
=
collections
.
defaultdict
(
list
)
# store task aliases
# store the ordering of tasks and groups
task_order
=
collections
.
defaultdict
(
int
)
task_group_alias
=
collections
.
defaultdict
(
dict
)
# store num-fewshot value per task
num_fewshot
=
collections
.
defaultdict
(
int
)
...
...
@@ -439,6 +440,32 @@ def evaluate(
vals
=
vals_torch
if
lm
.
rank
==
0
:
### Get task ordering for correct sample-wide aggregation
group_to_task
=
{}
for
group
in
task_hierarchy
.
keys
():
if
group
not
in
task_order
:
task_order
[
group
]
=
0
if
len
(
task_hierarchy
[
group
])
>
0
:
group_to_task
[
group
]
=
task_hierarchy
[
group
].
copy
()
for
task
in
task_hierarchy
[
group
]:
if
task
in
task_order
:
task_order
[
task
]
+=
1
else
:
task_order
[
task
]
=
1
+
task_order
[
group
]
if
task
in
task_hierarchy
:
group_to_task
[
group
].
remove
(
task
)
group_to_task
[
group
].
extend
(
task_hierarchy
[
task
])
task_to_group
=
{}
for
group
in
group_to_task
:
for
task
in
group_to_task
[
group
]:
if
task
in
task_to_group
:
task_to_group
[
task
].
append
(
group
)
else
:
task_to_group
[
task
]
=
[
group
]
### Aggregate results over all datapoints ###
# aggregate results ; run bootstrap CIs
...
...
@@ -526,36 +553,37 @@ def evaluate(
results
[
group
][
"samples"
]
=
total_size
def
print_tasks
(
task_hierarchy
,
ta
b
=
0
):
def
print_tasks
(
task_hierarchy
,
ta
sk_order
,
task_version
,
task_group_alias
):
results_agg
=
collections
.
defaultdict
(
dict
)
groups_agg
=
collections
.
defaultdict
(
dict
)
for
group_name
,
task_list
in
task_hierarchy
.
items
():
order
=
task_order
[
group_name
]
results_agg
[
group_name
]
=
results
[
group_name
].
copy
()
results_agg
[
group_name
][
"tab"
]
=
order
(
group_name
,
task_list
),
*
_
=
task_hierarchy
.
items
()
task_list
=
sorted
(
task_list
)
results_agg
[
group_name
]
=
results
[
group_name
].
copy
()
results_agg
[
group_name
][
"tab"
]
=
tab
if
(
order
<
max
(
task_order
.
values
()))
and
(
len
(
task_list
)
>
0
):
groups_agg
[
group_name
]
=
results
[
group_name
].
copy
()
groups_agg
[
group_name
][
"tab"
]
=
order
if
len
(
task_list
)
>
0
:
groups_agg
[
group_name
]
=
results
[
group_name
].
copy
()
groups_agg
[
group_name
][
"tab"
]
=
tab
if
task_list
!=
[]:
for
task
in
sorted
(
task_list
):
if
task
in
task_hierarchy
:
_task_hierarchy
=
{
task
:
task_hierarchy
[
task
]}
else
:
_task_hierarchy
=
{
task
:
[]}
for
task_name
in
task_list
:
if
task_name
in
task_hierarchy
:
_task_hierarchy
=
{
**
{
task_name
:
task_hierarchy
[
task_name
]},
**
task_hierarchy
,
}
else
:
_task_hierarchy
=
{
task_name
:
[]}
_results_agg
,
_groups_agg
,
task_version
=
print_tasks
(
_task_hierarchy
,
task_order
,
task_version
,
task_group_alias
)
_results_agg
,
_groups_agg
=
print_tasks
(
_task_hierarchy
,
tab
+
1
)
results_agg
=
{
**
results_agg
,
**
_results_agg
}
groups_agg
=
{
**
groups_agg
,
**
_groups_agg
}
results_agg
=
{
**
results_agg
,
**
_results_agg
}
groups_agg
=
{
**
groups_agg
,
**
_groups_agg
}
return
results_agg
,
groups_agg
return
results_agg
,
groups_agg
,
task_version
results_agg
,
groups_agg
=
print_tasks
(
task_hierarchy
)
results_agg
,
groups_agg
,
versions
=
print_tasks
(
task_hierarchy
,
task_order
,
versions
,
task_group_alias
)
for
task
in
results_agg
:
task_results
=
results_agg
[
task
]
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment