Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
lm-evaluation-harness
Commits
bcc887ad
Commit
bcc887ad
authored
Apr 25, 2024
by
lintangsutawika
Browse files
fixed issues related to printing alias of group and updated yaml
parent
9551bbf2
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
35 additions
and
15 deletions
+35
-15
lm_eval/evaluator_utils.py
lm_eval/evaluator_utils.py
+19
-13
lm_eval/tasks/__init__.py
lm_eval/tasks/__init__.py
+2
-2
lm_eval/tasks/benchmarks/flan/flan_held_in.yaml
lm_eval/tasks/benchmarks/flan/flan_held_in.yaml
+14
-0
No files found.
lm_eval/evaluator_utils.py
View file @
bcc887ad
...
@@ -164,7 +164,10 @@ def get_sample_size(task, limit: Optional[int]) -> Union[int, None]:
...
@@ -164,7 +164,10 @@ def get_sample_size(task, limit: Optional[int]) -> Union[int, None]:
def
prepare_print_tasks
(
def
prepare_print_tasks
(
task_hierarchy
:
dict
,
results
:
dict
,
tab
=
0
task_hierarchy
:
dict
,
results
:
dict
,
tab
=
0
,
group_tab
=
0
,
)
->
Tuple
[
dict
,
dict
]:
)
->
Tuple
[
dict
,
dict
]:
"""
"""
@param task_hierarchy: Dictionary representing the group hierarchy of tasks. Each key is a group name and its
@param task_hierarchy: Dictionary representing the group hierarchy of tasks. Each key is a group name and its
...
@@ -197,17 +200,20 @@ def prepare_print_tasks(
...
@@ -197,17 +200,20 @@ def prepare_print_tasks(
results_agg
[
group_name
][
"alias"
]
=
tab_string
+
group_name
results_agg
[
group_name
][
"alias"
]
=
tab_string
+
group_name
if
len
(
task_list
)
>
0
:
if
len
(
task_list
)
>
0
:
groups_agg
[
group_name
]
=
results
[
group_name
].
copy
()
if
" "
not
in
results
[
group_name
]:
# groups_agg[group_name]["tab"] = tab
group_tab_string
=
" "
*
group_tab
+
"- "
if
group_tab
>
0
else
""
if
"samples"
in
groups_agg
[
group_name
]:
groups_agg
[
group_name
]
=
results
[
group_name
].
copy
()
groups_agg
[
group_name
].
pop
(
"samples"
)
group_tab
+=
1
if
"alias"
in
groups_agg
[
group_name
]:
if
"samples"
in
groups_agg
[
group_name
]:
groups_agg
[
group_name
][
"alias"
]
=
(
groups_agg
[
group_name
].
pop
(
"samples"
)
tab_string
+
groups_agg
[
group_name
][
"alias"
]
)
if
"alias"
in
groups_agg
[
group_name
]:
else
:
groups_agg
[
group_name
][
"alias"
]
=
(
groups_agg
[
group_name
][
"alias"
]
=
tab_string
+
group_name
group_tab_string
+
groups_agg
[
group_name
][
"alias"
]
)
else
:
groups_agg
[
group_name
][
"alias"
]
=
group_tab_string
+
group_name
for
task_name
in
task_list
:
for
task_name
in
task_list
:
if
task_name
in
task_hierarchy
:
if
task_name
in
task_hierarchy
:
...
@@ -222,7 +228,7 @@ def prepare_print_tasks(
...
@@ -222,7 +228,7 @@ def prepare_print_tasks(
}
}
_results_agg
,
_groups_agg
=
prepare_print_tasks
(
_results_agg
,
_groups_agg
=
prepare_print_tasks
(
_task_hierarchy
,
results
,
tab
+
1
_task_hierarchy
,
results
,
tab
+
1
,
group_tab
)
)
results_agg
=
{
**
results_agg
,
**
_results_agg
}
results_agg
=
{
**
results_agg
,
**
_results_agg
}
groups_agg
=
{
**
groups_agg
,
**
_groups_agg
}
groups_agg
=
{
**
groups_agg
,
**
_groups_agg
}
...
...
lm_eval/tasks/__init__.py
View file @
bcc887ad
...
@@ -178,7 +178,6 @@ class TaskManager:
...
@@ -178,7 +178,6 @@ class TaskManager:
yaml_path
=
self
.
_get_yaml_path
(
group_name
)
yaml_path
=
self
.
_get_yaml_path
(
group_name
)
if
(
update_config
is
not
None
)
and
(
"group_alias"
in
update_config
):
if
(
update_config
is
not
None
)
and
(
"group_alias"
in
update_config
):
group_name
=
update_config
[
"group_alias"
]
update_config
.
pop
(
"group_alias"
)
update_config
.
pop
(
"group_alias"
)
if
isinstance
(
name_or_config
,
dict
):
if
isinstance
(
name_or_config
,
dict
):
...
@@ -240,8 +239,9 @@ class TaskManager:
...
@@ -240,8 +239,9 @@ class TaskManager:
all_subtasks
=
{}
all_subtasks
=
{}
if
parent_name
is
not
None
:
if
parent_name
is
not
None
:
# all_subtasks = {group_name: (parent_name, None)}
parent_group_config
=
self
.
_get_config
(
parent_name
)
parent_group_config
=
self
.
_get_config
(
parent_name
)
if
"group_alias"
in
parent_group_config
:
parent_name
=
parent_group_config
[
"group_alias"
]
all_subtasks
=
{
group_name
:
(
parent_name
,
parent_group_config
)}
all_subtasks
=
{
group_name
:
(
parent_name
,
parent_group_config
)}
fn
=
partial
(
fn
=
partial
(
...
...
lm_eval/tasks/benchmarks/flan/flan_held_in.yaml
View file @
bcc887ad
...
@@ -4,6 +4,8 @@ task:
...
@@ -4,6 +4,8 @@ task:
# ANLI R1
# ANLI R1
-
group
:
anli_r1_flan
-
group
:
anli_r1_flan
group_alias
:
ANLI R1
group_alias
:
ANLI R1
group_config
:
aggregate_metric
:
True
task
:
task
:
-
task
:
anli_r1
-
task
:
anli_r1
task_alias
:
prompt-0
task_alias
:
prompt-0
...
@@ -53,6 +55,8 @@ task:
...
@@ -53,6 +55,8 @@ task:
# ANLI R2
# ANLI R2
-
group
:
anli_r2_flan
-
group
:
anli_r2_flan
group_alias
:
ANLI R2
group_alias
:
ANLI R2
group_config
:
aggregate_metric
:
True
task
:
task
:
-
task
:
anli_r2
-
task
:
anli_r2
task_alias
:
prompt-0
task_alias
:
prompt-0
...
@@ -102,6 +106,8 @@ task:
...
@@ -102,6 +106,8 @@ task:
# ANLI R3
# ANLI R3
-
group
:
anli_r3_flan
-
group
:
anli_r3_flan
group_alias
:
ANLI R3
group_alias
:
ANLI R3
group_config
:
aggregate_metric
:
True
task
:
task
:
-
task
:
anli_r3
-
task
:
anli_r3
task_alias
:
prompt-0
task_alias
:
prompt-0
...
@@ -151,6 +157,8 @@ task:
...
@@ -151,6 +157,8 @@ task:
# Arc Easy
# Arc Easy
-
group
:
arc_easy_flan
-
group
:
arc_easy_flan
group_alias
:
Arc Easy
group_alias
:
Arc Easy
group_config
:
aggregate_metric
:
True
task
:
task
:
-
task
:
arc_easy
-
task
:
arc_easy
task_alias
:
prompt-0
task_alias
:
prompt-0
...
@@ -190,6 +198,8 @@ task:
...
@@ -190,6 +198,8 @@ task:
# Arc Challenge
# Arc Challenge
-
group
:
arc_challenge_flan
-
group
:
arc_challenge_flan
group_alias
:
Arc Challenge
group_alias
:
Arc Challenge
group_config
:
aggregate_metric
:
True
task
:
task
:
-
task
:
arc_challenge
-
task
:
arc_challenge
task_alias
:
prompt-0
task_alias
:
prompt-0
...
@@ -229,6 +239,8 @@ task:
...
@@ -229,6 +239,8 @@ task:
# BoolQ
# BoolQ
-
group
:
boolq_flan
-
group
:
boolq_flan
group_alias
:
BoolQ
group_alias
:
BoolQ
group_config
:
aggregate_metric
:
True
task
:
task
:
-
task
:
boolq
-
task
:
boolq
task_alias
:
prompt-0
task_alias
:
prompt-0
...
@@ -283,6 +295,8 @@ task:
...
@@ -283,6 +295,8 @@ task:
# RTE
# RTE
-
group
:
rte_flan
-
group
:
rte_flan
group_alias
:
RTE
group_alias
:
RTE
group_config
:
aggregate_metric
:
True
task
:
task
:
-
task
:
rte
-
task
:
rte
task_alias
:
prompt-0
task_alias
:
prompt-0
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment