Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
lm-evaluation-harness
Commits
4eeb8715
Commit
4eeb8715
authored
Jun 04, 2024
by
lintangsutawika
Browse files
format fix
parent
3e1301bb
Changes
3
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
32 additions
and
35 deletions
+32
-35
lm_eval/evaluator.py
lm_eval/evaluator.py
+0
-1
lm_eval/evaluator_utils.py
lm_eval/evaluator_utils.py
+2
-2
tests/testyamls/test-01.yaml
tests/testyamls/test-01.yaml
+30
-32
No files found.
lm_eval/evaluator.py
View file @
4eeb8715
...
@@ -584,7 +584,6 @@ def evaluate(
...
@@ -584,7 +584,6 @@ def evaluate(
task_aggregation_list
=
{}
task_aggregation_list
=
{}
for
group_or_task
,
group_or_task_info
in
task_dict
.
items
():
for
group_or_task
,
group_or_task_info
in
task_dict
.
items
():
# Convert to string
# Convert to string
if
isinstance
(
group_or_task
,
ConfigurableGroup
):
if
isinstance
(
group_or_task
,
ConfigurableGroup
):
group_config
=
group_or_task
.
config
group_config
=
group_or_task
.
config
...
...
lm_eval/evaluator_utils.py
View file @
4eeb8715
...
@@ -232,13 +232,13 @@ def prepare_print_tasks(
...
@@ -232,13 +232,13 @@ def prepare_print_tasks(
for
task_or_group_name
,
task_or_group_obj
in
task_dict
.
items
():
for
task_or_group_name
,
task_or_group_obj
in
task_dict
.
items
():
tab_string
=
" "
*
task_depth
+
"- "
if
task_depth
>
0
else
""
tab_string
=
" "
*
task_depth
+
"- "
if
task_depth
>
0
else
""
if
isinstance
(
task_or_group_name
,
ConfigurableGroup
):
if
isinstance
(
task_or_group_name
,
ConfigurableGroup
):
string_name
=
task_or_group_name
.
group_name
#
string_name = task_or_group_name.group_name
name
=
task_or_group_name
.
task_id
name
=
task_or_group_name
.
task_id
from_configurable_group
=
True
from_configurable_group
=
True
elif
isinstance
(
task_or_group_name
,
str
):
elif
isinstance
(
task_or_group_name
,
str
):
name
=
task_or_group_name
name
=
task_or_group_name
if
isinstance
(
task_or_group_obj
,
ConfigurableTask
):
if
isinstance
(
task_or_group_obj
,
ConfigurableTask
):
string_name
=
task_or_group_obj
.
task_name
#
string_name = task_or_group_obj.task_name
name
=
task_or_group_obj
.
task_id
name
=
task_or_group_obj
.
task_id
from_configurable_group
=
False
from_configurable_group
=
False
...
...
tests/testyamls/test-01.yaml
View file @
4eeb8715
...
@@ -3,12 +3,12 @@ group_alias: test 1
...
@@ -3,12 +3,12 @@ group_alias: test 1
task
:
task
:
-
piqa
# string task
-
piqa
# string task
-
ai2_arc
# string tag
-
ai2_arc
# string tag
#
- task: super-glue-lm-eval-v1 # Should this be spread out?
-
task
:
super-glue-lm-eval-v1
# Should this be spread out?
#
num_fewshot: 3
num_fewshot
:
3
-
task
:
swag
# dict registered task
-
task
:
swag
# dict registered task
num_fewshot
:
2
num_fewshot
:
2
#
- task: mmlu
-
task
:
mmlu
#
num_fewshot: 5
num_fewshot
:
5
-
group
:
nli-tasks
# dict group
-
group
:
nli-tasks
# dict group
task
:
task
:
-
anli
-
anli
...
@@ -17,31 +17,29 @@ task:
...
@@ -17,31 +17,29 @@ task:
num_fewshot
:
4
num_fewshot
:
4
metric_list
:
metric_list
:
-
metric
:
brier_score
-
metric
:
brier_score
aggregate_metric
:
true
-
task
:
sciq
# dict registered task duplicate
task_alias
:
sciq 2-shot
# - task: sciq # dict registered task duplicate
num_fewshot
:
2
# task_alias: sciq 2-shot
-
task
:
sciq
# dict registered task duplicate
# num_fewshot: 2
task_alias
:
sciq 4-shot
# - task: sciq # dict registered task duplicate
num_fewshot
:
4
# task_alias: sciq 4-shot
-
task
:
sciq
# dict registered task duplicate
# num_fewshot: 4
task_alias
:
sciq 6-shot
# - task: sciq # dict registered task duplicate
num_fewshot
:
6
# task_alias: sciq 6-shot
-
task
:
siqa_custom
# dict task
# num_fewshot: 6
dataset_path
:
social_i_qa
# - task: siqa_custom # dict task
dataset_name
:
null
# dataset_path: social_i_qa
output_type
:
multiple_choice
# dataset_name: null
training_split
:
train
# output_type: multiple_choice
validation_split
:
validation
# training_split: train
doc_to_text
:
"
Question:
{{context}}
{{question}}
\n
Answer:"
# validation_split: validation
target_delimiter
:
"
"
# doc_to_text: "Question: {{context}} {{question}}\nAnswer:"
doc_to_choice
:
# target_delimiter: " "
-
"
{{answerA}}"
# doc_to_choice:
-
"
{{answerB}}"
# - "{{answerA}}"
-
"
{{answerC}}"
# - "{{answerB}}"
doc_to_target
:
"
{{
(label|int)
-
1
}}"
# - "{{answerC}}"
metric_list
:
# doc_to_target: "{{ (label|int) - 1 }}"
-
metric
:
acc
# metric_list:
aggregation
:
mean
# - metric: acc
higher_is_better
:
true
# aggregation: mean
# higher_is_better: true
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment