Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
lm-evaluation-harness
Commits
af913422
Commit
af913422
authored
May 11, 2023
by
Julen Etxaniz
Browse files
update write out variable name
parent
99b0a42d
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
9 additions
and
13 deletions
+9
-13
lm_eval/evaluator.py
lm_eval/evaluator.py
+9
-13
No files found.
lm_eval/evaluator.py
View file @
af913422
...
...
@@ -188,7 +188,7 @@ def evaluate(
# TODO: we need unit tests & sanity checks or something to ensure that the return of `validation_docs` is stable
docs
=
{}
detailed_eval
_info
=
{}
write_out
_info
=
{}
docs_for_decontamination
=
collections
.
defaultdict
(
list
)
...
...
@@ -258,7 +258,7 @@ def evaluate(
)
if
write_out
:
detailed_eval
_info
[
task_name
]
=
prompt_details
write_out
_info
[
task_name
]
=
prompt_details
# Compare all tasks/sets at once to ensure a single training set scan
if
decontaminate
:
...
...
@@ -289,18 +289,16 @@ def evaluate(
process_res_queue
[(
task_name
,
doc_id
)].
append
((
i
,
resp
))
if
write_out
:
detailed_eval
_info
[
task_name
][
doc_id
][
f
"logit_
{
i
}
"
]
=
resp
write_out
_info
[
task_name
][
doc_id
][
f
"logit_
{
i
}
"
]
=
resp
task
=
task_dict
[
task_name
]
if
isinstance
(
task
,
lm_eval
.
base
.
MultipleChoiceTask
):
detailed_eval
_info
[
task_name
][
doc_id
][
"truth"
]
=
doc
[
"gold"
]
write_out
_info
[
task_name
][
doc_id
][
"truth"
]
=
doc
[
"gold"
]
elif
isinstance
(
task
,
lm_eval
.
tasks
.
winogrande
.
Winogrande
):
detailed_eval
_info
[
task_name
][
doc_id
][
"truth"
]
=
task
.
answer_to_num
[
write_out
_info
[
task_name
][
doc_id
][
"truth"
]
=
task
.
answer_to_num
[
doc
[
"answer"
]
]
else
:
detailed_eval_info
[
task_name
][
doc_id
][
"truth"
]
=
task
.
doc_to_target
(
doc
)
write_out_info
[
task_name
][
doc_id
][
"truth"
]
=
task
.
doc_to_target
(
doc
)
vals
=
collections
.
defaultdict
(
list
)
...
...
@@ -317,7 +315,7 @@ def evaluate(
vals
[(
task_name
,
metric
)].
append
(
value
)
if
write_out
:
detailed_eval
_info
[
task_name
][
doc_id
][
metric
]
=
str
(
value
)
write_out
_info
[
task_name
][
doc_id
][
metric
]
=
str
(
value
)
# Re-use the evaluation for the decontaminated set by just ignoring the overlaps
if
decontaminate
and
task_name
in
overlaps
:
...
...
@@ -363,13 +361,11 @@ def evaluate(
for
task_name
,
_
in
task_dict_items
:
with
open
(
output_base_path
.
joinpath
(
f
"
{
task_name
}
_
detailed_eval
_info.json"
),
output_base_path
.
joinpath
(
f
"
{
task_name
}
_
write_out
_info.json"
),
"w"
,
encoding
=
"utf8"
,
)
as
fp
:
json
.
dump
(
detailed_eval_info
[
task_name
],
fp
,
indent
=
4
,
ensure_ascii
=
False
)
json
.
dump
(
write_out_info
[
task_name
],
fp
,
indent
=
4
,
ensure_ascii
=
False
)
return
{
"results"
:
dict
(
results
),
"versions"
:
dict
(
versions
)}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment