gaoqiong / lm-evaluation-harness · Commits

Commit 24ba70a3, authored Jun 27, 2024 by Nathan Habib

cleanup

parent dfea19e8

Showing 1 changed file, lm_eval/evaluator.py, with 9 additions and 7 deletions (+9 -7).
@@ -22,7 +22,7 @@ from lm_eval.evaluator_utils import (
     run_task_tests,
 )
 from lm_eval.loggers import EvaluationTracker
-from lm_eval.loggers.utils import add_env_info, get_git_commit_hash
+from lm_eval.loggers.utils import add_env_info, add_tokenizer_info, get_git_commit_hash
 from lm_eval.tasks import TaskManager, get_task_dict
 from lm_eval.utils import (
     eval_logger,
@@ -271,6 +271,7 @@ def simple_evaluate(
             model_args=model_args,
             system_instruction=system_instruction,
             chat_template=lm.chat_template if apply_chat_template else None,
+            fewshot_as_multiturn=fewshot_as_multiturn,
         )

     results = evaluate(
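The one-line change here forwards fewshot_as_multiturn alongside the other run parameters; judging from the context lines (model_args, system_instruction, chat_template), this appears to be the evaluation tracker's experiment-argument logging, so saved results now record whether few-shot examples were rendered as separate chat turns. For orientation, a hedged sketch of a caller exercising both flags; the model string, model_args, and task list are placeholders, not part of this commit:

import lm_eval

# Hypothetical invocation; apply_chat_template and fewshot_as_multiturn are
# the simple_evaluate parameters touched by this commit, the rest is filler.
results = lm_eval.simple_evaluate(
    model="hf",
    model_args="pretrained=EleutherAI/pythia-160m",
    tasks=["gsm8k"],
    num_fewshot=5,
    apply_chat_template=True,    # render prompts with the tokenizer's chat template
    fewshot_as_multiturn=True,   # few-shot examples become alternating user/assistant turns
)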
@@ -325,6 +326,7 @@ def simple_evaluate(
         results["git_hash"] = get_git_commit_hash()
         results["date"] = start_date
         add_env_info(results)  # additional environment info to results
+        add_tokenizer_info(results, lm)  # additional info about tokenizer
         return results
     else:
         return None
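The added add_tokenizer_info(results, lm) call mirrors add_env_info(results) just above it: both attach reproducibility metadata to the returned results dict, next to "git_hash" and "date". The real helper lives in lm_eval/loggers/utils.py (imported in the first hunk); the sketch below is only a guess at its shape, assuming the LM wrapper exposes a Hugging Face-style tokenizer:

# Hedged sketch of an add_tokenizer_info-style helper; the actual
# implementation may record different keys.
def add_tokenizer_info(results: dict, lm) -> None:
    info = {}
    tokenizer = getattr(lm, "tokenizer", None)  # not every LM backend has one
    if tokenizer is not None:
        info = {
            "tokenizer_pad_token": [str(tokenizer.pad_token), str(tokenizer.pad_token_id)],
            "tokenizer_eos_token": [str(tokenizer.eos_token), str(tokenizer.eos_token_id)],
            "tokenizer_bos_token": [str(tokenizer.bos_token), str(tokenizer.bos_token_id)],
        }
    results["tokenizer_info"] = info  # stored alongside git hash, date, env info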
@@ -607,16 +609,16 @@ def evaluate(
                 ]
                 # compute group's pooled metric and stderr
-                results[group][metric] = lm_eval.api.metrics.aggregate_subtask_metrics(
-                    metrics, sizes
-                )
+                results[group][metric] = (
+                    lm_eval.api.metrics.aggregate_subtask_metrics(metrics, sizes)
+                )
                 # TODO: calculate grouped metric using aggregation fn
                 if "N/A" in stderrs:
                     results[group][stderr] = "N/A"
                 else:
-                    results[group][stderr] = lm_eval.api.metrics.pooled_sample_stderr(
-                        stderrs, sizes
-                    )
+                    results[group][stderr] = (
+                        lm_eval.api.metrics.pooled_sample_stderr(stderrs, sizes)
+                    )
                     # TODO: allow GroupConfigs to choose which variance formula is used, for back-compatibility
                     # To use the old (likely incorrect) variance formula, comment out the above and uncomment this line:
                     # results[group][stderr] = lm_eval.api.metrics.combined_sample_stderr(stderrs, sizes, metrics=metrics)
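Both changed statements in this hunk are behavior-neutral: the right-hand sides are wrapped in parentheses with the call collapsed onto one line, consistent with the commit's "cleanup" message. For readers unfamiliar with the two helpers, here is a minimal sketch of the statistics being assigned; the real implementations live in lm_eval.api.metrics and may differ in detail:

import numpy as np

def aggregate_subtask_metrics(metrics, sizes, weight_by_size=True):
    # Pooled group metric: a size-weighted mean of per-subtask scores
    # (or a plain mean when weight_by_size is False).
    if not weight_by_size:
        sizes = [1] * len(sizes)
    return sum(m * n for m, n in zip(metrics, sizes)) / sum(sizes)

def pooled_sample_stderr(stderrs, sizes):
    # Pooled standard error over disjoint subtask samples: recover each
    # subtask's sample variance from its stderr (var_i = se_i**2 * n_i),
    # pool the variances, then rescale to the combined sample size.
    pooled_var = sum(
        (n - 1) * se**2 * n for se, n in zip(stderrs, sizes)
    ) / (sum(sizes) - len(sizes))
    return np.sqrt(pooled_var / sum(sizes))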