gaoqiong / lm-evaluation-harness / Commits / 18f4eb57

Commit 18f4eb57 (Unverified)
Authored May 03, 2024 by KonradSzafer; committed by GitHub on May 03, 2024

eval tracker args fix (#1777)

Parent: 59cf408a
Showing 2 changed files with 15 additions and 14 deletions (+15 -14)

lm_eval/__main__.py                     +7 -7
lm_eval/logging/evaluation_tracker.py   +8 -7
lm_eval/__main__.py (view file @ 18f4eb57)

@@ -3,7 +3,6 @@ import json
 import logging
 import os
 import sys
-from argparse import Namespace
 from functools import partial
 from typing import Union

@@ -261,15 +260,14 @@ def cli_evaluate(args: Union[argparse.Namespace, None] = None) -> None:
         eval_logger.info(f"Including path: {args.include_path}")
     task_manager = TaskManager(args.verbosity, include_path=args.include_path)

-    evaluation_tracker_args = Namespace(**evaluation_tracker_args)
     if (
-        evaluation_tracker_args.push_results_to_hub
-        or evaluation_tracker_args.push_samples_to_hub
-    ) and not evaluation_tracker_args.hub_results_org:
+        "push_results_to_hub" in evaluation_tracker_args
+        or "push_samples_to_hub" in evaluation_tracker_args
+    ) and "hub_results_org" not in evaluation_tracker_args:
         raise ValueError(
             "If push_results_to_hub or push_samples_to_hub is set, results_org must be specified."
         )
-    if evaluation_tracker_args.push_samples_to_hub and not args.log_samples:
+    if "push_samples_to_hub" in evaluation_tracker_args and not args.log_samples:
         eval_logger.warning(
             "Pushing samples to the Hub requires --log_samples to be set. Samples will not be pushed to the Hub."
         )

@@ -376,7 +374,9 @@ def cli_evaluate(args: Union[argparse.Namespace, None] = None) -> None:
         except Exception as e:
             eval_logger.info(f"Logging to Weights and Biases failed due to {e}")

-    evaluation_tracker.save_results_aggregated(results=results, samples=samples)
+    evaluation_tracker.save_results_aggregated(
+        results=results, samples=samples if args.log_samples else None
+    )

     if args.log_samples:
         for task_name, config in results["configs"].items():
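Note on the change above: the new checks treat evaluation_tracker_args as a mapping and test key membership instead of wrapping it in a Namespace and reading attributes, which raises AttributeError for any optional key the user did not pass. The following is a minimal, self-contained sketch of that difference; parse_kv_string is a hypothetical stand-in for however the harness turns the CLI string into a dict, not its actual parser.

from argparse import Namespace

# Hypothetical stand-in for parsing a "key1=val1,key2=val2" CLI string into a dict
# that contains only the keys the user actually supplied.
def parse_kv_string(arg_string: str) -> dict:
    if not arg_string:
        return {}
    return dict(pair.split("=", 1) for pair in arg_string.split(","))

tracker_args = parse_kv_string("hub_results_org=my-org,push_results_to_hub=True")

# Old approach: attribute access on a Namespace built from the partial dict
# fails for any optional key that was not supplied.
ns = Namespace(**tracker_args)
# ns.push_samples_to_hub  # AttributeError: 'Namespace' object has no attribute ...

# New approach: membership tests tolerate missing keys.
wants_push = (
    "push_results_to_hub" in tracker_args or "push_samples_to_hub" in tracker_args
)
if wants_push and "hub_results_org" not in tracker_args:
    raise ValueError("results_org must be specified when pushing to the Hub")

The last hunk is the complementary caller-side fix: samples are forwarded to save_results_aggregated only when --log_samples is set, so the tracker may now be handed None.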
lm_eval/logging/evaluation_tracker.py (view file @ 18f4eb57)

@@ -131,14 +131,15 @@ class EvaluationTracker:
         try:
             eval_logger.info("Saving results aggregated")

-            # calculate cumulative hash for each task
+            # calculate cumulative hash for each task - only if samples are provided
             task_hashes = {}
-            for task_name, task_samples in samples.items():
-                sample_hashes = [
-                    s["doc_hash"] + s["prompt_hash"] + s["target_hash"]
-                    for s in task_samples
-                ]
-                task_hashes[task_name] = hash_string("".join(sample_hashes))
+            if samples:
+                for task_name, task_samples in samples.items():
+                    sample_hashes = [
+                        s["doc_hash"] + s["prompt_hash"] + s["target_hash"]
+                        for s in task_samples
+                    ]
+                    task_hashes[task_name] = hash_string("".join(sample_hashes))

             # update initial results dict
             results.update({"task_hashes": task_hashes})
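This guard pairs with the caller-side change in lm_eval/__main__.py: save_results_aggregated can now receive samples=None when --log_samples is off, and the previous unconditional samples.items() would have raised on None. Below is a small standalone sketch of the guarded hashing; hash_string is an assumed SHA-256 stand-in for the tracker's helper, and the sample dicts are purely illustrative.

import hashlib
from typing import Optional

def hash_string(s: str) -> str:
    # Assumed stand-in for the hashing helper used by the tracker.
    return hashlib.sha256(s.encode("utf-8")).hexdigest()

def build_task_hashes(samples: Optional[dict]) -> dict:
    # Mirrors the patched logic: hashes are computed only if samples are provided.
    task_hashes = {}
    if samples:  # skips both None and an empty dict
        for task_name, task_samples in samples.items():
            sample_hashes = [
                s["doc_hash"] + s["prompt_hash"] + s["target_hash"]
                for s in task_samples
            ]
            task_hashes[task_name] = hash_string("".join(sample_hashes))
    return task_hashes

# With --log_samples off, the tracker receives samples=None and the results dict
# simply gets an empty "task_hashes" entry instead of crashing on None.items().
assert build_task_hashes(None) == {}
assert build_task_hashes(
    {"task": [{"doc_hash": "a", "prompt_hash": "b", "target_hash": "c"}]}
)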