Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
lm-evaluation-harness
Commits
dbe4c391
Commit
dbe4c391
authored
Jul 04, 2025
by
Baber
Browse files
improve logging
parent
442ce51a
Changes
5
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
91 additions
and
38 deletions
+91
-38
lm_eval/__init__.py
lm_eval/__init__.py
+0
-4
lm_eval/config/evaluate_config.py
lm_eval/config/evaluate_config.py
+2
-5
lm_eval/evaluator.py
lm_eval/evaluator.py
+2
-4
lm_eval/tasks/__init__.py
lm_eval/tasks/__init__.py
+1
-1
lm_eval/utils.py
lm_eval/utils.py
+86
-24
No files found.
lm_eval/__init__.py
View file @
dbe4c391
import
logging
import
os
__version__
=
"0.4.9"
...
...
lm_eval/config/evaluate_config.py
View file @
dbe4c391
...
...
@@ -13,7 +13,7 @@ from lm_eval.utils import simple_parse_args_string
if
TYPE_CHECKING
:
from
lm_eval.tasks
import
TaskManager
eval_logger
=
logging
.
getLogger
(
__name__
)
DICT_KEYS
=
[
"wandb_args"
,
"wandb_config_args"
,
...
...
@@ -273,7 +273,7 @@ class EvaluatorConfig:
def
_validate_arguments
(
self
)
->
None
:
"""Validate configuration arguments and cross-field constraints."""
if
self
.
limit
:
logg
ing
.
warning
(
eval_
logg
er
.
warning
(
"--limit SHOULD ONLY BE USED FOR TESTING. "
"REAL METRICS SHOULD NOT BE COMPUTED USING LIMIT."
)
...
...
@@ -368,9 +368,6 @@ class EvaluatorConfig:
def
_apply_trust_remote_code
(
self
)
->
None
:
"""Apply trust_remote_code setting if enabled."""
if
self
.
trust_remote_code
:
eval_logger
=
logging
.
getLogger
(
__name__
)
eval_logger
.
info
(
"Setting HF_DATASETS_TRUST_REMOTE_CODE=true"
)
# HACK: import datasets and override its HF_DATASETS_TRUST_REMOTE_CODE value internally,
# because it's already been determined based on the prior env var before launching our
# script--`datasets` gets imported by lm_eval internally before these lines can update the env.
...
...
lm_eval/evaluator.py
View file @
dbe4c391
...
...
@@ -28,10 +28,10 @@ from lm_eval.loggers import EvaluationTracker
from
lm_eval.loggers.utils
import
add_env_info
,
add_tokenizer_info
,
get_git_commit_hash
from
lm_eval.tasks
import
TaskManager
,
get_task_dict
from
lm_eval.utils
import
(
get_logger
,
handle_non_serializable
,
hash_string
,
positional_deprecated
,
setup_logging
,
simple_parse_args_string
,
)
...
...
@@ -145,7 +145,7 @@ def simple_evaluate(
Dictionary of results
"""
if
verbosity
is
not
None
:
s
et
up
_logg
ing
(
verbosity
=
verbosity
)
g
et_logg
er
(
verbosity
)
start_date
=
time
.
time
()
if
limit
is
not
None
and
samples
is
not
None
:
...
...
@@ -355,8 +355,6 @@ def simple_evaluate(
verbosity
=
verbosity
,
confirm_run_unsafe_code
=
confirm_run_unsafe_code
,
)
if
verbosity
is
not
None
:
setup_logging
(
verbosity
=
verbosity
)
if
lm
.
rank
==
0
:
if
isinstance
(
model
,
str
):
...
...
lm_eval/tasks/__init__.py
View file @
dbe4c391
...
...
@@ -30,7 +30,7 @@ class TaskManager:
metadata
:
Optional
[
dict
]
=
None
,
)
->
None
:
if
verbosity
is
not
None
:
utils
.
s
et
up
_logg
ing
(
verbosity
)
utils
.
g
et_logg
er
(
verbosity
)
self
.
include_path
=
include_path
self
.
metadata
=
metadata
self
.
_task_index
=
self
.
initialize_tasks
(
...
...
lm_eval/utils.py
View file @
dbe4c391
...
...
@@ -26,8 +26,75 @@ HIGHER_IS_BETTER_SYMBOLS = {
}
def
setup_logging
(
verbosity
=
logging
.
INFO
):
# Configure the root logger
def get_logger(level: Optional[str] = None) -> logging.Logger:
    """
    Return the shared ``"lm_eval"`` logger, optionally adjusting its level.

    If neither the ``"lm_eval"`` logger nor any of its ancestors has a handler
    attached, a plain ``logging.StreamHandler`` is added and the level is
    initialised to ``logging.INFO``. Repeated calls therefore do not stack
    handlers added by this function.

    Args:
        level (Optional[str]): Case-insensitive level name such as ``"debug"``
            or ``"WARNING"``. An integer level (e.g. ``logging.DEBUG``) is also
            accepted, matching what ``setup_logging`` takes for ``verbosity``.
            ``None`` leaves the current level untouched. A name that does not
            resolve to a numeric ``logging`` level falls back to
            ``logging.INFO`` and a warning is logged.

    Example:
        >>> logger = get_logger("INFO")
        >>> logger.info("Log this")

    Returns:
        logging.Logger: The ``"lm_eval"`` logger.
    """
    logger = logging.getLogger("lm_eval")

    # hasHandlers() also consults ancestor loggers, so an application that has
    # already configured logging (e.g. on the root logger) is left alone.
    if not logger.hasHandlers():
        logger.addHandler(logging.StreamHandler())
        logger.setLevel(logging.INFO)

    if level is None:
        return logger

    if isinstance(level, int):
        # Numeric levels are passed through as-is.
        resolved = level
    else:
        # Only accept attributes of `logging` that are real numeric levels;
        # names like "BASIC_FORMAT" resolve to non-level attributes.
        resolved = getattr(logging, str(level).upper(), None)
        if not isinstance(resolved, int):
            logger.warning("Unknown log level %r; falling back to INFO", level)
            resolved = logging.INFO

    logger.setLevel(resolved)
    return logger
def
setup_logging
(
verbosity
=
logging
.
INFO
,
suppress_third_party
=
True
):
"""
Configure logging for the lm_eval CLI application.
WARNING: This function is intended for CLI use only. Library users should
use get_logger() instead to avoid interfering with their application's
logging configuration.
Args:
verbosity: Log level (int) or string name. Can be overridden by LOGLEVEL env var.
suppress_third_party: Whether to suppress verbose third-party library logs.
Returns:
logging.Logger: The configured lm_eval logger instance.
"""
# Validate verbosity parameter
if
isinstance
(
verbosity
,
str
):
level_map
=
{
"DEBUG"
:
logging
.
DEBUG
,
"INFO"
:
logging
.
INFO
,
"WARNING"
:
logging
.
WARNING
,
"ERROR"
:
logging
.
ERROR
,
"CRITICAL"
:
logging
.
CRITICAL
,
}
verbosity
=
level_map
.
get
(
verbosity
.
upper
(),
logging
.
INFO
)
elif
not
isinstance
(
verbosity
,
int
):
verbosity
=
logging
.
INFO
# Get log level from environment or use default
if
log_level_env
:
=
os
.
environ
.
get
(
"LOGLEVEL"
,
None
):
level_map
=
{
"DEBUG"
:
logging
.
DEBUG
,
"INFO"
:
logging
.
INFO
,
"WARNING"
:
logging
.
WARNING
,
"ERROR"
:
logging
.
ERROR
,
"CRITICAL"
:
logging
.
CRITICAL
,
}
log_level
=
level_map
.
get
(
log_level_env
.
upper
(),
verbosity
)
else
:
log_level
=
verbosity
# Get the lm_eval logger directly
logger
=
logging
.
getLogger
(
"lm_eval"
)
# Configure custom formatter
class
CustomFormatter
(
logging
.
Formatter
):
def
format
(
self
,
record
):
if
record
.
name
.
startswith
(
"lm_eval."
):
...
...
@@ -39,32 +106,27 @@ def setup_logging(verbosity=logging.INFO):
datefmt
=
"%Y-%m-%d:%H:%M:%S"
,
)
log_level
=
os
.
environ
.
get
(
"LOGLEVEL"
,
verbosity
)
or
verbosity
level_map
=
{
"DEBUG"
:
logging
.
DEBUG
,
"INFO"
:
logging
.
INFO
,
"WARNING"
:
logging
.
WARNING
,
"ERROR"
:
logging
.
ERROR
,
"CRITICAL"
:
logging
.
CRITICAL
,
}
log_level
=
level_map
.
get
(
str
(
log_level
).
upper
(),
logging
.
INFO
)
if
not
logging
.
root
.
handlers
:
# Check if handler already exists to prevent duplicates
has_stream_handler
=
any
(
isinstance
(
h
,
logging
.
StreamHandler
)
for
h
in
logger
.
handlers
)
if
not
has_stream_handler
:
handler
=
logging
.
StreamHandler
()
handler
.
setFormatter
(
formatter
)
logger
.
addHandler
(
handler
)
# For CLI use, we disable propagation to avoid duplicate messages
logger
.
propagate
=
False
root_logger
=
logging
.
getLogger
()
root_logger
.
addHandler
(
handler
)
root_logger
.
setLevel
(
log_level
)
# Set the logger level
logger
.
setLevel
(
log_level
)
if
log_level
==
logging
.
DEBUG
:
third_party_loggers
=
[
"urllib3"
,
"filelock"
,
"fsspec"
]
for
logger_name
in
third_party_loggers
:
logging
.
getLogger
(
logger_name
).
setLevel
(
logging
.
INFO
)
else
:
logging
.
getLogger
().
setLevel
(
log_level
)
# Optionally suppress verbose third-party library logs
if
suppress_third_party
and
log_level
==
logging
.
DEBUG
:
third_party_loggers
=
[
"urllib3"
,
"filelock"
,
"fsspec"
]
for
logger_name
in
third_party_loggers
:
logging
.
getLogger
(
logger_name
).
setLevel
(
logging
.
INFO
)
return
logger
def
hash_string
(
string
:
str
)
->
str
:
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment