Unverified Commit b7ccdaf0 authored by Nikita Titov's avatar Nikita Titov Committed by GitHub
Browse files

[python] Allow to register custom logger in Python-package (#3820)

* centralize Python-package logging in one place

* continue

* fix test name

* removed unused import

* enhance test

* fix lint

* hotfix test

* workaround for GPU test

* remove custom logger from Dask-package

* replace one log func with flags by multiple funcs
parent ac706e10
...@@ -55,3 +55,11 @@ Plotting ...@@ -55,3 +55,11 @@ Plotting
plot_metric plot_metric
plot_tree plot_tree
create_tree_digraph create_tree_digraph
Utilities
---------
.. autosummary::
:toctree: pythonapi/
register_logger
...@@ -3,7 +3,7 @@ ...@@ -3,7 +3,7 @@
Contributors: https://github.com/microsoft/LightGBM/graphs/contributors. Contributors: https://github.com/microsoft/LightGBM/graphs/contributors.
""" """
from .basic import Booster, Dataset from .basic import Booster, Dataset, register_logger
from .callback import (early_stopping, print_evaluation, record_evaluation, from .callback import (early_stopping, print_evaluation, record_evaluation,
reset_parameter) reset_parameter)
from .engine import cv, train, CVBooster from .engine import cv, train, CVBooster
...@@ -28,6 +28,7 @@ if os.path.isfile(os.path.join(dir_path, 'VERSION.txt')): ...@@ -28,6 +28,7 @@ if os.path.isfile(os.path.join(dir_path, 'VERSION.txt')):
__version__ = version_file.read().strip() __version__ = version_file.read().strip()
__all__ = ['Dataset', 'Booster', 'CVBooster', __all__ = ['Dataset', 'Booster', 'CVBooster',
'register_logger',
'train', 'cv', 'train', 'cv',
'LGBMModel', 'LGBMRegressor', 'LGBMClassifier', 'LGBMRanker', 'LGBMModel', 'LGBMRegressor', 'LGBMClassifier', 'LGBMRanker',
'print_evaluation', 'record_evaluation', 'reset_parameter', 'early_stopping', 'print_evaluation', 'record_evaluation', 'reset_parameter', 'early_stopping',
......
...@@ -5,8 +5,10 @@ import ctypes ...@@ -5,8 +5,10 @@ import ctypes
import json import json
import os import os
import warnings import warnings
from tempfile import NamedTemporaryFile
from collections import OrderedDict from collections import OrderedDict
from functools import wraps
from logging import Logger
from tempfile import NamedTemporaryFile
import numpy as np import numpy as np
import scipy.sparse import scipy.sparse
...@@ -15,9 +17,64 @@ from .compat import PANDAS_INSTALLED, DataFrame, Series, is_dtype_sparse, DataTa ...@@ -15,9 +17,64 @@ from .compat import PANDAS_INSTALLED, DataFrame, Series, is_dtype_sparse, DataTa
from .libpath import find_lib_path from .libpath import find_lib_path
class _DummyLogger:
def info(self, msg):
print(msg)
def warning(self, msg):
warnings.warn(msg, stacklevel=3)
# Module-level logger handle; stays a _DummyLogger (print/warnings based)
# until the user swaps it out with register_logger().
_LOGGER = _DummyLogger()
def register_logger(logger):
    """Register custom logger.

    All log output of the package (including messages forwarded from the
    native library) is routed through the registered logger.

    Parameters
    ----------
    logger : logging.Logger
        Custom logger.

    Raises
    ------
    TypeError
        If ``logger`` is not an instance of ``logging.Logger``.
    """
    if not isinstance(logger, Logger):
        raise TypeError("Logger should inherit logging.Logger class")
    global _LOGGER
    _LOGGER = logger
def _normalize_native_string(func):
"""Join log messages from native library which come by chunks."""
msg_normalized = []
@wraps(func)
def wrapper(msg):
nonlocal msg_normalized
if msg.strip() == '':
msg = ''.join(msg_normalized)
msg_normalized = []
return func(msg)
else:
msg_normalized.append(msg)
return wrapper
def _log_info(msg):
    """Forward an informational message to the currently registered logger."""
    _LOGGER.info(msg)
def _log_warning(msg):
    """Forward a warning message to the currently registered logger."""
    _LOGGER.warning(msg)
@_normalize_native_string
def _log_native(msg):
    """Log a re-assembled native-library message at INFO level.

    Chunk joining is handled by the ``_normalize_native_string`` decorator,
    so ``msg`` here is always one complete line.
    """
    _LOGGER.info(msg)
def _log_callback(msg): def _log_callback(msg):
"""Redirect logs from native library into Python console.""" """Redirect logs from native library into Python."""
print("{0:s}".format(msg.decode('utf-8')), end='') _log_native("{0:s}".format(msg.decode('utf-8')))
def _load_lib(): def _load_lib():
...@@ -329,8 +386,8 @@ def convert_from_sliced_object(data): ...@@ -329,8 +386,8 @@ def convert_from_sliced_object(data):
"""Fix the memory of multi-dimensional sliced object.""" """Fix the memory of multi-dimensional sliced object."""
if isinstance(data, np.ndarray) and isinstance(data.base, np.ndarray): if isinstance(data, np.ndarray) and isinstance(data.base, np.ndarray):
if not data.flags.c_contiguous: if not data.flags.c_contiguous:
warnings.warn("Usage of np.ndarray subset (sliced data) is not recommended " _log_warning("Usage of np.ndarray subset (sliced data) is not recommended "
"due to it will double the peak memory cost in LightGBM.") "due to it will double the peak memory cost in LightGBM.")
return np.copy(data) return np.copy(data)
return data return data
...@@ -620,7 +677,7 @@ class _InnerPredictor: ...@@ -620,7 +677,7 @@ class _InnerPredictor:
preds, nrow = self.__pred_for_np2d(data.to_numpy(), start_iteration, num_iteration, predict_type) preds, nrow = self.__pred_for_np2d(data.to_numpy(), start_iteration, num_iteration, predict_type)
else: else:
try: try:
warnings.warn('Converting data to scipy sparse matrix.') _log_warning('Converting data to scipy sparse matrix.')
csr = scipy.sparse.csr_matrix(data) csr = scipy.sparse.csr_matrix(data)
except BaseException: except BaseException:
raise TypeError('Cannot predict data for type {}'.format(type(data).__name__)) raise TypeError('Cannot predict data for type {}'.format(type(data).__name__))
...@@ -1103,9 +1160,9 @@ class Dataset: ...@@ -1103,9 +1160,9 @@ class Dataset:
.co_varnames[:getattr(self.__class__, '_lazy_init').__code__.co_argcount]) .co_varnames[:getattr(self.__class__, '_lazy_init').__code__.co_argcount])
for key, _ in params.items(): for key, _ in params.items():
if key in args_names: if key in args_names:
warnings.warn('{0} keyword has been found in `params` and will be ignored.\n' _log_warning('{0} keyword has been found in `params` and will be ignored.\n'
'Please use {0} argument of the Dataset constructor to pass this parameter.' 'Please use {0} argument of the Dataset constructor to pass this parameter.'
.format(key)) .format(key))
# user can set verbose with params, it has higher priority # user can set verbose with params, it has higher priority
if not any(verbose_alias in params for verbose_alias in _ConfigAliases.get("verbosity")) and silent: if not any(verbose_alias in params for verbose_alias in _ConfigAliases.get("verbosity")) and silent:
params["verbose"] = -1 params["verbose"] = -1
...@@ -1126,7 +1183,7 @@ class Dataset: ...@@ -1126,7 +1183,7 @@ class Dataset:
if categorical_indices: if categorical_indices:
for cat_alias in _ConfigAliases.get("categorical_feature"): for cat_alias in _ConfigAliases.get("categorical_feature"):
if cat_alias in params: if cat_alias in params:
warnings.warn('{} in param dict is overridden.'.format(cat_alias)) _log_warning('{} in param dict is overridden.'.format(cat_alias))
params.pop(cat_alias, None) params.pop(cat_alias, None)
params['categorical_column'] = sorted(categorical_indices) params['categorical_column'] = sorted(categorical_indices)
...@@ -1172,7 +1229,7 @@ class Dataset: ...@@ -1172,7 +1229,7 @@ class Dataset:
self.set_group(group) self.set_group(group)
if isinstance(predictor, _InnerPredictor): if isinstance(predictor, _InnerPredictor):
if self._predictor is None and init_score is not None: if self._predictor is None and init_score is not None:
warnings.warn("The init_score will be overridden by the prediction of init_model.") _log_warning("The init_score will be overridden by the prediction of init_model.")
self._set_init_score_by_predictor(predictor, data) self._set_init_score_by_predictor(predictor, data)
elif init_score is not None: elif init_score is not None:
self.set_init_score(init_score) self.set_init_score(init_score)
...@@ -1314,7 +1371,7 @@ class Dataset: ...@@ -1314,7 +1371,7 @@ class Dataset:
if self.reference is not None: if self.reference is not None:
reference_params = self.reference.get_params() reference_params = self.reference.get_params()
if self.get_params() != reference_params: if self.get_params() != reference_params:
warnings.warn('Overriding the parameters from Reference Dataset.') _log_warning('Overriding the parameters from Reference Dataset.')
self._update_params(reference_params) self._update_params(reference_params)
if self.used_indices is None: if self.used_indices is None:
# create valid # create valid
...@@ -1583,11 +1640,11 @@ class Dataset: ...@@ -1583,11 +1640,11 @@ class Dataset:
self.categorical_feature = categorical_feature self.categorical_feature = categorical_feature
return self._free_handle() return self._free_handle()
elif categorical_feature == 'auto': elif categorical_feature == 'auto':
warnings.warn('Using categorical_feature in Dataset.') _log_warning('Using categorical_feature in Dataset.')
return self return self
else: else:
warnings.warn('categorical_feature in Dataset is overridden.\n' _log_warning('categorical_feature in Dataset is overridden.\n'
'New categorical_feature is {}'.format(sorted(list(categorical_feature)))) 'New categorical_feature is {}'.format(sorted(list(categorical_feature))))
self.categorical_feature = categorical_feature self.categorical_feature = categorical_feature
return self._free_handle() return self._free_handle()
else: else:
...@@ -1840,8 +1897,8 @@ class Dataset: ...@@ -1840,8 +1897,8 @@ class Dataset:
elif isinstance(self.data, DataTable): elif isinstance(self.data, DataTable):
self.data = self.data[self.used_indices, :] self.data = self.data[self.used_indices, :]
else: else:
warnings.warn("Cannot subset {} type of raw data.\n" _log_warning("Cannot subset {} type of raw data.\n"
"Returning original raw data".format(type(self.data).__name__)) "Returning original raw data".format(type(self.data).__name__))
self.need_slice = False self.need_slice = False
if self.data is None: if self.data is None:
raise LightGBMError("Cannot call `get_data` after freed raw data, " raise LightGBMError("Cannot call `get_data` after freed raw data, "
...@@ -2011,10 +2068,10 @@ class Dataset: ...@@ -2011,10 +2068,10 @@ class Dataset:
old_self_data_type) old_self_data_type)
err_msg += ("Set free_raw_data=False when construct Dataset to avoid this" err_msg += ("Set free_raw_data=False when construct Dataset to avoid this"
if was_none else "Freeing raw data") if was_none else "Freeing raw data")
warnings.warn(err_msg) _log_warning(err_msg)
self.feature_name = self.get_feature_name() self.feature_name = self.get_feature_name()
warnings.warn("Reseting categorical features.\n" _log_warning("Reseting categorical features.\n"
"You can set new categorical features via ``set_categorical_feature`` method") "You can set new categorical features via ``set_categorical_feature`` method")
self.categorical_feature = "auto" self.categorical_feature = "auto"
self.pandas_categorical = None self.pandas_categorical = None
return self return self
...@@ -2834,7 +2891,7 @@ class Booster: ...@@ -2834,7 +2891,7 @@ class Booster:
self.handle, self.handle,
ctypes.byref(out_num_class))) ctypes.byref(out_num_class)))
if verbose: if verbose:
print('Finished loading model, total used %d iterations' % int(out_num_iterations.value)) _log_info('Finished loading model, total used %d iterations' % int(out_num_iterations.value))
self.__num_class = out_num_class.value self.__num_class = out_num_class.value
self.pandas_categorical = _load_pandas_categorical(model_str=model_str) self.pandas_categorical = _load_pandas_categorical(model_str=model_str)
return self return self
......
# coding: utf-8 # coding: utf-8
"""Callbacks library.""" """Callbacks library."""
import collections import collections
import warnings
from operator import gt, lt from operator import gt, lt
from .basic import _ConfigAliases from .basic import _ConfigAliases, _log_info, _log_warning
class EarlyStopException(Exception): class EarlyStopException(Exception):
...@@ -67,7 +66,7 @@ def print_evaluation(period=1, show_stdv=True): ...@@ -67,7 +66,7 @@ def print_evaluation(period=1, show_stdv=True):
def _callback(env): def _callback(env):
if period > 0 and env.evaluation_result_list and (env.iteration + 1) % period == 0: if period > 0 and env.evaluation_result_list and (env.iteration + 1) % period == 0:
result = '\t'.join([_format_eval_result(x, show_stdv) for x in env.evaluation_result_list]) result = '\t'.join([_format_eval_result(x, show_stdv) for x in env.evaluation_result_list])
print('[%d]\t%s' % (env.iteration + 1, result)) _log_info('[%d]\t%s' % (env.iteration + 1, result))
_callback.order = 10 _callback.order = 10
return _callback return _callback
...@@ -180,15 +179,14 @@ def early_stopping(stopping_rounds, first_metric_only=False, verbose=True): ...@@ -180,15 +179,14 @@ def early_stopping(stopping_rounds, first_metric_only=False, verbose=True):
enabled[0] = not any(env.params.get(boost_alias, "") == 'dart' for boost_alias enabled[0] = not any(env.params.get(boost_alias, "") == 'dart' for boost_alias
in _ConfigAliases.get("boosting")) in _ConfigAliases.get("boosting"))
if not enabled[0]: if not enabled[0]:
warnings.warn('Early stopping is not available in dart mode') _log_warning('Early stopping is not available in dart mode')
return return
if not env.evaluation_result_list: if not env.evaluation_result_list:
raise ValueError('For early stopping, ' raise ValueError('For early stopping, '
'at least one dataset and eval metric is required for evaluation') 'at least one dataset and eval metric is required for evaluation')
if verbose: if verbose:
msg = "Training until validation scores don't improve for {} rounds" _log_info("Training until validation scores don't improve for {} rounds".format(stopping_rounds))
print(msg.format(stopping_rounds))
# split is needed for "<dataset type> <metric>" case (e.g. "train l1") # split is needed for "<dataset type> <metric>" case (e.g. "train l1")
first_metric[0] = env.evaluation_result_list[0][1].split(" ")[-1] first_metric[0] = env.evaluation_result_list[0][1].split(" ")[-1]
...@@ -205,10 +203,10 @@ def early_stopping(stopping_rounds, first_metric_only=False, verbose=True): ...@@ -205,10 +203,10 @@ def early_stopping(stopping_rounds, first_metric_only=False, verbose=True):
def _final_iteration_check(env, eval_name_splitted, i): def _final_iteration_check(env, eval_name_splitted, i):
if env.iteration == env.end_iteration - 1: if env.iteration == env.end_iteration - 1:
if verbose: if verbose:
print('Did not meet early stopping. Best iteration is:\n[%d]\t%s' % ( _log_info('Did not meet early stopping. Best iteration is:\n[%d]\t%s' % (
best_iter[i] + 1, '\t'.join([_format_eval_result(x) for x in best_score_list[i]]))) best_iter[i] + 1, '\t'.join([_format_eval_result(x) for x in best_score_list[i]])))
if first_metric_only: if first_metric_only:
print("Evaluated only: {}".format(eval_name_splitted[-1])) _log_info("Evaluated only: {}".format(eval_name_splitted[-1]))
raise EarlyStopException(best_iter[i], best_score_list[i]) raise EarlyStopException(best_iter[i], best_score_list[i])
def _callback(env): def _callback(env):
...@@ -232,10 +230,10 @@ def early_stopping(stopping_rounds, first_metric_only=False, verbose=True): ...@@ -232,10 +230,10 @@ def early_stopping(stopping_rounds, first_metric_only=False, verbose=True):
continue # train data for lgb.cv or sklearn wrapper (underlying lgb.train) continue # train data for lgb.cv or sklearn wrapper (underlying lgb.train)
elif env.iteration - best_iter[i] >= stopping_rounds: elif env.iteration - best_iter[i] >= stopping_rounds:
if verbose: if verbose:
print('Early stopping, best iteration is:\n[%d]\t%s' % ( _log_info('Early stopping, best iteration is:\n[%d]\t%s' % (
best_iter[i] + 1, '\t'.join([_format_eval_result(x) for x in best_score_list[i]]))) best_iter[i] + 1, '\t'.join([_format_eval_result(x) for x in best_score_list[i]])))
if first_metric_only: if first_metric_only:
print("Evaluated only: {}".format(eval_name_splitted[-1])) _log_info("Evaluated only: {}".format(eval_name_splitted[-1]))
raise EarlyStopException(best_iter[i], best_score_list[i]) raise EarlyStopException(best_iter[i], best_score_list[i])
_final_iteration_check(env, eval_name_splitted, i) _final_iteration_check(env, eval_name_splitted, i)
_callback.order = 30 _callback.order = 30
......
...@@ -6,7 +6,6 @@ Dask.Array and Dask.DataFrame collections. ...@@ -6,7 +6,6 @@ Dask.Array and Dask.DataFrame collections.
It is based on dask-lightgbm, which was based on dask-xgboost. It is based on dask-lightgbm, which was based on dask-xgboost.
""" """
import logging
import socket import socket
from collections import defaultdict from collections import defaultdict
from copy import deepcopy from copy import deepcopy
...@@ -22,11 +21,9 @@ from dask import dataframe as dd ...@@ -22,11 +21,9 @@ from dask import dataframe as dd
from dask import delayed from dask import delayed
from dask.distributed import Client, default_client, get_worker, wait from dask.distributed import Client, default_client, get_worker, wait
from .basic import _ConfigAliases, _LIB, _safe_call from .basic import _ConfigAliases, _LIB, _log_warning, _safe_call
from .sklearn import LGBMClassifier, LGBMRegressor, LGBMRanker from .sklearn import LGBMClassifier, LGBMRegressor, LGBMRanker
logger = logging.getLogger(__name__)
def _find_open_port(worker_ip: str, local_listen_port: int, ports_to_skip: Iterable[int]) -> int: def _find_open_port(worker_ip: str, local_listen_port: int, ports_to_skip: Iterable[int]) -> int:
"""Find an open port. """Find an open port.
...@@ -257,10 +254,10 @@ def _train(client, data, label, params, model_factory, sample_weight=None, group ...@@ -257,10 +254,10 @@ def _train(client, data, label, params, model_factory, sample_weight=None, group
'voting_parallel' 'voting_parallel'
} }
if tree_learner is None: if tree_learner is None:
logger.warning('Parameter tree_learner not set. Using "data" as default') _log_warning('Parameter tree_learner not set. Using "data" as default')
params['tree_learner'] = 'data' params['tree_learner'] = 'data'
elif tree_learner.lower() not in allowed_tree_learners: elif tree_learner.lower() not in allowed_tree_learners:
logger.warning('Parameter tree_learner set to %s, which is not allowed. Using "data" as default' % tree_learner) _log_warning('Parameter tree_learner set to %s, which is not allowed. Using "data" as default' % tree_learner)
params['tree_learner'] = 'data' params['tree_learner'] = 'data'
local_listen_port = 12400 local_listen_port = 12400
......
...@@ -2,13 +2,12 @@ ...@@ -2,13 +2,12 @@
"""Library with training routines of LightGBM.""" """Library with training routines of LightGBM."""
import collections import collections
import copy import copy
import warnings
from operator import attrgetter from operator import attrgetter
import numpy as np import numpy as np
from . import callback from . import callback
from .basic import Booster, Dataset, LightGBMError, _ConfigAliases, _InnerPredictor from .basic import Booster, Dataset, LightGBMError, _ConfigAliases, _InnerPredictor, _log_warning
from .compat import SKLEARN_INSTALLED, _LGBMGroupKFold, _LGBMStratifiedKFold from .compat import SKLEARN_INSTALLED, _LGBMGroupKFold, _LGBMStratifiedKFold
...@@ -146,12 +145,12 @@ def train(params, train_set, num_boost_round=100, ...@@ -146,12 +145,12 @@ def train(params, train_set, num_boost_round=100,
for alias in _ConfigAliases.get("num_iterations"): for alias in _ConfigAliases.get("num_iterations"):
if alias in params: if alias in params:
num_boost_round = params.pop(alias) num_boost_round = params.pop(alias)
warnings.warn("Found `{}` in params. Will use it instead of argument".format(alias)) _log_warning("Found `{}` in params. Will use it instead of argument".format(alias))
params["num_iterations"] = num_boost_round params["num_iterations"] = num_boost_round
for alias in _ConfigAliases.get("early_stopping_round"): for alias in _ConfigAliases.get("early_stopping_round"):
if alias in params: if alias in params:
early_stopping_rounds = params.pop(alias) early_stopping_rounds = params.pop(alias)
warnings.warn("Found `{}` in params. Will use it instead of argument".format(alias)) _log_warning("Found `{}` in params. Will use it instead of argument".format(alias))
params["early_stopping_round"] = early_stopping_rounds params["early_stopping_round"] = early_stopping_rounds
first_metric_only = params.get('first_metric_only', False) first_metric_only = params.get('first_metric_only', False)
...@@ -525,12 +524,12 @@ def cv(params, train_set, num_boost_round=100, ...@@ -525,12 +524,12 @@ def cv(params, train_set, num_boost_round=100,
params['objective'] = 'none' params['objective'] = 'none'
for alias in _ConfigAliases.get("num_iterations"): for alias in _ConfigAliases.get("num_iterations"):
if alias in params: if alias in params:
warnings.warn("Found `{}` in params. Will use it instead of argument".format(alias)) _log_warning("Found `{}` in params. Will use it instead of argument".format(alias))
num_boost_round = params.pop(alias) num_boost_round = params.pop(alias)
params["num_iterations"] = num_boost_round params["num_iterations"] = num_boost_round
for alias in _ConfigAliases.get("early_stopping_round"): for alias in _ConfigAliases.get("early_stopping_round"):
if alias in params: if alias in params:
warnings.warn("Found `{}` in params. Will use it instead of argument".format(alias)) _log_warning("Found `{}` in params. Will use it instead of argument".format(alias))
early_stopping_rounds = params.pop(alias) early_stopping_rounds = params.pop(alias)
params["early_stopping_round"] = early_stopping_rounds params["early_stopping_round"] = early_stopping_rounds
first_metric_only = params.get('first_metric_only', False) first_metric_only = params.get('first_metric_only', False)
......
# coding: utf-8 # coding: utf-8
"""Plotting library.""" """Plotting library."""
import warnings
from copy import deepcopy from copy import deepcopy
from io import BytesIO from io import BytesIO
import numpy as np import numpy as np
from .basic import Booster from .basic import Booster, _log_warning
from .compat import MATPLOTLIB_INSTALLED, GRAPHVIZ_INSTALLED from .compat import MATPLOTLIB_INSTALLED, GRAPHVIZ_INSTALLED
from .sklearn import LGBMModel from .sklearn import LGBMModel
...@@ -326,8 +325,7 @@ def plot_metric(booster, metric=None, dataset_names=None, ...@@ -326,8 +325,7 @@ def plot_metric(booster, metric=None, dataset_names=None,
num_metric = len(metrics_for_one) num_metric = len(metrics_for_one)
if metric is None: if metric is None:
if num_metric > 1: if num_metric > 1:
msg = "More than one metric available, picking one to plot." _log_warning("More than one metric available, picking one to plot.")
warnings.warn(msg, stacklevel=2)
metric, results = metrics_for_one.popitem() metric, results = metrics_for_one.popitem()
else: else:
if metric not in metrics_for_one: if metric not in metrics_for_one:
......
# coding: utf-8 # coding: utf-8
"""Scikit-learn wrapper interface for LightGBM.""" """Scikit-learn wrapper interface for LightGBM."""
import copy import copy
import warnings
from inspect import signature from inspect import signature
import numpy as np import numpy as np
from .basic import Dataset, LightGBMError, _ConfigAliases from .basic import Dataset, LightGBMError, _ConfigAliases, _log_warning
from .compat import (SKLEARN_INSTALLED, _LGBMClassifierBase, from .compat import (SKLEARN_INSTALLED, _LGBMClassifierBase,
LGBMNotFittedError, _LGBMLabelEncoder, _LGBMModelBase, LGBMNotFittedError, _LGBMLabelEncoder, _LGBMModelBase,
_LGBMRegressorBase, _LGBMCheckXY, _LGBMCheckArray, _LGBMCheckSampleWeight, _LGBMRegressorBase, _LGBMCheckXY, _LGBMCheckArray, _LGBMCheckSampleWeight,
...@@ -931,9 +930,9 @@ class LGBMClassifier(LGBMModel, _LGBMClassifierBase): ...@@ -931,9 +930,9 @@ class LGBMClassifier(LGBMModel, _LGBMClassifierBase):
""" """
result = super().predict(X, raw_score, start_iteration, num_iteration, pred_leaf, pred_contrib, **kwargs) result = super().predict(X, raw_score, start_iteration, num_iteration, pred_leaf, pred_contrib, **kwargs)
if callable(self._objective) and not (raw_score or pred_leaf or pred_contrib): if callable(self._objective) and not (raw_score or pred_leaf or pred_contrib):
warnings.warn("Cannot compute class probabilities or labels " _log_warning("Cannot compute class probabilities or labels "
"due to the usage of customized objective function.\n" "due to the usage of customized objective function.\n"
"Returning raw scores instead.") "Returning raw scores instead.")
return result return result
elif self._n_classes > 2 or raw_score or pred_leaf or pred_contrib: elif self._n_classes > 2 or raw_score or pred_leaf or pred_contrib:
return result return result
......
# coding: utf-8
import logging
import numpy as np
import lightgbm as lgb
def test_register_logger(tmp_path):
    """Check that a user-registered logging.Logger receives all package output."""
    # Route everything (DEBUG and up) into a temp file so the exact
    # sequence of log lines can be compared afterwards.
    logger = logging.getLogger("LightGBM")
    logger.setLevel(logging.DEBUG)
    formatter = logging.Formatter('%(levelname)s | %(message)s')
    log_filename = str(tmp_path / "LightGBM_test_logger.log")
    file_handler = logging.FileHandler(log_filename, mode="w", encoding="utf-8")
    file_handler.setLevel(logging.DEBUG)
    file_handler.setFormatter(formatter)
    logger.addHandler(file_handler)

    def dummy_metric(_, __):
        # Emits a DEBUG record each evaluation round; used to prove that
        # user code sharing the registered logger is interleaved correctly.
        logger.debug('In dummy_metric')
        return 'dummy_metric', 1, True

    lgb.register_logger(logger)

    # Tiny constant-feature dataset: training itself is trivial, the point
    # is only to trigger a known set of warnings/info messages.
    X = np.array([[1, 2, 3],
                  [1, 2, 4],
                  [1, 2, 4],
                  [1, 2, 3]],
                 dtype=np.float32)
    y = np.array([0, 1, 1, 0])
    lgb_data = lgb.Dataset(X, y)

    eval_records = {}
    # categorical_feature=[1] and early stopping deliberately provoke
    # warnings; verbose_eval=2 produces periodic INFO evaluation lines.
    lgb.train({'objective': 'binary', 'metric': ['auc', 'binary_error']},
              lgb_data, num_boost_round=10, feval=dummy_metric,
              valid_sets=[lgb_data], evals_result=eval_records,
              categorical_feature=[1], early_stopping_rounds=4, verbose_eval=2)

    # plot_metric with two metrics emits a "picking one to plot" warning.
    lgb.plot_metric(eval_records)

    expected_log = r"""
WARNING | categorical_feature in Dataset is overridden.
New categorical_feature is [1]
INFO | [LightGBM] [Warning] There are no meaningful features, as all feature values are constant.
INFO | [LightGBM] [Info] Number of positive: 2, number of negative: 2
INFO | [LightGBM] [Info] Total Bins 0
INFO | [LightGBM] [Info] Number of data points in the train set: 4, number of used features: 0
INFO | [LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000
INFO | [LightGBM] [Warning] Stopped training because there are no more leaves that meet the split requirements
DEBUG | In dummy_metric
INFO | Training until validation scores don't improve for 4 rounds
INFO | [LightGBM] [Warning] Stopped training because there are no more leaves that meet the split requirements
DEBUG | In dummy_metric
INFO | [2] training's auc: 0.5 training's binary_error: 0.5 training's dummy_metric: 1
INFO | [LightGBM] [Warning] Stopped training because there are no more leaves that meet the split requirements
DEBUG | In dummy_metric
INFO | [LightGBM] [Warning] Stopped training because there are no more leaves that meet the split requirements
DEBUG | In dummy_metric
INFO | [4] training's auc: 0.5 training's binary_error: 0.5 training's dummy_metric: 1
INFO | [LightGBM] [Warning] Stopped training because there are no more leaves that meet the split requirements
DEBUG | In dummy_metric
INFO | [LightGBM] [Warning] Stopped training because there are no more leaves that meet the split requirements
DEBUG | In dummy_metric
INFO | [6] training's auc: 0.5 training's binary_error: 0.5 training's dummy_metric: 1
INFO | [LightGBM] [Warning] Stopped training because there are no more leaves that meet the split requirements
DEBUG | In dummy_metric
INFO | [LightGBM] [Warning] Stopped training because there are no more leaves that meet the split requirements
DEBUG | In dummy_metric
INFO | [8] training's auc: 0.5 training's binary_error: 0.5 training's dummy_metric: 1
INFO | [LightGBM] [Warning] Stopped training because there are no more leaves that meet the split requirements
DEBUG | In dummy_metric
INFO | [LightGBM] [Warning] Stopped training because there are no more leaves that meet the split requirements
DEBUG | In dummy_metric
INFO | [10] training's auc: 0.5 training's binary_error: 0.5 training's dummy_metric: 1
INFO | Did not meet early stopping. Best iteration is:
[1] training's auc: 0.5 training's binary_error: 0.5 training's dummy_metric: 1
WARNING | More than one metric available, picking one to plot.
""".strip()

    # GPU builds add extra device/setup lines; strip them so the assertion
    # holds on both CPU and GPU test runners.
    gpu_lines = [
        "INFO | [LightGBM] [Info] This is the GPU trainer",
        "INFO | [LightGBM] [Info] Using GPU Device:",
        "INFO | [LightGBM] [Info] Compiling OpenCL Kernel with 16 bins...",
        "INFO | [LightGBM] [Info] GPU programs have been built",
        "INFO | [LightGBM] [Warning] GPU acceleration is disabled because no non-trivial dense features can be found"
    ]
    with open(log_filename, "rt", encoding="utf-8") as f:
        actual_log = f.read().strip()
    actual_log_wo_gpu_stuff = []
    for line in actual_log.split("\n"):
        if not any(line.startswith(gpu_line) for gpu_line in gpu_lines):
            actual_log_wo_gpu_stuff.append(line)
    assert "\n".join(actual_log_wo_gpu_stuff) == expected_log
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment