Unverified commit 56313661, authored by Nikita Titov, committed by GitHub
Browse files

[python] fixes for supporting 2d numpy arrays for predictions, grads and hess...


[python] fixes for supporting 2d numpy arrays for predictions, grads and hess in multiclass custom objective and eval (#5030)

* fixes for supporting 2d numpy arrays for predictions, grads and hess in multiclass custom objective

* Apply suggestions from code review
Co-authored-by: José Morales <jmoralz92@gmail.com>
parent 7e478047
......@@ -2752,7 +2752,7 @@ class Booster:
- ``missing_direction`` : str, split direction that missing values should go to. ``None`` for leaf nodes.
- ``missing_type`` : str, describes what types of values are treated as missing.
- ``value`` : float64, predicted value for this leaf node, multiplied by the learning rate.
- ``weight`` : float64 or int64, sum of hessian (second-order derivative of objective), summed over observations that fall in this node.
- ``weight`` : float64 or int64, sum of Hessian (second-order derivative of objective), summed over observations that fall in this node.
- ``count`` : int64, number of records in the training data that fall into this node.
Returns
......@@ -2961,7 +2961,7 @@ class Booster:
The value of the second order derivative (Hessian) of the loss
with respect to the elements of preds for each sample point.
For multi-class task, preds are a [n_samples, n_classes] numpy 2-D array,
For multi-class task, preds are numpy 2-D array of shape = [n_samples, n_classes],
and grad and hess should be returned in the same format.
Returns
......@@ -3000,9 +3000,6 @@ class Booster:
if not self.__set_objective_to_none:
self.reset_parameter({"objective": "none"}).__set_objective_to_none = True
grad, hess = fobj(self.__inner_predict(0), self.train_set)
if self.num_model_per_iteration() > 1:
grad = grad.ravel(order='F')
hess = hess.ravel(order='F')
return self.__boost(grad, hess)
def __boost(self, grad, hess):
......@@ -3012,7 +3009,7 @@ class Booster:
Score is returned before any transformation,
e.g. it is raw margin instead of probability of positive class for binary task.
For multi-class task, preds are a [n_samples, n_classes] numpy 2-D array,
For multi-class task, score are numpy 2-D array of shape = [n_samples, n_classes],
and grad and hess should be returned in the same format.
Parameters
......@@ -3029,6 +3026,9 @@ class Booster:
is_finished : bool
Whether the boost was successfully finished.
"""
if self.__num_class > 1:
grad = grad.ravel(order='F')
hess = hess.ravel(order='F')
grad = list_to_1d_numpy(grad, name='gradient')
hess = list_to_1d_numpy(hess, name='hessian')
assert grad.flags.c_contiguous
......@@ -3036,12 +3036,11 @@ class Booster:
if len(grad) != len(hess):
raise ValueError(f"Lengths of gradient ({len(grad)}) and Hessian ({len(hess)}) don't match")
num_train_data = self.train_set.num_data()
num_models = self.__num_class
if len(grad) != num_train_data * num_models:
if len(grad) != num_train_data * self.__num_class:
raise ValueError(
f"Lengths of gradient ({len(grad)}) and Hessian ({len(hess)}) "
f"don't match training data length ({num_train_data}) * "
f"number of models per one iteration ({num_models})"
f"number of models per one iteration ({self.__num_class})"
)
is_finished = ctypes.c_int(0)
_safe_call(_LIB.LGBM_BoosterUpdateOneIterCustom(
......@@ -3149,8 +3148,9 @@ class Booster:
Should accept two parameters: preds, eval_data,
and return (eval_name, eval_result, is_higher_better) or list of such tuples.
preds : numpy 1-D array
preds : numpy 1-D array or numpy 2-D array (for multi-class task)
The predicted values.
For multi-class task, preds are numpy 2-D array of shape = [n_samples, n_classes].
If ``fobj`` is specified, predicted values are returned before any transformation,
e.g. they are raw margin instead of probability of positive class for binary task in this case.
eval_data : Dataset
......@@ -3162,9 +3162,6 @@ class Booster:
is_higher_better : bool
Is eval result higher better, e.g. AUC is ``is_higher_better``.
For multi-class task, preds are a [n_samples, n_classes] numpy 2-D array,
and grad and hess should be returned in the same format.
Returns
-------
result : list
......@@ -3199,6 +3196,7 @@ class Booster:
preds : numpy 1-D array or numpy 2-D array (for multi-class task)
The predicted values.
For multi-class task, preds are numpy 2-D array of shape = [n_samples, n_classes].
If ``fobj`` is specified, predicted values are returned before any transformation,
e.g. they are raw margin instead of probability of positive class for binary task in this case.
eval_data : Dataset
......@@ -3210,9 +3208,6 @@ class Booster:
is_higher_better : bool
Is eval result higher better, e.g. AUC is ``is_higher_better``.
For multi-class task, preds are a [n_samples, n_classes] numpy 2-D array,
and grad and hess should be returned in the same format.
Returns
-------
result : list
......@@ -3232,6 +3227,7 @@ class Booster:
preds : numpy 1-D array or numpy 2-D array (for multi-class task)
The predicted values.
For multi-class task, preds are numpy 2-D array of shape = [n_samples, n_classes].
If ``fobj`` is specified, predicted values are returned before any transformation,
e.g. they are raw margin instead of probability of positive class for binary task in this case.
eval_data : Dataset
......@@ -3243,9 +3239,6 @@ class Booster:
is_higher_better : bool
Is eval result higher better, e.g. AUC is ``is_higher_better``.
For multi-class task, preds are a [n_samples, n_classes] numpy 2-D array,
and grad and hess should be returned in the same format.
Returns
-------
result : list
......
......@@ -9,13 +9,12 @@ from typing import Any, Callable, Dict, List, Optional, Tuple, Union
import numpy as np
from . import callback
from .basic import (Booster, Dataset, LightGBMError, _ArrayLike, _choose_param_value, _ConfigAliases, _InnerPredictor,
_log_warning)
from .basic import Booster, Dataset, LightGBMError, _choose_param_value, _ConfigAliases, _InnerPredictor, _log_warning
from .compat import SKLEARN_INSTALLED, _LGBMGroupKFold, _LGBMStratifiedKFold
_LGBM_CustomObjectiveFunction = Callable[
[np.ndarray, Dataset],
Tuple[_ArrayLike, _ArrayLike]
Tuple[np.ndarray, np.ndarray]
]
_LGBM_CustomMetricFunction = Callable[
[np.ndarray, Dataset],
......@@ -56,30 +55,30 @@ def train(
Should accept two parameters: preds, train_data,
and return (grad, hess).
preds : numpy 1-D array
preds : numpy 1-D array or numpy 2-D array (for multi-class task)
The predicted values.
Predicted values are returned before any transformation,
e.g. they are raw margin instead of probability of positive class for binary task.
train_data : Dataset
The training dataset.
grad : list, numpy 1-D array or pandas Series
grad : numpy 1-D array or numpy 2-D array (for multi-class task)
The value of the first order derivative (gradient) of the loss
with respect to the elements of preds for each sample point.
hess : list, numpy 1-D array or pandas Series
hess : numpy 1-D array or numpy 2-D array (for multi-class task)
The value of the second order derivative (Hessian) of the loss
with respect to the elements of preds for each sample point.
For multi-class task, the preds is group by class_id first, then group by row_id.
If you want to get i-th row preds in j-th class, the access way is score[j * num_data + i]
and you should group grad and hess in this way as well.
For multi-class task, preds are numpy 2-D array of shape = [n_samples, n_classes],
and grad and hess should be returned in the same format.
feval : callable, list of callable, or None, optional (default=None)
Customized evaluation function.
Each evaluation function should accept two parameters: preds, eval_data,
and return (eval_name, eval_result, is_higher_better) or list of such tuples.
preds : numpy 1-D array
preds : numpy 1-D array or numpy 2-D array (for multi-class task)
The predicted values.
For multi-class task, preds are numpy 2-D array of shape = [n_samples, n_classes].
If ``fobj`` is specified, predicted values are returned before any transformation,
e.g. they are raw margin instead of probability of positive class for binary task in this case.
eval_data : Dataset
......@@ -91,8 +90,6 @@ def train(
is_higher_better : bool
Is eval result higher better, e.g. AUC is ``is_higher_better``.
For multi-class task, the preds is group by class_id first, then group by row_id.
If you want to get i-th row preds in j-th class, the access way is preds[j * num_data + i].
To ignore the default metric corresponding to the used objective,
set the ``metric`` parameter to the string ``"None"`` in ``params``.
init_model : str, pathlib.Path, Booster or None, optional (default=None)
......@@ -411,30 +408,30 @@ def cv(params, train_set, num_boost_round=100,
Should accept two parameters: preds, train_data,
and return (grad, hess).
preds : numpy 1-D array
preds : numpy 1-D array or numpy 2-D array (for multi-class task)
The predicted values.
Predicted values are returned before any transformation,
e.g. they are raw margin instead of probability of positive class for binary task.
train_data : Dataset
The training dataset.
grad : list, numpy 1-D array or pandas Series
grad : numpy 1-D array or numpy 2-D array (for multi-class task)
The value of the first order derivative (gradient) of the loss
with respect to the elements of preds for each sample point.
hess : list, numpy 1-D array or pandas Series
hess : numpy 1-D array or numpy 2-D array (for multi-class task)
The value of the second order derivative (Hessian) of the loss
with respect to the elements of preds for each sample point.
For multi-class task, the preds is group by class_id first, then group by row_id.
If you want to get i-th row preds in j-th class, the access way is score[j * num_data + i]
and you should group grad and hess in this way as well.
For multi-class task, preds are numpy 2-D array of shape = [n_samples, n_classes],
and grad and hess should be returned in the same format.
feval : callable, list of callable, or None, optional (default=None)
Customized evaluation function.
Each evaluation function should accept two parameters: preds, eval_data,
and return (eval_name, eval_result, is_higher_better) or list of such tuples.
preds : numpy 1-D array
preds : numpy 1-D array or numpy 2-D array (for multi-class task)
The predicted values.
For multi-class task, preds are numpy 2-D array of shape = [n_samples, n_classes].
If ``fobj`` is specified, predicted values are returned before any transformation,
e.g. they are raw margin instead of probability of positive class for binary task in this case.
eval_data : Dataset
......@@ -446,8 +443,6 @@ def cv(params, train_set, num_boost_round=100,
is_higher_better : bool
Is eval result higher better, e.g. AUC is ``is_higher_better``.
For multi-class task, the preds is group by class_id first, then group by row_id.
If you want to get i-th row preds in j-th class, the access way is preds[j * num_data + i].
To ignore the default metric corresponding to the used objective,
set ``metrics`` to the string ``"None"``.
init_model : str, pathlib.Path, Booster or None, optional (default=None)
......
......@@ -556,7 +556,7 @@ def create_tree_digraph(
- ``'internal_count'`` : number of records from the training data that fall into this non-leaf node
- ``'internal_weight'`` : total weight of all nodes that fall into this non-leaf node
- ``'leaf_count'`` : number of records from the training data that fall into this leaf node
- ``'leaf_weight'`` : total weight (sum of hessian) of all observations that fall into this leaf node
- ``'leaf_weight'`` : total weight (sum of Hessian) of all observations that fall into this leaf node
- ``'data_percentage'`` : percentage of training data that fall into this node
precision : int or None, optional (default=3)
Used to restrict the display of floating point values to a certain precision.
......@@ -649,7 +649,7 @@ def plot_tree(
- ``'internal_count'`` : number of records from the training data that fall into this non-leaf node
- ``'internal_weight'`` : total weight of all nodes that fall into this non-leaf node
- ``'leaf_count'`` : number of records from the training data that fall into this leaf node
- ``'leaf_weight'`` : total weight (sum of hessian) of all observations that fall into this leaf node
- ``'leaf_weight'`` : total weight (sum of Hessian) of all observations that fall into this leaf node
- ``'data_percentage'`` : percentage of training data that fall into this node
precision : int or None, optional (default=3)
Used to restrict the display of floating point values to a certain precision.
......
......@@ -6,7 +6,7 @@ from typing import Any, Callable, Dict, List, Optional, Tuple, Union
import numpy as np
from .basic import Booster, Dataset, LightGBMError, _ArrayLike, _choose_param_value, _ConfigAliases, _log_warning
from .basic import Booster, Dataset, LightGBMError, _choose_param_value, _ConfigAliases, _log_warning
from .callback import record_evaluation
from .compat import (SKLEARN_INSTALLED, LGBMNotFittedError, _LGBMAssertAllFinite, _LGBMCheckArray,
_LGBMCheckClassificationTargets, _LGBMCheckSampleWeight, _LGBMCheckXY, _LGBMClassifierBase,
......@@ -19,11 +19,11 @@ _EvalResultType = Tuple[str, float, bool]
_LGBM_ScikitCustomObjectiveFunction = Union[
Callable[
[np.ndarray, np.ndarray],
Tuple[_ArrayLike, _ArrayLike]
Tuple[np.ndarray, np.ndarray]
],
Callable[
[np.ndarray, np.ndarray, np.ndarray],
Tuple[_ArrayLike, _ArrayLike]
Tuple[np.ndarray, np.ndarray]
],
]
_LGBM_ScikitCustomEvalFunction = Union[
......@@ -72,13 +72,13 @@ class _ObjectiveFunctionWrapper:
grad : numpy 1-D array of shape = [n_samples] or numpy 2-D array of shape [n_samples, n_classes] (for multi-class task)
The value of the first order derivative (gradient) of the loss
with respect to the elements of y_pred for each sample point.
hess : numpy 1-D array of shape = [n_samples] or numpy 2-D array of shape [n_samples, n_classes] (for multi-class task)
hess : numpy 1-D array of shape = [n_samples] or numpy 2-D array of shape = [n_samples, n_classes] (for multi-class task)
The value of the second order derivative (Hessian) of the loss
with respect to the elements of y_pred for each sample point.
.. note::
For multi-class task, preds are a [n_samples, n_classes] numpy 2-D array,
For multi-class task, y_pred is a numpy 2-D array of shape = [n_samples, n_classes],
and grad and hess should be returned in the same format.
"""
self.func = func
......@@ -95,10 +95,10 @@ class _ObjectiveFunctionWrapper:
Returns
-------
grad : numpy 1-D array of shape = [n_samples] or numpy 2-D array of shape [n_samples, n_classes] (for multi-class task)
grad : numpy 1-D array of shape = [n_samples] or numpy 2-D array of shape = [n_samples, n_classes] (for multi-class task)
The value of the first order derivative (gradient) of the loss
with respect to the elements of preds for each sample point.
hess : numpy 1-D array of shape = [n_samples] or numpy 2-D array of shape [n_samples, n_classes] (for multi-class task)
hess : numpy 1-D array of shape = [n_samples] or numpy 2-D array of shape = [n_samples, n_classes] (for multi-class task)
The value of the second order derivative (Hessian) of the loss
with respect to the elements of preds for each sample point.
"""
......@@ -162,11 +162,6 @@ class _EvalFunctionWrapper:
The eval result.
is_higher_better : bool
Is eval result higher better, e.g. AUC is ``is_higher_better``.
.. note::
For multi-class task, preds are a [n_samples, n_classes] numpy 2-D array,
and grad and hess should be returned in the same format.
"""
self.func = func
......@@ -297,9 +292,6 @@ _lgbmmodel_doc_custom_eval_note = """
The eval result.
is_higher_better : bool
Is eval result higher better, e.g. AUC is ``is_higher_better``.
For multi-class task, preds are a [n_samples, n_classes] numpy 2-D array,
and grad and hess should be returned in the same format.
"""
_lgbmmodel_doc_predict = (
......@@ -415,7 +407,7 @@ class LGBMModel(_LGBMModelBase):
min_split_gain : float, optional (default=0.)
Minimum loss reduction required to make a further partition on a leaf node of the tree.
min_child_weight : float, optional (default=1e-3)
Minimum sum of instance weight (hessian) needed in a child (leaf).
Minimum sum of instance weight (Hessian) needed in a child (leaf).
min_child_samples : int, optional (default=20)
Minimum number of data needed in a child (leaf).
subsample : float, optional (default=1.)
......@@ -473,7 +465,7 @@ class LGBMModel(_LGBMModelBase):
The value of the second order derivative (Hessian) of the loss
with respect to the elements of y_pred for each sample point.
For multi-class task, preds are a [n_samples, n_classes] numpy 2-D array,
For multi-class task, y_pred is a numpy 2-D array of shape = [n_samples, n_classes],
and grad and hess should be returned in the same format.
"""
if not SKLEARN_INSTALLED:
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment